/* mfcc - Mel-scaled Cepstral Coefficients analysis */

/* Mark Huckvale - University College London */

/* version 1.0 - June 1996 */

/* version 1.1 - July 1997
	- add deltas
*/

/* version 1.2 - October 2002
	- minimum window size = 20ms
*/

/* version 1.3 - August 2004
	- add HTK compatibility mode
	- make delta calculation compatible with HTK anyway
*/

/* PROGNAME appears in the -I banner, the usage message and the history
   string of the output item; PROGVERS appears in the -I banner only */
#define PROGNAME "mfcc"
#define PROGVERS "1.3"
/* NOTE(review): presumably read by the SFS library (e.g. by error()) to
   prefix messages - confirm against sfs.h */
char *progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH MFCC 1 UCL SFS
.SH NAME
mfcc - compute mel-scaled cepstral coefficients
.SH SYNOPSIS
.B mfcc
(-i item) (-n ncoeff) (-l low_freq) (-h high_freq) (-r frame_rate) (-e|-F) (-p) (-d1|-d2) (-H) sfsfile
.SH DESCRIPTION
.I mfcc
designs a mel-scaled filterbank for the supplied number of
channels and processes the input speech signal through the filterbank into a set
of mel-scaled cepstral coefficients.  The method is based on that
described in Deller, et al, Discrete Time Processing of
Speech Signals.  It uses a Hamming window and a forward FFT and a specialised
inverse DFT.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and version number.
.TP 11
.BI -i item
Select input item number.
.TP 11
.BI -n ncoeff
Select number of coefficients to calculate.  Default: 16.
.TP 11
.BI -l low_frequency
Specify lowest frequency of filterbank.  Default: 0Hz.
.TP 11
.BI -h high_frequency
Specify highest frequency of filterbank.  Default: half input sample rate.
.TP 11
.BI -r frame_rate
Specify frame rate for output.  Default 100 frames/sec.
.TP 11
.B -e
Append the log signal energy to the output frame.
.TP 11
.B -F
Store filter output rather than cepstral coefficients.
.TP 11
.B -p
Do not pre-emphasise signal.
.TP 11
.B -d1
Add first order delta cepstral coeffs to output vector.
.TP 11
.B -d2
Add first and second order delta cepstral coeffs to output vector.
.TP 11
.B -H
HTK compatibility mode.  Use the same MFCC calculation as HTK with
default parameter settings: TARGETRATE = 100000.0, WINDOWSIZE = 250000.0,
USEHAMMING = T, PREEMCOEF = 0.97, NUMCHANS = 20, CEPLIFTER = 23, NUMCEPS = 12.
You can adjust these by putting other mfcc switches after the -H.
.SH INPUT ITEMS
.IP 1.xx 11
Any speech waveform.
.SH OUTPUT ITEMS
.IP 11 11
Mel-scaled cepstral coefficients.
.SH VERSION/AUTHOR
1.3 - Mark Huckvale
*/
/*--------------------------------------------------------------------------*/

/* standard definitions */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <math.h>
#include "sfs.h"
#include "fft.h"

/* default parameters */
#define DEFAULT_NCOEFF		16	/* cepstral coefficients per frame (-n) */
#define DEFAULT_LOW_FREQ	0.0	/* lower edge of filterbank in Hz (-l) */
#define DEFAULT_FRAME_RATE	100	/* output frames per second (-r) */
#define DEFAULT_WINDOW_SIZE	0.02	/* minimum analysis window in seconds */

/* default pre-emphasis coefficient; the -H option replaces it with 0.97 */
#define PREEMP			0.95

/* buffers */
struct item_header		spitem;		/* header of input speech item */
struct item_header		coitem;		/* header of output coefficient item */
short				*sp;		/* one analysis window of input samples */
struct co_rec			*co;		/* history of output records (histlen long) */
float				*hamming;	/* Hamming window */
float				*dftbuf;	/* DFT buffer */
float				*chan;		/* filterbank channels */

/* options */
char	filename[SFSMAXFILENAME];	/* SFS file being processed */
int	ncoeff=DEFAULT_NCOEFF;		/* # cepstral coefficients */
int	nbase=DEFAULT_NCOEFF;		/* # base parameters */
int	nparam=DEFAULT_NCOEFF;		/* # parameters in vector */
double	lofreq=DEFAULT_LOW_FREQ;	/* first filter freq start */
double	hifreq=-1;			/* last filter freq end */
double	frate=DEFAULT_FRAME_RATE;	/* frame rate */
double	preemp=PREEMP;			/* pre-emphasis coefficient */
int		cepslifter=0;	/* cepstral liftering */
int		numchan=0;		/* number of filterbank channels */
double	wintime=DEFAULT_WINDOW_SIZE;	/* minimum window length in seconds */

int	dofilter=0;			/* output mel-scale filter, instead
					   of cepstral coefficients */
int	doenergy=0;			/* add energy */
int	verbose=0;			/* list filter design */
int	dopreemp=1;			/* signal pre-emphasis */
int	dodelta1=0;			/* do first deltas */
int	dodelta2=0;			/* do second deltas */
int	nfilter;			/* # filters */
int	winsize;			/* analysis window size */
int	stpsize;			/* step size */
int	fftsize;			/* FFT size */
int	histlen=1;			/* size of history */
int	outpos=0;			/* output vector position in history */
int	htkcompat=0;		/* HTK compatibility */

/* switches read by HTKConvertFrame; no command line option changes them */
int	htk_dozeromean=0;	/* remove DC offset from each frame */
int htk_dozeroc=0;		/* append zero'th cepstral coefficient */
int htk_dohamming=1;	/* apply Hamming window to each frame */

/* converts one frame of samples into coefficients stored at pbuf and
   returns the number of values written */
int HTKConvertFrame(short *sig, int len, float *pbuf,int ncoeff);

/* filter bank parameters: one record per triangular filter, with the
   indices expressed as FFT bin numbers */
struct filter_rec {
	int	lidx;	/* low index */
	int	cidx;	/* centre index */
	int	hidx;	/* high index */
	double	*win;	/* FFT weighting window */
} *filtab;		/* table of filters, allocated by designfbank() */
char	filspec[1024];	/* scratch for the "labels=..." parameter string */

double mylog10(double val)
{
	return (val < 1.0E-10) ? -10 : log10(val);
}
double HertzToMel(double hz)
{
	return (1000.0/log(2.0)) * log(1.0 + hz/1000.0);
}

double MelToHertz(double mel)
{
	return 1000 * (exp((log(2.0)/1000)*mel)-1.0);
}

/* design filter bank */
int designfbank(lf,hf,nfft,srate)
double	lf,hf;
int	nfft;
double	srate;
{
	int	i,j;
	double	fhz,fmel;
	int	nfilt;
	double	spacing=150;

	/* estimate number of filters required 300 mel width, 150 mel spacing */
	if (numchan==0)
		nfilt = (int)(1+(HertzToMel(hf) - HertzToMel(lf))/spacing);
	else {
		nfilt = numchan;
		spacing = (HertzToMel(hf) - HertzToMel(lf))/(numchan+1);
	}

	/* get memory for filter structures */
	filtab = calloc(nfilt,sizeof(struct filter_rec));
	for (i=0;i<nfilt;i++)
		if ((filtab[i].win = (double *)calloc(nfft/2,sizeof(double)))==NULL)
			error("could not get memory");

	/* fit triangular filters between limits */
	lf = MelToHertz(HertzToMel(lf)+spacing);
	hf = MelToHertz(HertzToMel(hf)-spacing);
	fhz = lf;
	nfilt=0;
	while (fhz <= hf) {
		fmel = HertzToMel(fhz);
		filtab[nfilt].lidx = (int)(0.5+nfft*(MelToHertz(fmel-spacing)/srate));
		filtab[nfilt].cidx = (int)(0.5+nfft*(MelToHertz(fmel)/srate));
		filtab[nfilt].hidx = (int)(0.5+nfft*(MelToHertz(fmel+spacing)/srate));
		fhz = MelToHertz(fmel+spacing);
		nfilt++;
	}

	/* create triangular weighting windows */
	for (i=0;i<nfilt;i++) {
		for (j=filtab[i].lidx;j<filtab[i].cidx;j++) {
			filtab[i].win[j] = 1.0 - (double)(filtab[i].cidx-j)/(double)(filtab[i].cidx-filtab[i].lidx+1);
		}
		for (j=filtab[i].cidx;j<=filtab[i].hidx;j++) {
			filtab[i].win[j] = 1.0 - (double)(j-filtab[i].cidx)/(double)(filtab[i].hidx-filtab[i].cidx+1);
		}
	}

	return(nfilt);
}

/* main program */
void main(argc,argv)
int	argc;
char	*argv[];
{
	extern int	optind;
	extern char	*optarg;
	int		c,errflg=0;

	int		fid,ofid;
	int		it;
	char		*ty;
	char		*sptype="0";
	int		i,j,k;
	double		srate;
	int		pos;
	double		omega;
	double		sum;
	double		sumsq;
	char		tbuf[32];
	int		nframe;
	int		totframes;
	int		hpos;
	int		delta1first=1,delta2first=1;
	int		ms20;
	int		hkind;

	while ((c=getopt(argc,argv,"Ii:n:l:h:r:evFpd:H"))!=EOF) switch (c) {
	case 'I':
		fprintf(stderr,"%s: Mel-scaled cepstral coefficients V%s\n",
			PROGNAME,PROGVERS);
		exit(0);
	case 'i':
		if (itspec(optarg,&it,&ty)==0) {
			if (it==SP_TYPE)
				sptype = ty;
			else
				error("unsuitable item type '%s'",optarg);
		}
		else
			error("illegal item selection '%s'",optarg);
		break;
	case 'n':
		ncoeff = atoi(optarg);
		break;
	case 'l':
		lofreq = atof(optarg);
		break;
	case 'h':
		hifreq = atof(optarg);
		break;
	case 'r':
		frate = atof(optarg);
		break;
	case 'e':
		doenergy=1;
		break;
	case 'v':
		verbose=1;
		break;
	case 'F':
		dofilter=1;
		htkcompat=0;
		break;
	case 'p':
		dopreemp=0;
		break;
	case 'd':
		if (atoi(optarg)==1)
			dodelta1=1;
		else if (atoi(optarg)==2) {
			dodelta1=1;
			dodelta2=1;
		}
		else
			error("-d1 or -d2 only supported");
		break;
	case 'H':	/* HTK compatibility - default parameters */
		htkcompat=1;
		ncoeff=12;
		preemp=0.97;
		cepslifter=23;
		numchan=20;
		wintime=0.025;
		break;
	case '?':
		errflg++;
	}
	if (errflg || (argc < 2))
		error("usage: %s (-I) (-i item) (-n nceoff) (-l lofreq) (-h hifreq) (-r framerate) (-e|-F) (-v) (-p) (-d1|-d2) (-H) sfsfile",PROGNAME);

	if (dofilter) {
		doenergy=0;
		dodelta1=0;
		dodelta2=0;
	}

	if (doenergy) nbase=ncoeff+1; else nbase=ncoeff;
	if (dodelta2) {
		nparam=3*nbase;
		histlen=7;
		hpos=2;
	}
	else if (dodelta1) {
		nparam=2*nbase;
		histlen=5;
		hpos=2;
	}
	else {
		nparam=nbase;
		histlen=1;
		hpos=0;
	}

	/* get SFS file name */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no input file");

	/* open it */
	if ((fid=sfsopen(filename,"w",NULL)) < 0)
		error("access error on '%s'",filename);

	/* find input item */
	if (!sfsitem(fid,SP_TYPE,sptype,&spitem))
		error("could not find input speech item in '%s'",filename);

	/* set up processing parameters */
	srate = 1.0/spitem.frameduration;
	if (hifreq < 0)
		hifreq = srate/2;
	else if (hifreq > srate/2)
		error("high frequency limit above half sample rate");

	ms20 = (int)(0.5+wintime/spitem.frameduration);
	stpsize = (int)(0.5+srate/frate);
	winsize = 2*stpsize;
	if (winsize < ms20) winsize=ms20;
	fftsize = 512;		/* minimum for accurate filter positions */
	while (fftsize < winsize) fftsize *= 2;

	/* design filter bank */
	nfilter = designfbank(lofreq,hifreq,fftsize,srate);

	if (verbose) {
		/* dump filter design */
		printf("Design of %d channel mel-scaled filterbank:\n",nfilter);
		for (i=0;i<nfilter;i++)
			printf("Chan %2d at %5dHz (%d - %d)\n",
				i+1,
				(int)(0.5+filtab[i].cidx*srate/fftsize),
				(int)(0.5+filtab[i].lidx*srate/fftsize),
				(int)(0.5+filtab[i].hidx*srate/fftsize));
#if 0
		for (i=0;i<nfilter;i++) {
			sum=0;
			printf("Chan %d: ",i+1);
			for (j=filtab[i].lidx;j<=filtab[i].hidx;j++) {
				sum += filtab[i].win[j];
				printf("%d-%.2f,",j,filtab[i].win[j]);
			}
			printf(",sum=%g\n",sum);
		}
#endif
	}

	/* create output channel */
	if (dofilter) {
		sfsheader(&coitem,CO_TYPE,-1,4,sfsstruct[CO_TYPE]/4+nfilter,
			spitem.frameduration,spitem.offset+1.0/frate,winsize,winsize-stpsize,0);
		sprintf(coitem.history,"%s(%d.%02d;nchannel=%d,lofreq=%g,hifreq=%g,frate=%g)",
			PROGNAME,
			spitem.datatype,spitem.subtype,
			nfilter,lofreq,hifreq,frate);
		sprintf(filspec,"labels=%d",(int)(0.5+filtab[0].lidx*srate/fftsize));
		for (i=1;i<nfilter;i++) {
			sprintf(tbuf,"|%d",(int)(0.5+filtab[i].lidx*srate/fftsize));
			strcat(filspec,tbuf);
		}
		strncpy(coitem.params,filspec,sizeof(coitem.params));
	}
	else {
		sfsheader(&coitem,CO_TYPE,-1,4,sfsstruct[CO_TYPE]/4+nparam,
			spitem.frameduration,(htkcompat)?0:(spitem.offset+winsize*spitem.frameduration/2),
			winsize,winsize-stpsize,0);
		sprintf(coitem.history,"%s(%d.%02d;ncoeff=%d,nparam=%d,lofreq=%g,hifreq=%g,frate=%g%s%s%s%s)",
			PROGNAME,
			spitem.datatype,spitem.subtype,
			ncoeff,nparam,lofreq,hifreq,frate,
			(doenergy)?",energy":"",
			(dodelta1)?",delta1":"",
			(dodelta2)?",delta2":"",
			(htkcompat)?",HTK":"");
		sprintf(filspec,"labels=C1");
		for (i=1;i<ncoeff;i++) {
			sprintf(tbuf,"|C%d",i+1);
			strcat(filspec,tbuf);
		}
		if (doenergy) strcat(filspec,"|E");
		if (dodelta1) {
			for (i=0;i<ncoeff;i++) {
				sprintf(tbuf,"|dC%d",i+1);
				strcat(filspec,tbuf);
			}
			if (doenergy) strcat(filspec,"|dE");
		}
		if (dodelta2) {
			for (i=0;i<ncoeff;i++) {
				sprintf(tbuf,"|DC%d",i+1);
				strcat(filspec,tbuf);
			}
			if (doenergy) strcat(filspec,"|DE");
		}
		hkind = 0x0006;
		if (doenergy) hkind |= 0x0040;
		if (dodelta1) hkind |= 0x0100;
		if (dodelta2) hkind |= 0x0200;
		filspec[110]='\0';
		sprintf(coitem.params,"htktype=%X,%s",hkind,filspec);
	}
	if ((ofid=sfschannel(filename,&coitem))<0)
		error("could not open output channel to '%s'",filename);

	/* get buffers */
	if ((sp=(short *)sfsbuffer(&spitem,fftsize))==NULL)
		error("could not get memory");
	if ((co=(struct co_rec *)sfsbuffer(&coitem,histlen))==NULL)
		error("could not get memory");
	if ((hamming=(float *)calloc(fftsize,sizeof(float)))==NULL)
		error("could not get memory");
	if ((dftbuf=(float *)calloc(fftsize+2,sizeof(float)))==NULL)
		error("could not get memory");
	if ((chan=(float *)calloc(nfilter,sizeof(float)))==NULL)
		error("could not get memory");

	/* design Hamming window */
	omega = 8.0 * atan(1.0) / (winsize - 1);
	for (i=0;i<winsize;i++)
		hamming[i] = (0.54 - (0.46 * cos(i * omega)));

	/* process the signal in windows */
	totframes = (spitem.numframes-winsize+stpsize)/stpsize;
	nframe=0;
	for (pos=0;sfsread(fid,pos,winsize,sp)==winsize;pos+=stpsize) {

		if (htkcompat) {
			HTKConvertFrame(sp,winsize,co[hpos].data,ncoeff);
		}
		else {
			/* pre-emphasise and apply Hamming window */
			if (dopreemp) {
				dftbuf[0] = (sp[1] - preemp * sp[0]) * hamming[0];
				for (i=1;i<winsize;i++)
					dftbuf[i] = (sp[i] - preemp * sp[i-1]) * hamming[i];
				for (;i<fftsize+2;i++) dftbuf[i]=0.0;
			}
			else {
				for (i=0;i<winsize;i++)
					dftbuf[i] = sp[i] * hamming[i];
				for (;i<fftsize+2;i++) dftbuf[i]=0.0;
			}

			/* if energy required */
			if (doenergy) {
				sumsq=0;
				for (i=0;i<winsize;i++)
					sumsq += dftbuf[i]*dftbuf[i];
				co[hpos].data[ncoeff] = mylog10(sumsq); /* deliberately in bels */
			}

			/* do forward FFT */
			REALFFT(dftbuf,fftsize/2,FORWARD);

			/* create magnitude */
			for (i=0;i<fftsize/2;i++)
				dftbuf[i] = sqrt(dftbuf[2*i+1]*dftbuf[2*i+1]+dftbuf[2*i+2]*dftbuf[2*i+2]);

			/* form filter outputs */
			for (i=0;i<nfilter;i++) {
				chan[i] = 0.0;
				for (j=filtab[i].lidx;j<=filtab[i].hidx;j++)
					chan[i] += filtab[i].win[j] * dftbuf[j];
			}

			/* store filter energies or cepstral coeffs */
			if (dofilter) {
				for (i=0;i<nfilter;i++)
					co[hpos].data[i] = 20.0*mylog10(chan[i]);
			}
			else {
				/* inverse DFT */
				for (i=0;i<ncoeff;i++) {
					omega = 4 * atan(1.0) * (i+1) / nfilter;
					sum = 0;
					for (j=0;j<nfilter;j++)
						sum += mylog10(chan[j]) * cos((j+0.5) * omega);
					co[hpos].data[i] = sum;
				}

				/* liftering */
				if (cepslifter) {
					for (i=0;i<ncoeff;i++)
						co[hpos].data[i] *= 1.0+cepslifter*sin((i+1)*M_PI/cepslifter)/2;
				}
			}
		}

		co[hpos].posn = pos;
		co[hpos].size = winsize;
		co[hpos].flag = 0;
		co[hpos].mix = 0.0;
		co[hpos].gain = 1.0;

		/* fill buffer if first time through */
		if (pos==0) {
			for (i=0;i<hpos;i++) {
				co[i].posn = co[hpos].posn;
				co[i].size = co[hpos].size;
				co[i].flag = co[hpos].flag;
				co[i].mix = co[hpos].mix;
				co[i].gain = co[hpos].gain;
				for (j=0;j<nbase;j++)
					co[i].data[j] = co[hpos].data[j];
			}
		}

		/* calculate first deltas */
		if (dodelta1 && (hpos >= 4)) {
			for (i=0;i<nbase;i++) {
				sum=0;
				for (j=1;j<=2;j++)
					sum += j*(co[hpos-2+j].data[i] - co[hpos-2-j].data[i]);
				co[hpos-2].data[i+nbase] = sum/10;
			}
			if (delta1first) {
				/* first time through fill buffer */
				for (i=0;i<hpos-2;i++)
					for (j=0;j<nbase;j++)
						co[i].data[nbase+j] = co[hpos-2].data[nbase+j];
				delta1first=0;
			}
		}

		/* calculate second deltas */
		if (dodelta2 && (hpos >= 6)) {
			for (i=0;i<nbase;i++) {
				sum=0;
				for (j=1;j<=2;j++)
					sum += j*(co[hpos-4+j].data[nbase+i] - co[hpos-4-j].data[nbase+i]);
				co[hpos-4].data[i+2*nbase] = sum/10;
			}
		}

		/* write CO record */
		if (dodelta2) {
			if (hpos >= 6) {
				if (sfswrite(ofid,1,&co[2])!=1)
					error("output write error on '%s'",filename);
				for (i=1;i<histlen;i++) {
					co[i-1].posn = co[i].posn;
					co[i-1].size = co[i].size;
					co[i-1].gain = co[i].gain;
					co[i-1].mix = co[i].mix;
					co[i-1].flag = co[i].flag;
					for (j=0;j<nparam;j++)
						co[i-1].data[j] = co[i].data[j];
				}
			}
			else
				hpos++;
		}
		else if (dodelta1) {
			if (hpos >= 4) {
				if (sfswrite(ofid,1,&co[2])!=1)
					error("output write error on '%s'",filename);
				for (i=1;i<histlen;i++) {
					co[i-1].posn = co[i].posn;
					co[i-1].size = co[i].size;
					co[i-1].gain = co[i].gain;
					co[i-1].mix = co[i].mix;
					co[i-1].flag = co[i].flag;
					for (j=0;j<nparam;j++)
						co[i-1].data[j] = co[i].data[j];
				}
			}
			else
				hpos++;
		}
		else {
			if (sfswrite(ofid,1,co)!=1)
				error("output write error on '%s'",filename);
		}

		++nframe;
		if (ttytest() && ((nframe%50)==0)) {
			printf("%d/%d frames\r",nframe,totframes);
			fflush(stdout);
		}
	}

	/* flush remaining records */
	if (dodelta2) {
		for (k=2;k<hpos-2;k++) {
			/* calculate first deltas */
			for (i=0;i<nbase;i++) {
				sum=0;
				for (j=1;j<=2;j++)
					sum += j*(co[hpos-2+j].data[i] - co[hpos-2-j].data[i]);
				co[hpos-2].data[i+nbase] = sum/10;
			}

			/* calculate second deltas */
			if (dodelta2 && (hpos >= 6)) {
				for (i=0;i<nbase;i++) {
					sum=0;
					for (j=1;j<=2;j++)
						sum += j*(co[hpos-4+j].data[nbase+i] - co[hpos-4-j].data[nbase+i]);
					co[hpos-4].data[i+2*nbase] = sum/10;
				}
			}

			if (sfswrite(ofid,1,&co[2])!=1)
				error("output write error on '%s'",filename);
			for (i=1;i<histlen;i++) {
				co[i-1].posn = co[i].posn;
				co[i-1].size = co[i].size;
				co[i-1].gain = co[i].gain;
				co[i-1].mix = co[i].mix;
				co[i-1].flag = co[i].flag;
				for (j=0;j<nparam;j++)
					co[i-1].data[j] = co[i].data[j];
			}
		}
		for (;k<hpos;k++) {
			/* duplicate first deltas */
			for (i=0;i<nbase;i++) {
				co[hpos-2].data[i+nbase] = co[hpos-3].data[i+nbase];
			}

			/* calculate second deltas */
			if (dodelta2 && (hpos >= 6)) {
				for (i=0;i<nbase;i++) {
					sum=0;
					for (j=1;j<=2;j++)
						sum += j*(co[hpos-4+j].data[nbase+i] - co[hpos-4-j].data[nbase+i]);
					co[hpos-4].data[i+2*nbase] = sum/10;
				}
			}

			if (sfswrite(ofid,1,&co[2])!=1)
				error("output write error on '%s'",filename);
			for (i=1;i<histlen;i++) {
				co[i-1].posn = co[i].posn;
				co[i-1].size = co[i].size;
				co[i-1].gain = co[i].gain;
				co[i-1].mix = co[i].mix;
				co[i-1].flag = co[i].flag;
				for (j=0;j<nparam;j++)
					co[i-1].data[j] = co[i].data[j];
			}
		}
	}
	else if (dodelta1) {
		for (k=2;k<hpos;k++) {
			/* calculate first deltas */
			if (dodelta1 && (hpos >= 4)) {
				for (i=0;i<nbase;i++) {
					sum=0;
					for (j=1;j<=2;j++)
						sum += j*(co[hpos-2+j].data[i] - co[hpos-2-j].data[i]);
					co[hpos-2].data[i+nbase] = sum/10;
				}
			}

			if (sfswrite(ofid,1,&co[2])!=1)
				error("output write error on '%s'",filename);
			for (i=1;i<histlen;i++) {
				co[i-1].posn = co[i].posn;
				co[i-1].size = co[i].size;
				co[i-1].gain = co[i].gain;
				co[i-1].mix = co[i].mix;
				co[i-1].flag = co[i].flag;
				for (j=0;j<nparam;j++)
					co[i-1].data[j] = co[i].data[j];
			}
		}
	}

	if (ttytest())
		printf("                          \r");

	/* update and exit */
	if (!sfsupdate(filename))
		error("update error on '%s'",filename);

	exit(0);
}


/*====================================================================================*/

/* HTK routines for MFCC calculation for comparison */

#define PI   3.14159265358979
#define TPI  6.28318530717959     /* PI*2 */
#define LZERO  (-1.0E10)   /* ~log(0) */
#define LSMALL (-0.5E10)   /* log values < LSMALL are set to LZERO */
#define MINEARG (-708.3)   /* lowest exp() arg  = log(MINLARG) */
#define MINLARG 2.45E-308  /* lowest log() arg  = exp(MINEARG) */
#define TRUE 1             /* boolean values for int flags */
#define FALSE 0

typedef float *Vector;     /* vector[1..size]   */

/* EXPORT->CreateVector:  Allocate space for vector v[1..size]
 *
 * The storage holds size+1 float-sized slots.  Slot 0 is overwritten
 * with the element count as an int (read back by VectorSize) and the
 * elements v[1..size] start out as zero.  The program is terminated
 * with a message if size is negative or memory cannot be obtained, so
 * the returned pointer is never NULL.  Release with FreeVector. */
Vector CreateVector(int size)
{
   Vector v = NULL;

   if (size >= 0)
      v = (Vector)calloc((size_t)size+1,sizeof(float));
   if (v==NULL) {
      fprintf(stderr,"CreateVector: could not get memory for %d elements\n",size);
      exit(1);
   }
   *(int *)v = size;
   return v;
}

/* FreeVector: release the storage of vector v by passing it to free() */
void FreeVector(Vector v)
{
	free(v);
}

/* EXPORT->VectorSize: returns number of components in v, i.e. the int
   held at the very start of the vector's storage */
int VectorSize(Vector v)
{
   return *(int *)v;
}

/* EXPORT->ZeroVector: set every element v[1..VectorSize(v)] to zero */
void ZeroVector(Vector v)
{
   int k = VectorSize(v);

   while (k >= 1) {
      v[k] = 0.0;
      k--;
   }
}

/* ZeroMeanFrame: subtract the mean of v[1..size] from every element,
   removing any dc offset from the frame */
void ZeroMeanFrame(Vector v)
{
   int n = VectorSize(v);
   int k;
   float total = 0.0;
   float mean;

   /* accumulate in ascending order */
   k = 1;
   while (k <= n) {
      total += v[k];
      k++;
   }
   mean = total / n;
   for (k=1; k<=n; k++)
      v[k] = v[k] - mean;
}

static int hamWinSize = 0;          /* Size of current Hamming window */
static Vector hamWin = NULL;        /* Current Hamming window */

/* GenHamWindow: generate precomputed Hamming window function
 *
 * Fills hamWin[1..frameSize] with 0.54 - 0.46*cos(2*pi*(i-1)/(frameSize-1))
 * and records frameSize in hamWinSize.  The existing storage is reused
 * when it is large enough; otherwise it is released and replaced. */
static void GenHamWindow (int frameSize)
{
   int i;
   float a;

   if (hamWin==NULL || VectorSize(hamWin) < frameSize) {
      free(hamWin);        /* drop the smaller window; free(NULL) is harmless */
      hamWin = CreateVector(frameSize);
   }
   a = TPI / (frameSize - 1);
   for (i=1;i<=frameSize;i++)
      hamWin[i] = 0.54 - 0.46 * cos(a*(i-1));
   hamWinSize = frameSize;
}

/* EXPORT->Ham: Apply Hamming Window to Speech frame s.  The window is
   regenerated first whenever the frame length differs from the length
   of the window currently held */
void Ham (Vector s)
{
   int n = VectorSize(s);
   int k;

   if (n != hamWinSize)
      GenHamWindow(n);
   k = 1;
   while (k <= n) {
      s[k] = s[k] * hamWin[k];
      k++;
   }
}

/* EXPORT->PreEmphasise: pre-emphasise signal in s with coefficient k.
   Each sample has k times its predecessor subtracted; the first sample,
   which has no predecessor, is scaled by 1-k */
void PreEmphasise (Vector s, float k)
{
   int idx = VectorSize(s);

   /* run from the end so that s[idx-1] is still the original sample */
   while (idx >= 2) {
      s[idx] -= s[idx-1]*k;
      idx--;
   }
   s[1] *= 1.0-k;
}

/* EXPORT-> FFT: apply fft/invfft to complex s
 *
 * s[1..n] is handled in place as n/2 pairs (s[2i-1], s[2i]) which the
 * code treats as the real and imaginary parts of one complex value.
 * When invert is non-zero the rotation angle is negated and every
 * element is divided by n/2 at the end.
 * NOTE(review): the loops only work as a radix-2 transform when n/2 is a
 * power of two; nothing here checks that - confirm at the call sites. */
void FFT(Vector s, int invert)
{
   int ii,jj,n,nn,limit,m,j,inc,i;
   double wx,wr,wpr,wpi,wi,theta;
   double xre,xri,x;

   n=VectorSize(s);
   nn=n / 2; j = 1;
   /* reordering pass: swap complex pairs so that the butterflies below
      can run in place (bit-reversal permutation) */
   for (ii=1;ii<=nn;ii++) {
      i = 2 * ii - 1;
      if (j>i) {
         xre = s[j]; xri = s[j + 1];
         s[j] = s[i];  s[j + 1] = s[i + 1];
         s[i] = xre; s[i + 1] = xri;
      }
      m = n / 2;
      while (m >= 2  && j > m) {
         j -= m; m /= 2;
      }
      j += m;
   };
   /* butterfly passes: limit doubles each time round until it covers s */
   limit = 2;
   while (limit < n) {
      inc = 2 * limit; theta = TPI / limit;
      if (invert) theta = -theta;
      /* wpr,wpi advance the rotation (wr,wi) by theta using a
         recurrence instead of calling sin/cos for every step */
      x = sin(0.5 * theta);
      wpr = -2.0 * x * x; wpi = sin(theta);
      wr = 1.0; wi = 0.0;
      for (ii=1; ii<=limit/2; ii++) {
         m = 2 * ii - 1;
         for (jj = 0; jj<=(n - m) / inc;jj++) {
            i = m + jj * inc;
            j = i + limit;
            /* rotate the upper value, then form sum and difference */
            xre = wr * s[j] - wi * s[j + 1];
            xri = wr * s[j + 1] + wi * s[j];
            s[j] = s[i] - xre; s[j + 1] = s[i + 1] - xri;
            s[i] = s[i] + xre; s[i + 1] = s[i + 1] + xri;
         }
         wx = wr;
         wr = wr * wpr - wi * wpi + wr;
         wi = wi * wpr + wx * wpi + wi;
      }
      limit = inc;
   }
   /* scale the inverse transform */
   if (invert)
      for (i = 1;i<=n;i++)
         s[i] = s[i] / nn;

}

/* EXPORT-> Realft: apply fft to real s
 *
 * s[1..2n] holds the real input.  It is first passed to FFT() as n
 * complex pairs, and the loop then recombines mirrored pairs in place.
 * On return s[2k-1] and s[2k] are used by Wave2FBank as the real and
 * imaginary parts of bin k; s[1] is set to the sum of the first pair
 * and s[2] is forced to zero. */
void Realft (Vector s)
{
   int n, n2, i, i1, i2, i3, i4;
   double xr1, xi1, xr2, xi2, wrs, wis;
   double yr, yi, yr2, yi2, yr0, theta, x;

   n=VectorSize(s) / 2; n2 = n/2;
   theta = PI / n;
   FFT(s, FALSE);
   /* yr2,yi2 advance the rotation (yr,yi) by theta using a recurrence */
   x = sin(0.5 * theta);
   yr2 = -2.0 * x * x;
   yi2 = sin(theta); yr = 1.0 + yr2; yi = yi2;
   for (i=2; i<=n2; i++) {
      /* (i1,i2) is pair i counted from the start, (i3,i4) its mirror
         counted from the end */
      i1 = i + i - 1;      i2 = i1 + 1;
      i3 = n + n + 3 - i2; i4 = i3 + 1;
      wrs = yr; wis = yi;
      xr1 = (s[i1] + s[i3])/2.0; xi1 = (s[i2] - s[i4])/2.0;
      xr2 = (s[i2] + s[i4])/2.0; xi2 = (s[i3] - s[i1])/2.0;
      s[i1] = xr1 + wrs * xr2 - wis * xi2;
      s[i2] = xi1 + wrs * xi2 + wis * xr2;
      s[i3] = xr1 - wrs * xr2 + wis * xi2;
      s[i4] = -xi1 + wrs * xi2 + wis * xr2;
      yr0 = yr;
      yr = yr * yr2 - yi  * yi2 + yr;
      yi = yi * yr2 + yr0 * yi2 + yi;
   }
   /* first pair is handled separately */
   xr1 = s[1];
   s[1] = xr1 + s[2];
   s[2] = 0.0;
}

/* -------------------- MFCC Related Operations -------------------- */

/* FBankInfo: filterbank description built once by InitFBank and then
   passed to Wave2FBank for every frame */
typedef struct{
   int frameSize;       /* speech frameSize */
   int numChans;        /* number of channels */
   long sampPeriod;     /* sample period in units of 100ns */
   int fftN;            /* fft size */
   int klo,khi;         /* lopass to hipass cut-off fft indices */
   int usePower;    /* use power rather than magnitude */
   int takeLogs;    /* log filterbank channels */
   float fres;          /* scaled fft resolution */
   Vector cf;           /* array[1..numChans+1] of centre freqs in mel */
   short * loChan;     /* array[1..fftN/2] of loChan index, -1 if the
                          bin lies outside klo..khi */
   Vector loWt;         /* array[1..fftN/2] of loChan weighting */
   Vector x;            /* array[1..fftN] of fftchans */
} FBankInfo;

/* EXPORT->Mel: return mel-frequency corresponding to given FFT index */
float Mel(int k,float fres)
{
   return 1127 * log(1 + (k-1)*fres);
}

/* EXPORT->InitFBank: Initialise an FBankInfo record
 *
 *   frameSize   samples per speech frame
 *   sampPeriod  sample period in units of 100ns
 *   numChans    number of filterbank channels
 *   lopass      lower cut-off in Hz, or negative for none
 *   hipass      upper cut-off in Hz, or negative for none
 *   usePower    stored for Wave2FBank: power instead of magnitude
 *   takeLogs    stored for Wave2FBank: take logs of the channels
 *
 * Chooses the FFT size (smallest power of two not below frameSize),
 * spaces numChans+1 centre frequencies evenly in mel between the
 * cut-offs, and for every FFT bin records which channel lies below it
 * and the weight given to that channel.  The vectors and the loChan
 * array in the returned record are allocated here and never freed by
 * this function.  The program is terminated if loChan cannot be
 * allocated. */
FBankInfo InitFBank(int frameSize, long sampPeriod, int numChans,
                    float lopass, float hipass, int usePower, int takeLogs)
{
   FBankInfo fb;
   float mlo,mhi,ms,melk;
   int k,chan,maxChan,Nby2;

   /* Save sizes to cross-check subsequent usage */
   fb.frameSize = frameSize; fb.numChans = numChans;
   fb.sampPeriod = sampPeriod;
   fb.usePower = usePower; fb.takeLogs = takeLogs;
   /* Calculate required FFT size */
   fb.fftN = 2;
   while (frameSize>fb.fftN) fb.fftN *= 2;
   Nby2 = fb.fftN / 2;
   fb.fres = 1.0E7/(sampPeriod * fb.fftN * 700.0);
   maxChan = numChans+1;
   /* set lo and hi pass cut offs if any */
   fb.klo = 2; fb.khi = Nby2;       /* apply lo/hi pass filtering */
   mlo = 0; mhi = Mel(Nby2+1,fb.fres);
   if (lopass>=0.0) {
      mlo = 1127*log(1+lopass/700.0);
      fb.klo = (int) ((lopass * sampPeriod * 1.0e-7 * fb.fftN) + 2.5);
      if (fb.klo<2) fb.klo = 2;
   }
   if (hipass>=0.0) {
      mhi = 1127*log(1+hipass/700.0);
      fb.khi = (int) ((hipass * sampPeriod * 1.0e-7 * fb.fftN) + 0.5);
      if (fb.khi>Nby2) fb.khi = Nby2;
   }
   if (verbose) {
      printf("FFT passband %d to %d out of 1 to %d\n",fb.klo,fb.khi,Nby2);
      printf("Mel passband %f to %f\n",mlo,mhi);
   }
   /* Create vector of fbank centre frequencies */
   fb.cf = CreateVector(maxChan);
   ms = mhi - mlo;
   for (chan=1;chan<=maxChan;chan++)
      fb.cf[chan] = ((float)chan/(float)maxChan)*ms + mlo;
   /* Create loChan map, loChan[fftindex] -> lower channel index */
   fb.loChan = (short *)calloc(Nby2+1,sizeof(short));
   if (fb.loChan==NULL) {
      fprintf(stderr,"InitFBank: could not get memory for channel map\n");
      exit(1);
   }
   for (k=1,chan=1; k<=Nby2; k++){
      melk = Mel(k,fb.fres);
      if (k<fb.klo || k>fb.khi) fb.loChan[k]=-1;
      else {
         /* test the bound first so that cf[] is never read past
            its last element cf[maxChan] */
         while (chan<=maxChan && fb.cf[chan] < melk) ++chan;
         fb.loChan[k] = chan-1;
      }
   }
   /* Create vector of lower channel weights */
   fb.loWt = CreateVector(Nby2);
   for (k=1; k<=Nby2; k++) {
      chan = fb.loChan[k];
      if (k<fb.klo || k>fb.khi) fb.loWt[k]=0.0;
      else {
         if (chan>0)
            fb.loWt[k] = ((fb.cf[chan+1] - Mel(k,fb.fres)) /
                          (fb.cf[chan+1] - fb.cf[chan]));
         else
            /* below the first centre: interpolate from mlo */
            fb.loWt[k] = (fb.cf[1]-Mel(k,fb.fres))/(fb.cf[1] - mlo);
      }
   }
   /* Create workspace for fft */
   fb.x = CreateVector(fb.fftN);
   return fb;
}

/* EXPORT->Wave2FBank:  Perform filterbank analysis on speech s
 *
 *   s       input frame s[1..frameSize]
 *   fbank   output channels fbank[1..numChans]
 *   te      if not NULL, receives the sum of squares of the frame
 *   info    filterbank description from InitFBank
 *
 * A size mismatch between the vectors and info is reported on stderr
 * but processing carries on regardless. */
void Wave2FBank(Vector s, Vector fbank, float *te, FBankInfo info)
{
   const float melfloor = 1.0;
   int k,bin;
   float re,im;     /* real and imag parts of one fft bin */
   float mag;       /* magnitude or power of that bin */
   float lower;     /* share of the bin given to the lower channel */
   float level;

   /* Check that info record is compatible */
   if (VectorSize(s) != info.frameSize)
      fprintf(stderr,"Wave2FBank: frame size mismatch\n");
   if (VectorSize(fbank) != info.numChans)
      fprintf(stderr,"Wave2FBank: num channels mismatch\n");

   /* Compute frame energy if needed */
   if (te != NULL){
      *te = 0.0;
      for (k=1; k<=info.frameSize; k++)
         *te += (s[k]*s[k]);
   }

   /* Copy frame to the fft workspace and pad with zeroes */
   k = 1;
   while (k <= info.frameSize) {
      info.x[k] = s[k];
      k++;
   }
   k = info.frameSize+1;
   while (k <= info.fftN) {
      info.x[k] = 0.0;
      k++;
   }
   Realft(info.x);

   /* Share each fft bin between the channel below and the one above */
   ZeroVector(fbank);
   for (k = info.klo; k <= info.khi; k++) {
      re = info.x[2*k-1];
      im = info.x[2*k];
      mag = re*re + im*im;
      if (!info.usePower)
         mag = sqrt(mag);
      bin = info.loChan[k];
      lower = info.loWt[k]*mag;
      if (bin>0) fbank[bin] += lower;
      if (bin<info.numChans) fbank[bin+1] += mag - lower;
   }

   /* Take logs, flooring each channel at melfloor first */
   if (info.takeLogs) {
      for (bin=1; bin<=info.numChans; bin++) {
         level = fbank[bin];
         fbank[bin] = log((level<melfloor) ? melfloor : level);
      }
   }
}

/* EXPORT->FBank2MFCC: compute first n cepstral coeff
 *
 * c[j] for j=1..n is the cosine transform of the channels
 * fbank[1..numChan], scaled by sqrt(2/numChan). */
void FBank2MFCC(Vector fbank, Vector c, int n)
{
   int numChan = VectorSize(fbank);
   int j,k;
   float scale = sqrt(2.0/(float)numChan);
   float step = PI/(float)numChan;
   float angle;
   float acc;

   for (j=1; j<=n; j++)  {
      angle = (float)j * step;
      acc = 0.0;
      for (k=1; k<=numChan; k++)
         acc += fbank[k] * cos(angle*(k-0.5));
      c[j] = acc * scale;
   }
}

/* EXPORT->FBank2C0: return zero'th cepstral coefficient, which is the
   sum of all channels scaled by sqrt(2/numChan) */
float FBank2C0(Vector fbank)
{
   int n = VectorSize(fbank);
   int k;
   float scale = sqrt(2.0/(float)n);
   float total = 0.0;

   k = 1;
   while (k <= n) {
      total += fbank[k];
      k++;
   }
   return total * scale;
}

/* ------------------- Feature Level Operations -------------------- */

static int cepWinSize=0;            /* Size of current cepstral weight window */
static int cepWinL=0;               /* Current liftering coeff */
static Vector cepWin = NULL;        /* Current cepstral weight window */

/* GenCepWin: generate a new cep liftering vector
 *
 * Fills cepWin[1..count] with 1 + (L/2)*sin(i*pi/L) where L is
 * cepLiftering, and records L and count in cepWinL and cepWinSize.
 * The existing storage is reused when it is large enough; otherwise it
 * is released and replaced.  cepLiftering is used as a divisor, and the
 * caller in this file only asks for liftering when it is positive. */
static void GenCepWin (int cepLiftering, int count)
{
   int i;
   float a, Lby2;

   if (cepWin==NULL || VectorSize(cepWin) < count) {
      free(cepWin);        /* drop the smaller window; free(NULL) is harmless */
      cepWin = CreateVector(count);
   }
   a = PI/cepLiftering;
   Lby2 = cepLiftering/2.0;
   for (i=1;i<=count;i++)
      cepWin[i] = 1.0 + Lby2*sin(i * a);
   cepWinL = cepLiftering;
   cepWinSize = count;
}

/* EXPORT->WeightCepstrum: Apply cepstral weighting to c.  The count
   elements beginning at c[start] are multiplied by the lifter window,
   which is rebuilt first if the liftering coefficient has changed or
   the window held is too short */
void WeightCepstrum (Vector c, int start, int count, int cepLiftering)
{
   int i;

   if (count > cepWinSize || cepWinL != cepLiftering)
      GenCepWin(cepLiftering,count);
   for (i=0;i<count;i++)
      c[start+i] *= cepWin[i+1];
}


static FBankInfo fbInfo;
static int	fbInfo_init=0;

/* ConvertFrame: convert frame in cf->s and store in pbuf, return total
   parameters stored in pbuf */
int HTKConvertFrame(short *sig, int len, float *pbuf,int ncoeff)
{
   float re,rawte,te,*p;
   int i,bsize;
   Vector s;
   Vector v;
   Vector c;
   Vector fbank;
   char buf[50];
   int rawE;

	s=CreateVector(len);
	for (i=1;i<=len;i++) s[i]=(float)(*sig++);
	fbank=CreateVector(numchan);
	c=CreateVector(ncoeff);

	if (!fbInfo_init) {
     fbInfo = InitFBank(len,(long)(10000000 * spitem.frameduration), numchan, lofreq,
                             hifreq, 0, 1);
     fbInfo_init=1;
    }

   p = pbuf;


   if (htk_dozeromean)
      ZeroMeanFrame(s);

	if (doenergy) {
     rawte = 0.0;
     for (i=1; i<=VectorSize(s); i++)
        rawte += s[i] * s[i];
	}
   if (preemp > 0)
      PreEmphasise(s,preemp);
   if (htk_dohamming) Ham(s);

      Wave2FBank(s, fbank, &te, fbInfo);
      FBank2MFCC(fbank, c, ncoeff);
      if (cepslifter > 0)
         WeightCepstrum(c, 1, ncoeff, cepslifter);
      v = c; bsize = ncoeff;

   for (i=1; i<=bsize; i++) *p++ = v[i];

   if (htk_dozeroc){
      *p = FBank2C0(fbank);
      ++p;
   }
   if (doenergy) {
      *p++ = (rawte<MINLARG) ? LZERO : log(rawte);
   }

	FreeVector(s);
	FreeVector(fbank);
	FreeVector(c);

   return p - pbuf;
}
