/* mfcc - "standard" MFCC calculation routines */

/*
 * These routines implement the MFCC calculation as defined in HTK
 * They borrow heavily and freely from HTK code
 * see HTK for copyright permissions, etc
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "sfs.h"
#include "mfcc.h"

/* Numeric constants used by the signal-processing routines in this file. */
#define PI   3.14159265358979
#define TPI  6.28318530717959     /* PI*2 */
/* LZERO is the value stored in place of a log when its argument is below
   MINLARG (see the energy term in MFCCfloat).
   NOTE(review): log(MINLARG) = log(1E-10) is about -23, which is lower than
   LZERO, so a floored value can exceed an unfloored one - confirm intended. */
#define LZERO  (-10)   /* stand-in for log(0) */
#define LSMALL (-0.5E10)   /* log values < LSMALL are set to LZERO (not referenced in the code visible here) */
/* NOTE(review): exp(MINEARG) is far smaller than MINLARG as defined below,
   so these two constants are not the log/exp pair their names suggest. */
#define MINEARG (-708.3)   /* lowest exp() arg (not referenced in the code visible here) */
#define MINLARG 1E-10  /* lowest log() arg; smaller values are replaced by LZERO */
#define TRUE 1
#define FALSE 0

/* Vector: float array indexed 1..size.  Element 0 is reserved: CreateVector
   stores the element count there as an int and VectorSize reads it back. */
typedef float *Vector;     /* vector[1..size]   */

/* Cached analysis windows, rebuilt on demand by GenHamWindow / GenCepWin. */
static int hamWinSize = 0;          /* Size of current Hamming window */
static Vector hamWin = NULL;        /* Current Hamming window */
static int cepWinSize=0;            /* Size of current cepstral weight window */
static int cepWinL=0;               /* Current liftering coeff */
static Vector cepWin = NULL;        /* Current cepstral weight window */

/* Processing options; the four flags are set through MFCC_Configure. */
static int htk_dozeromean=0;        /* remove the frame mean before analysis */
static int htk_dozeroc=0;           /* append C0 after the cepstral coefficients */
static int htk_doenergy=0;          /* append log energy of the frame */
static int htk_dohamming=1;         /* apply the Hamming window */
static double	preemp=0.97;        /* coefficient handed to PreEmphasise */
static int		cepslifter=23;	/* cepstral liftering; values <= 0 skip liftering in MFCCfloat */
static int		numchan=20;		/* number of filterbank channels */
static double	lofreq=0;	/* first filter freq start; passed to InitFBank as lopass */
static double	hifreq=-1;			/* last filter freq end; negative leaves InitFBank's upper cut-off at its default */

/* FBankInfo: filterbank set-up record, filled in by InitFBank and read by
   Wave2FBank.  The array members are indexed from 1. */
typedef struct{
   int frameSize;       /* number of samples in a speech frame */
   int numChans;        /* number of filterbank channels */
   long sampPeriod;     /* sample period as given to InitFBank (MFCCfloat passes 1e7 * fdur) */
   int fftN;            /* fft size: power of two not smaller than frameSize */
   int klo,khi;         /* first and last fft index that contribute to the filterbank */
   int usePower;    /* non-zero: accumulate power rather than magnitude */
   int takeLogs;    /* non-zero: output log filterbank channels */
   float fres;          /* scaled fft resolution: 1e7/(sampPeriod*fftN*700) */
   Vector cf;           /* array[1..numChans+1] of centre freqs on the mel scale */
   short * loChan;     /* array[1..fftN/2] of lower channel index; -1 outside klo..khi */
   Vector loWt;         /* array[1..fftN/2] of weight given to the lower channel */
   Vector x;            /* array[1..fftN] fft workspace */
} FBankInfo;

/* Filterbank set-up cached between calls to MFCCfloat, together with the
   argument values it was built for; MFCCfloat rebuilds it when any of
   len, ncoeff or fdur differs from the recorded value. */
static FBankInfo fbInfo;
static int	fbInfo_init=0;        /* non-zero once fbInfo has been built */
static int fbInfo_len=0;          /* frame length fbInfo was built for */
static int fbInfo_ncoeff=0;       /* ncoeff in force when fbInfo was built */
static double fbInfo_fdur=0;      /* fdur in force when fbInfo was built */

/* CreateVector: allocate a zero-filled vector v[1..size].
 *
 * One extra leading slot is allocated and the element count is stored in it
 * as an int so that VectorSize() can recover it (this relies on an int
 * fitting in the space of one float).
 *
 * Returns NULL when size is negative or the allocation fails; the length is
 * never written through a NULL pointer.  Release with FreeVector(). */
static Vector CreateVector(int size)
{
   Vector v;

   if (size < 0) return NULL;
   v = (Vector)calloc((size_t)size+1,sizeof(float));
   if (v == NULL) return NULL;
   *(int *)v = size;            /* slot 0 carries the element count */
   return v;
}

/* FreeVector: release the vector *v and reset the caller's handle to NULL.
   Calling it on a handle that is already NULL does nothing. */
static void FreeVector(Vector *v)
{
	free(*v);        /* free(NULL) is a no-op, so no guard is needed */
	*v = NULL;
}

/* VectorSize: return the element count held in slot 0 of v */
static int VectorSize(Vector v)
{
   const int *lenSlot = (const int *)v;

   return lenSlot[0];
}

/* ZeroVector: set every element v[1..VectorSize(v)] to zero */
static void ZeroVector(Vector v)
{
   int k;

   for (k = VectorSize(v); k >= 1; k--)
      v[k] = 0.0;
}

/* ZeroMeanFrame: subtract the mean of v from each of its elements,
   removing any dc offset */
static void ZeroMeanFrame(Vector v)
{
   const int count = VectorSize(v);
   float total = 0.0, mean;
   int k = 1;

   /* accumulate in ascending index order */
   while (k <= count) {
      total += v[k];
      k++;
   }
   mean = total / count;
   for (k = count; k >= 1; k--)
      v[k] -= mean;
}

/* GenHamWindow: (re)build the cached Hamming window for frames of
 * frameSize samples.
 *
 * The window is kept in the file-level hamWin buffer.  The buffer is reused
 * when it already holds at least frameSize elements; otherwise the old
 * buffer is released before a larger one is allocated, so changing the
 * frame size repeatedly does not leak.  hamWinSize records the frame size
 * the coefficients were generated for.
 *
 * NOTE(review): frameSize == 1 makes the divisor (frameSize - 1) zero. */
static void GenHamWindow (int frameSize)
{
   int i;
   float a;

   if (hamWin==NULL || VectorSize(hamWin) < frameSize) {
      free(hamWin);                 /* no-op when hamWin is NULL */
      hamWin = CreateVector(frameSize);
   }
   a = (float)(TPI / (frameSize - 1));
   for (i=1;i<=frameSize;i++)
      hamWin[i] = (float)(0.54 - 0.46 * cos(a*(i-1)));
   hamWinSize = frameSize;
}

/* Ham: multiply the speech frame s, in place, by the Hamming window.
   The cached window is regenerated whenever it was built for a different
   frame size. */
static void Ham (Vector s)
{
   const int n = VectorSize(s);
   int k;

   if (n != hamWinSize)
      GenHamWindow(n);
   for (k = n; k >= 1; k--)
      s[k] = s[k] * hamWin[k];
}

/* PreEmphasise: apply first-order pre-emphasis to s in place.
   Each sample has k times its predecessor subtracted; the first sample,
   which has no predecessor, is scaled by (1 - k). */
static void PreEmphasise (Vector s, float k)
{
   int idx = VectorSize(s);

   /* work from the end so s[idx-1] is still the unmodified input */
   while (idx >= 2) {
      s[idx] = s[idx] - s[idx-1]*k;
      idx--;
   }
   s[1] = (float)(s[1] * (1.0-k));
}

/* FFT: in-place fft / inverse fft of the complex sequence held in s.
 *
 * s is treated as nn = VectorSize(s)/2 complex values stored as interleaved
 * pairs: s[2k-1] is the real part and s[2k] the imaginary part of point k.
 * When invert is non-zero the rotation angle is negated and every element
 * of the result is divided by nn.
 *
 * NOTE(review): the halving/doubling loops presumably require nn to be a
 * power of two; InitFBank sizes the workspace that way. */
static void FFT(Vector s, int invert)
{
   int ii,jj,n,nn,limit,m,j,inc,i;
   double wx,wr,wpr,wpi,wi,theta;
   double xre,xri,x;

   n=VectorSize(s);
   nn=n / 2; j = 1;
   /* Reordering pass (the usual radix-2 bit-reversal shuffle): i walks the
      pairs in natural order, j in the permuted order; each pair is swapped
      once, when j > i. */
   for (ii=1;ii<=nn;ii++) {
      i = 2 * ii - 1;
      if (j>i) {
         xre = s[j]; xri = s[j + 1];
         s[j] = s[i];  s[j + 1] = s[i + 1];
         s[i] = (float)xre; s[i + 1] = (float)xri;
      }
      /* advance j to the next position in the permuted order */
      m = n / 2;
      while (m >= 2  && j > m) {
         j -= m; m /= 2;
      }
      j += m;
   };
   /* Butterfly passes: limit is the distance, in floats, between the two
      points combined by a butterfly; it doubles after every pass. */
   limit = 2;
   while (limit < n) {
      inc = 2 * limit; theta = TPI / limit;
      if (invert) theta = -theta;
      /* wpr = cos(theta) - 1 and wpi = sin(theta); they drive the recurrence
         below that rotates (wr,wi) by theta without calling sin/cos again */
      x = sin(0.5 * theta);
      wpr = -2.0 * x * x; wpi = sin(theta);
      wr = 1.0; wi = 0.0;
      for (ii=1; ii<=limit/2; ii++) {
         m = 2 * ii - 1;
         for (jj = 0; jj<=(n - m) / inc;jj++) {
            i = m + jj * inc;
            j = i + limit;
            /* (xre,xri) = (wr,wi) * point j; then points i and j become
               the sum and difference with point i */
            xre = wr * s[j] - wi * s[j + 1];
            xri = wr * s[j + 1] + wi * s[j];
            s[j] = (float)(s[i] - xre); s[j + 1] = (float)(s[i + 1] - xri);
            s[i] = (float)(s[i] + xre); s[i + 1] = (float)(s[i + 1] + xri);
         }
         /* rotate (wr,wi) by theta; wx keeps the old wr for the wi update */
         wx = wr;
         wr = wr * wpr - wi * wpi + wr;
         wi = wi * wpr + wx * wpi + wi;
      }
      limit = inc;
   }
   /* inverse transform: scale by the number of complex points */
   if (invert)
      for (i = 1;i<=n;i++)
         s[i] = s[i] / nn;

}

/* Realft: in-place fft of the real sequence held in s.
 *
 * The VectorSize(s) real samples are first transformed by FFT() as
 * n = VectorSize(s)/2 complex points; the loop below then recombines
 * mirrored pairs of points.  Wave2FBank reads the result as pairs:
 * s[2k-1] and s[2k] are the real and imaginary parts of bin k.
 * On return s[1] holds the sum of the values FFT() left in s[1] and s[2],
 * and s[2] is set to zero. */
static void Realft (Vector s)
{
   int n, n2, i, i1, i2, i3, i4;
   double xr1, xi1, xr2, xi2, wrs, wis;
   double yr, yi, yr2, yi2, yr0, theta, x;

   n=VectorSize(s) / 2; n2 = n/2;
   theta = PI / n;
   FFT(s, FALSE);
   /* (yr,yi) starts at (cos(theta),sin(theta)) and is rotated by theta on
      each iteration using yr2 = cos(theta)-1 and yi2 = sin(theta) */
   x = sin(0.5 * theta);
   yr2 = -2.0 * x * x;
   yi2 = sin(theta); yr = 1.0 + yr2; yi = yi2;
   for (i=2; i<=n2; i++) {
      /* i1,i2: real/imag slots of point i; i3,i4: slots of point n+2-i */
      i1 = i + i - 1;      i2 = i1 + 1;
      i3 = n + n + 3 - i2; i4 = i3 + 1;
      wrs = yr; wis = yi;
      xr1 = (s[i1] + s[i3])/2.0; xi1 = (s[i2] - s[i4])/2.0;
      xr2 = (s[i2] + s[i4])/2.0; xi2 = (s[i3] - s[i1])/2.0;
      s[i1] = (float)(xr1 + wrs * xr2 - wis * xi2);
      s[i2] = (float)(xi1 + wrs * xi2 + wis * xr2);
      s[i3] = (float)(xr1 - wrs * xr2 + wis * xi2);
      s[i4] = (float)(-xi1 + wrs * xi2 + wis * xr2);
      /* NOTE(review): yr and yi are doubles but each update is rounded
         through float - possibly an unintended loss of precision */
      yr0 = yr;
      yr = (float)(yr * yr2 - yi  * yi2 + yr);
      yi = (float)(yi * yr2 + yr0 * yi2 + yi);
   }
   xr1 = s[1];
   s[1] = (float)(xr1 + s[2]);
   s[2] = 0.0;
}

/* -------------------- MFCC Related Operations -------------------- */

/* EXPORT->Mel: return mel-frequency corresponding to given FFT index */
static float Mel(int k,float fres)
{
   return (float)(1127 * log(1 + (k-1)*fres));
}

/* InitFBank: build the mel filterbank set-up for a given frame size.
 *
 *   frameSize  - samples per frame; the fft size is the smallest power of
 *                two that is not below it (minimum 2)
 *   sampPeriod - sample period (MFCCfloat passes 1e7 * sample duration)
 *   numChans   - number of filterbank channels
 *   lopass     - lower cut-off frequency; ignored when negative
 *   hipass     - upper cut-off frequency; ignored when negative
 *   usePower   - stored for Wave2FBank: use power rather than magnitude
 *   takeLogs   - stored for Wave2FBank: take logs of the channel outputs
 *
 * Returns a copy of a function-static record.  The arrays it points to are
 * owned by that static record and are released and rebuilt on the next
 * call, so a copy returned earlier must not be used after calling again.
 *
 * The loChan map is freed before it is reallocated (it used to leak on
 * every re-initialisation), and the channel search checks its bound before
 * indexing cf, so cf[maxChan+1] is never read. */
static FBankInfo InitFBank(int frameSize, long sampPeriod, int numChans,
                    float lopass, float hipass, int usePower, int takeLogs)
{
   static FBankInfo fb;
   float mlo,mhi,ms,melk;
   int k,chan,maxChan,Nby2;

   /* Save sizes to cross-check subsequent usage */
   fb.frameSize = frameSize; fb.numChans = numChans;
   fb.sampPeriod = sampPeriod;
   fb.usePower = usePower; fb.takeLogs = takeLogs;
   /* Calculate required FFT size */
   fb.fftN = 2;
   while (frameSize>fb.fftN) fb.fftN *= 2;
   Nby2 = fb.fftN / 2;
   fb.fres = (float)(1.0E7/(sampPeriod * fb.fftN * 700.0));
   maxChan = numChans+1;
   /* set lo and hi pass cut offs if any */
   fb.klo = 2; fb.khi = Nby2;       /* defaults: every bin except the first */
   mlo = 0; mhi = Mel(Nby2+1,fb.fres);
   if (lopass>=0.0) {
      mlo = (float)(1127*log(1+lopass/700.0));
      fb.klo = (int) ((lopass * sampPeriod * 1.0e-7 * fb.fftN) + 2.5);
      if (fb.klo<2) fb.klo = 2;
   }
   if (hipass>=0.0) {
      mhi = (float)(1127*log(1+hipass/700.0));
      fb.khi = (int) ((hipass * sampPeriod * 1.0e-7 * fb.fftN) + 0.5);
      if (fb.khi>Nby2) fb.khi = Nby2;
   }
   /* Create vector of fbank centre frequencies, evenly spaced in mel
      between mlo and mhi */
   FreeVector(&fb.cf);
   fb.cf = CreateVector(maxChan);
   ms = mhi - mlo;
   for (chan=1;chan<=maxChan;chan++)
      fb.cf[chan] = ((float)chan/(float)maxChan)*ms + mlo;
   /* Create loChan map, loChan[fftindex] -> lower channel index */
   free(fb.loChan);                 /* release the map of any previous call */
   fb.loChan = (short *)calloc(Nby2+1,sizeof(short));
   for (k=1,chan=1; k<=Nby2; k++){
      melk = Mel(k,fb.fres);
      if (k<fb.klo || k>fb.khi) fb.loChan[k]=-1;
      else {
         /* bound test first: cf has only maxChan elements */
         while (chan<=maxChan && fb.cf[chan] < melk) ++chan;
         fb.loChan[k] = chan-1;
      }
   }
   /* Create vector of lower channel weights */
   FreeVector(&fb.loWt);
   fb.loWt = CreateVector(Nby2);
   for (k=1; k<=Nby2; k++) {
      chan = fb.loChan[k];
      if (k<fb.klo || k>fb.khi) fb.loWt[k]=0.0;
      else {
         if (chan>0)
            fb.loWt[k] = ((fb.cf[chan+1] - Mel(k,fb.fres)) /
                          (fb.cf[chan+1] - fb.cf[chan]));
         else
            /* below the first centre: interpolate from mlo up to cf[1] */
            fb.loWt[k] = (fb.cf[1]-Mel(k,fb.fres))/(fb.cf[1] - mlo);
      }
   }
   /* Create workspace for fft */
   FreeVector(&fb.x);
   fb.x = CreateVector(fb.fftN);
   return fb;
}

/* Wave2FBank: mel filterbank analysis of one speech frame.
 *
 *   s     - input frame, s[1..info.frameSize]
 *   fbank - output, fbank[1..info.numChans]; cleared and then filled
 *   te    - if not NULL, receives the sum of squares of s
 *   info  - set-up record from InitFBank (its x workspace is overwritten)
 *
 * The frame is zero-padded to info.fftN and transformed with Realft.  Each
 * fft bin k in klo..khi is then shared between channel loChan[k], weighted
 * by loWt[k], and the next channel up, which takes the remainder.  A bin
 * contributes its magnitude, or its power when info.usePower is set.  When
 * info.takeLogs is set the channel values are floored at melfloor and
 * replaced by their natural log.
 *
 * If s or fbank does not have the size recorded in info, the mismatch is
 * reported on stderr and the function returns without touching fbank
 * (*te is set to 0): carrying on would index past the end of the smaller
 * buffer. */
static void Wave2FBank(Vector s, Vector fbank, float *te, FBankInfo info)
{
   const float melfloor = 1.0E-5;
   int k,bin;
   int sizesOk = 1;
   float t1,t2;   /* real and imag parts */
   float ek;      /* energy of k'th fft channel */

   /* Check that info record is compatible */
   if (info.frameSize != VectorSize(s)) {
      fprintf(stderr,"Wave2FBank: frame size mismatch\n");
      sizesOk = 0;
   }
   if (info.numChans != VectorSize(fbank)) {
      fprintf(stderr,"Wave2FBank: num channels mismatch\n");
      sizesOk = 0;
   }
   if (!sizesOk) {
      if (te != NULL) *te = 0.0;
      return;
   }
   /* Compute frame energy if needed */
   if (te != NULL){
      *te = 0.0;
      for (k=1; k<=info.frameSize; k++)
         *te += (s[k]*s[k]);
   }
   /* Apply FFT */
   for (k=1; k<=info.frameSize; k++)
      info.x[k] = s[k];    /* copy to workspace */
   for (k=info.frameSize+1; k<=info.fftN; k++)
      info.x[k] = 0.0;   /* pad with zeroes */
   Realft(info.x);                            /* take fft */
   /* Fill filterbank channels */
   ZeroVector(fbank);
   for (k = info.klo; k <= info.khi; k++) {             /* fill bins */
      t1 = info.x[2*k-1]; t2 = info.x[2*k];
      if (info.usePower)
         ek = t1*t1 + t2*t2;
      else
         ek = (float)sqrt(t1*t1 + t2*t2);
      bin = info.loChan[k];
      t1 = info.loWt[k]*ek;            /* share going to the lower channel */
      if (bin>0) fbank[bin] += t1;
      if (bin<info.numChans) fbank[bin+1] += ek - t1;
   }
   /* Take logs */
   if (info.takeLogs)
      for (bin=1; bin<=info.numChans; bin++) {
         t1 = fbank[bin];
         if (t1<melfloor) t1 = melfloor;
         fbank[bin] = (float)log(t1);
      }
}

/* FBank2MFCC: compute the first n cepstral coefficients c[1..n] from the
   filterbank outputs as a cosine transform over the VectorSize(fbank)
   channels, scaled by sqrt(2/channels). */
static void FBank2MFCC(Vector fbank, Vector c, int n)
{
   const int nChan = VectorSize(fbank);
   const float scale = (float)sqrt(2.0/(float)nChan);
   const float step = (float)(PI/(float)nChan);
   int coef,ch;

   for (coef = 1; coef <= n; coef++) {
      float *out = &c[coef];
      const float angle = (float)coef * step;

      *out = 0.0;
      /* the running sum is rounded to float after every term */
      for (ch = 1; ch <= nChan; ch++)
         *out = (float)(*out + fbank[ch] * cos(angle*(ch-0.5)));
      *out = (float)(*out * scale);
   }
}

/* FBank2C0: return the zero'th cepstral coefficient, i.e. the sum of the
   filterbank outputs scaled by sqrt(2/channels) */
static float FBank2C0(Vector fbank)
{
   const int nChan = VectorSize(fbank);
   const float scale = (float)sqrt(2.0/(float)nChan);
   float total = 0.0;
   int ch = 1;

   while (ch <= nChan) {
      total += fbank[ch];
      ch++;
   }
   return total * scale;
}

/* ------------------- Feature Level Operations -------------------- */

/* GenCepWin: fill the cached liftering window cepWin[1..count] for the
   given liftering coefficient, enlarging the buffer first if it cannot
   hold count elements.  Records the coefficient and count in
   cepWinL / cepWinSize. */
static void GenCepWin (int cepLiftering, int count)
{
	const float step = (float)(PI/cepLiftering);
	const float halfL = (float)(cepLiftering/2.0);
	const int tooSmall = (cepWin==NULL) || (VectorSize(cepWin) < count);
	int k;

	if (tooSmall) {
		FreeVector(&cepWin);
		cepWin = CreateVector(count);
	}
	k = 1;
	while (k <= count) {
		cepWin[k] = (float)(1.0 + halfL*sin(k * step));
		k++;
	}
	cepWinSize = count;
	cepWinL = cepLiftering;
}

/* WeightCepstrum: multiply c[start..start+count-1] by the liftering window.
   The window is regenerated when the liftering coefficient has changed or
   the cached window is shorter than count. */
static void WeightCepstrum (Vector c, int start, int count, int cepLiftering)
{
   int k;

   if (!(cepWinL == cepLiftering && count <= cepWinSize))
      GenCepWin(cepLiftering,count);
   for (k = 0; k < count; k++)
      c[start+k] = c[start+k] * cepWin[k+1];
}


/* MFCC: compute MFCC parameters for one frame of 16-bit samples.
 *
 *   sig    - the frame, len samples
 *   len    - number of samples in the frame
 *   pbuf   - output buffer, filled by MFCCfloat
 *   ncoeff - number of cepstral coefficients requested
 *   fdur   - sample duration, passed through to MFCCfloat
 *
 * The samples are widened to float in a temporary buffer and handed to
 * MFCCfloat.  Returns the number of values MFCCfloat stored in pbuf, or 0
 * when sig is NULL, len is not positive, or the temporary buffer cannot be
 * allocated. */
int MFCC(short *sig, int len, float *pbuf,int ncoeff,double fdur)
{
	float	*fbuf;
	int	i,res;

	if (sig==NULL || len<=0) return(0);
	fbuf=(float *)malloc((size_t)len*sizeof(float));
	if (fbuf==NULL) return(0);
	for (i=0;i<len;i++) fbuf[i]=(float)sig[i];
	res=MFCCfloat(fbuf,len,pbuf,ncoeff,fdur);
	free(fbuf);
	return(res);
}

/* MFCCfloat: compute MFCC parameters for one frame of float samples.
 *
 *   sig    - the frame, len samples
 *   len    - number of samples in the frame
 *   pbuf   - output buffer; needs room for ncoeff values plus one more for
 *            each of the C0 and energy options that is enabled
 *   ncoeff - number of cepstral coefficients to produce
 *   fdur   - sample duration; 1e7 * fdur is used as the sample period
 *
 * Processing order: optional mean removal, optional raw energy, pre-emphasis,
 * optional Hamming window, log-magnitude filterbank with numchan channels,
 * cosine transform, and liftering when cepslifter > 0.  The filterbank
 * set-up is cached and rebuilt only when len, ncoeff or fdur changes.
 *
 * Output layout in pbuf: ncoeff cepstra, then C0 if enabled, then log
 * energy if enabled.  Returns the number of values written, or 0 when the
 * arguments are unusable or a work vector cannot be allocated. */
int MFCCfloat(float *sig, int len, float *pbuf,int ncoeff,double fdur)
{
	float	rawte=0.0,*p;
	int	i,stored=0;
	Vector	s=NULL;
	Vector	c=NULL;
	Vector	fbank=NULL;

	if (sig==NULL || pbuf==NULL || len<=0 || ncoeff<0) return 0;
	/* a sample period that truncates to zero would make InitFBank divide
	   by zero; this form also rejects a NaN fdur */
	if (!(10000000 * fdur >= 1.0)) return 0;

	s=CreateVector(len);
	fbank=CreateVector(numchan);
	c=CreateVector(ncoeff);
	if (s==NULL || fbank==NULL || c==NULL) goto done;
	for (i=1;i<=len;i++) s[i]=*sig++;

	if (!fbInfo_init||(len!=fbInfo_len)||(ncoeff!=fbInfo_ncoeff)||(fdur!=fbInfo_fdur)) {
		fbInfo = InitFBank(len,(long)(10000000 * fdur), numchan, (float)lofreq, (float)hifreq, 0, 1);
		fbInfo_init=1;
		fbInfo_len=len;
		fbInfo_ncoeff=ncoeff;
		fbInfo_fdur=fdur;
	}

	p = pbuf;

	if (htk_dozeromean) ZeroMeanFrame(s);

	/* energy is measured before pre-emphasis and windowing */
	if (htk_doenergy) {
		for (i=1; i<=len; i++) rawte += s[i] * s[i];
	}

	PreEmphasise(s,(float)preemp);

	if (htk_dohamming) Ham(s);

	/* the frame energy Wave2FBank can report is not used here */
	Wave2FBank(s, fbank, NULL, fbInfo);
	FBank2MFCC(fbank, c, ncoeff);
	if (cepslifter > 0) WeightCepstrum(c, 1, ncoeff, cepslifter);

	for (i=1; i<=ncoeff; i++) *p++ = c[i];

	if (htk_dozeroc) {
		*p++ = FBank2C0(fbank);
	}
	if (htk_doenergy) {
		*p++ = (float)((rawte<MINLARG) ? LZERO : log(rawte));
	}
	stored = (int)(p - pbuf);

done:
	FreeVector(&s);
	FreeVector(&fbank);
	FreeVector(&c);

	return stored;
}

/* MFCC_Configure: select the optional processing steps used by MFCCfloat.
   Each argument is treated as a boolean: any non-zero value enables the
   step, zero disables it.
     zeromean - remove the frame mean before analysis
     zeroc    - append C0 to the output
     energy   - append log energy to the output
     hamming  - apply the Hamming window */
void MFCC_Configure(int zeromean,int zeroc,int energy,int hamming)
{
	htk_dozeromean = (zeromean != 0);
	htk_dozeroc    = (zeroc != 0);
	htk_doenergy   = (energy != 0);
	htk_dohamming  = (hamming != 0);
}

#ifdef EMO
/* Demo driver: load the speech item from the SFS file named on the command
   line, compute 16 MFCCs for the first 25 ms of samples and print them.
   NOTE(review): the result of getitem() is not checked here - confirm its
   failure convention and bail out before using sp/spitem. */
int main(int argc,char **argv)
{
	struct item_header spitem;
	short	*sp;
	int		i,ms25;
	/* 16 cepstra, plus room for the C0 and energy values MFCC appends
	   when those options are enabled; zeroed so printing is well defined */
	float	coeff[16+2] = {0};

	if (argc!=2) {
		fprintf(stderr,"usage: mfcc test.sfs\n");
		exit(1);
	}

	getitem(argv[1],SP_TYPE,"0",&spitem,&sp);
	/* number of samples in 25 ms, rounded to nearest */
	ms25=(int)(0.5+0.025/spitem.frameduration);

	MFCC(sp,ms25,coeff,16,spitem.frameduration);

	printf("coeffs=");
	for (i=0;i<16;i++) printf("%g,",coeff[i]);
	printf("\n");

	return 0;
}
#endif
