/* mfcc - Mel-scaled Cepstral Coefficients analysis */

/* Mark Huckvale - University College London */

/* version 1.0 - June 1996 */

/* version 1.1 - July 1997
	- add deltas
*/

/* program name and version: printed by the -I option; the name is also used
   in the usage message and written into the output item history */
#define PROGNAME "mfcc"
#define PROGVERS "1.1"
/* NOTE(review): not referenced in this file - presumably read by the SFS
   library when reporting errors; confirm against the library source */
char *progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH MFCC 1 UCL SFS
.SH NAME
mfcc - compute mel-scaled cepstral coefficients
.SH SYNOPSIS
.B mfcc
(-i item) (-n ncoeff) (-l low_freq) (-h high_freq) (-r frame_rate) (-e|-F) (-p) (-d1|-d2) sfsfile
.SH DESCRIPTION
.I mfcc
designs a mel-scaled filterbank for the supplied number of
channels and processes the input speech signal through the filterbank into a set
of mel-scaled cepstral coefficients.  The method is based on that
described in Deller, et al, Discrete Time Processing of
Speech Signals.  It uses a Hamming window and a forward FFT and a specialised
inverse DFT.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and version number.
.TP 11
.BI -i item
Select input item number.
.TP 11
.BI -n ncoeff
Select number of coefficients to calculate.  Default: 16.
.TP 11
.BI -l low_frequency
Specify lowest frequency of filterbank.  Default: 0Hz.
.TP 11
.BI -h high_frequency
Specify highest frequency of filterbank.  Default: half input sample rate.
.TP 11
.BI -r frame_rate
Specify frame rate for output.  Default 100 frames/sec.
.TP 11
.B -e
Append the log signal energy to the output frame.
.TP 11
.B -F
Store filter output rather than cepstral coefficients.
.TP 11
.B -p
Do not pre-emphasise signal.
.TP 11
.BI -d1
Add first order delta cepstral coeffs to output vector.
.TP 11
.BI -d2
Add first and second order delta cepstral coeffs to output vector.
.SH INPUT ITEMS
.IP 1.xx 11
Any speech waveform.
.SH OUTPUT ITEMS
.IP 11 11
Mel-scaled cepstral coefficients.
.SH VERSION/AUTHOR
1.1 - Mark Huckvale
*/
/*--------------------------------------------------------------------------*/

/* standard definitions */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <malloc.h>
#include <math.h>
#include "sfs.h"
#include "fft.h"

/* default parameters */
#define DEFAULT_NCOEFF		16	/* cepstral coefficients per frame (-n) */
#define DEFAULT_LOW_FREQ	0.0	/* start of first filter in Hz (-l) */
#define DEFAULT_FRAME_RATE	100	/* output frames per second (-r) */
#define PREEMP			0.95	/* weight of previous sample subtracted in pre-emphasis */

/* buffers */
struct item_header		spitem;		/* header of the input speech item */
struct item_header		coitem;		/* header of the output coefficient item */
short				*sp;		/* samples read for one analysis window */
struct co_rec			*co;		/* history of histlen output frames */
float				*hamming;	/* Hamming window */
float				*dftbuf;	/* DFT buffer */
float				*chan;		/* filterbank channels */

/* options */
char	filename[SFSMAXFILENAME];	/* SFS file processed and updated */
int	ncoeff=DEFAULT_NCOEFF;		/* # cepstral coefficients */
int	nbase=DEFAULT_NCOEFF;		/* # base parameters: ncoeff, plus one
					   when the energy is appended */
int	nparam=DEFAULT_NCOEFF;		/* # parameters in vector: nbase, or 2 or 3
					   times nbase when deltas are added */
double	lofreq=DEFAULT_LOW_FREQ;	/* first filter freq start */
double	hifreq=-1;			/* last filter freq end; a negative value
					   selects half the sample rate */
double	frate=DEFAULT_FRAME_RATE;	/* frame rate */
int	dofilter=0;			/* output mel-scale filter, instead
					   of cepstral coefficients */
int	doenergy=0;			/* add energy */
int	verbose=0;			/* list filter design */
int	dopreemp=1;			/* signal pre-emphasis */
int	dodelta1=0;			/* do first deltas */
int	dodelta2=0;			/* do second deltas */
int	nfilter;			/* # filters */
int	winsize;			/* analysis window size */
int	stpsize;			/* step size */
int	fftsize;			/* FFT size */
int	histlen=1;			/* size of history: 1, 5 (-d1) or 9 (-d2) frames */
int	outpos=0;			/* output vector position in history
					   (not referenced in this file) */

/* filter bank parameters: one record per triangular filter, with the
   filter edges held as FFT bin indices */
struct filter_rec {
	int	lidx;	/* low index */
	int	cidx;	/* centre index */
	int	hidx;	/* high index */
	double	*win;	/* FFT weighting window */
} *filtab;
char	filspec[1024];	/* "labels=..." text built for the output item parameters */

double mylog10(double val)
{
	return (val < 1.0E-10) ? -10 : log10(val);
}
double HertzToMel(double hz)
{
	return (1000.0/log(2.0)) * log(1.0 + hz/1000.0);
}

double MelToHertz(double mel)
{
	return 1000 * (exp((log(2.0)/1000)*mel)-1.0);
}

/* design filter bank
 *
 * Fits triangular filters, each 300 mel wide and spaced 150 mel apart,
 * between lf and hf (both in Hz), and stores them in the global filtab.
 * Filter edges are held as FFT bin indices for an nfft-point transform
 * of a signal sampled at srate Hz; each filter's win[] array holds
 * nfft/2 spectral weights, zero outside the triangle.
 *
 * Stored lidx/hidx values are limited to the range 0..nfft/2-1 so that
 * callers looping from lidx to hidx never index outside win[] or beyond
 * the nfft/2 spectral magnitudes.  The triangle slopes are still worked
 * out from the unlimited edges, so the filter shape is unchanged.
 *
 * Returns the number of filters designed, which may be zero if fewer
 * than 300 mel separate lf and hf.  Calls error() if memory cannot be
 * obtained.
 */
int designfbank(double lf,double hf,int nfft,double srate)
{
	int	i,j;
	int	lo,mid,hi;
	int	nbins = nfft/2;		/* weights held per filter */
	int	maxfilt;		/* filter records allocated */
	int	nfilt;			/* filters actually designed */
	double	fhz,fmel;
	double	*win;

	/* estimate number of filters required 300 mel width, 150 mel spacing;
	   this is an upper bound on the count produced by the loop below */
	maxfilt = (int)(1+(HertzToMel(hf) - HertzToMel(lf))/150);
	if (maxfilt < 1)
		maxfilt = 1;	/* never request a zero-sized or negative table */

	/* get memory for filter structures */
	if ((filtab = calloc(maxfilt,sizeof(struct filter_rec)))==NULL)
		error("could not get memory");
	for (i=0;i<maxfilt;i++)
		if ((filtab[i].win = (double *)calloc(nbins,sizeof(double)))==NULL)
			error("could not get memory");

	/* fit triangular filters between limits: the first centre sits
	   150 mel above lf and the last at most 150 mel below hf */
	lf = MelToHertz(HertzToMel(lf)+150);
	hf = MelToHertz(HertzToMel(hf)-150);
	fhz = lf;
	nfilt=0;
	while ((fhz <= hf) && (nfilt < maxfilt)) {
		fmel = HertzToMel(fhz);
		filtab[nfilt].lidx = (int)(0.5+nfft*(MelToHertz(fmel-150)/srate));
		filtab[nfilt].cidx = (int)(0.5+nfft*(MelToHertz(fmel)/srate));
		filtab[nfilt].hidx = (int)(0.5+nfft*(MelToHertz(fmel+150)/srate));
		fhz = MelToHertz(fmel+150);
		nfilt++;
	}

	/* create triangular weighting windows */
	for (i=0;i<nfilt;i++) {
		lo  = filtab[i].lidx;
		mid = filtab[i].cidx;
		hi  = filtab[i].hidx;
		win = filtab[i].win;

		/* rising edge, skipping any bin outside the window array */
		for (j=lo;j<mid;j++) {
			if ((j >= 0) && (j < nbins))
				win[j] = 1.0 - (double)(mid-j)/(double)(mid-lo+1);
		}
		/* peak and falling edge: hi can round up to nfft/2, which
		   is one past the end of the window array */
		for (j=mid;j<=hi;j++) {
			if ((j >= 0) && (j < nbins))
				win[j] = 1.0 - (double)(j-mid)/(double)(hi-mid+1);
		}

		/* keep the stored span inside the window array */
		if (filtab[i].lidx < 0)
			filtab[i].lidx = 0;
		if (filtab[i].hidx > nbins-1)
			filtab[i].hidx = nbins-1;
	}

	return(nfilt);
}

/* append text to a label specification, truncating the text rather than
   writing beyond the speclen bytes available in spec */
static void addlabel(char *spec,size_t speclen,const char *text)
{
	size_t	used = strlen(spec);

	if (used+1 < speclen)
		strncat(spec,text,speclen-1-used);
}

/* main program
 *
 * Decodes the command line, designs the mel filterbank for the selected
 * speech item, then steps through the signal in overlapping windows:
 * optional pre-emphasis, Hamming window, FFT, filterbank, and either log
 * filter outputs (-F) or cepstral coefficients from a cosine transform of
 * the log filter outputs.  With -d1/-d2 a short history of frames is kept
 * so that first and second order differences can be appended to each
 * frame before it is written.
 *
 * NOTE(review): in the delta modes the frame written on each pass lags the
 * newest analysed frame by 2 (-d1) or 4 (-d2) frames and nothing flushes
 * the history after the loop, so that many frames at the end of the
 * signal are never written.
 */
int main(int argc,char *argv[])
{
	extern int	optind;
	extern char	*optarg;
	int		c,errflg=0;

	int		fid,ofid;
	int		it;
	char		*ty;
	char		*sptype="0";
	int		i,j;
	double		srate;
	int		pos;
	double		omega;
	double		sum;
	double		sumsq;
	char		tbuf[32];
	int		nframe;
	int		totframes;
	int		hpos;		/* history slot receiving the newest frame */
	int		delta1first=1,delta2first=1;
	
	while ((c=getopt(argc,argv,"Ii:n:l:h:r:evFpd:"))!=EOF) switch (c) {
	case 'I':
		fprintf(stderr,"%s: Mel-scaled cepstral coefficients V%s\n",
			PROGNAME,PROGVERS);
		exit(0);
	case 'i':
		if (itspec(optarg,&it,&ty)==0) {
			if (it==SP_TYPE)
				sptype = ty;
			else
				error("unsuitable item type '%s'",optarg);
		}
		else
			error("illegal item selection '%s'",optarg);
		break;
	case 'n':
		ncoeff = atoi(optarg);
		break;
	case 'l':
		lofreq = atof(optarg);
		break;
	case 'h':
		hifreq = atof(optarg);
		break;
	case 'r':
		frate = atof(optarg);
		break;
	case 'e':
		doenergy=1;
		break;
	case 'v':
		verbose=1;
		break;
	case 'F':
		dofilter=1;
		break;
	case 'p':
		dopreemp=0;
		break;
	case 'd':
		if (atoi(optarg)==1)
			dodelta1=1;
		else if (atoi(optarg)==2) {
			dodelta1=1;
			dodelta2=1;
		}
		else
			error("-d1 or -d2 only supported");
		break;
	case '?':
		errflg++;
	}
	if (errflg || (argc < 2))
		error("usage: %s (-I) (-i item) (-n ncoeff) (-l lofreq) (-h hifreq) (-r framerate) (-e|-F) (-v) (-p) (-d1|-d2) sfsfile",PROGNAME);

	/* reject values that would later give an empty output vector,
	   a zero step size or filter indices below zero */
	if (ncoeff < 1)
		error("number of coefficients must be at least 1");
	if (frate <= 0)
		error("frame rate must be greater than zero");
	if (lofreq < 0)
		error("low frequency limit below zero");

	/* filter output mode stores channel energies only */
	if (dofilter) {
		doenergy=0;
		dodelta1=0;
		dodelta2=0;
	}

	/* size the output vector and the frame history; new frames enter the
	   history at hpos, which starts at the centre slot */
	if (doenergy) nbase=ncoeff+1; else nbase=ncoeff;
	if (dodelta2) {
		nparam=3*nbase;
		histlen=9;
		hpos=4;
	}
	else if (dodelta1) {
		nparam=2*nbase;
		histlen=5;
		hpos=2;
	}
	else {
		nparam=nbase;
		histlen=1;
		hpos=0;
	}
	
	/* get SFS file name */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no input file");

	/* open it */
	if ((fid=sfsopen(filename,"w",NULL)) < 0)
		error("access error on '%s'",filename);

	/* find input item */
	if (!sfsitem(fid,SP_TYPE,sptype,&spitem))
		error("could not find input speech item in '%s'",filename);

	/* set up processing parameters */
	srate = 1.0/spitem.frameduration;
	if (hifreq < 0)
		hifreq = srate/2;
	else if (hifreq > srate/2)
		error("high frequency limit above half sample rate");
	
	/* window is two steps long, so successive windows overlap by half */
	stpsize = (int)(0.5+srate/frate);
	if (stpsize < 1)
		error("frame rate too high for input sample rate");
	winsize = 2*stpsize;
	fftsize = 512;		/* minimum for accurate filter positions */
	while (fftsize < winsize) fftsize *= 2;
	
	/* design filter bank */	
	nfilter = designfbank(lofreq,hifreq,fftsize,srate);
	if (nfilter < 1)
		error("no filters fit between %gHz and %gHz",lofreq,hifreq);

	if (verbose) {
		/* dump filter design */
		printf("Design of %d channel mel-scaled filterbank:\n",nfilter);
		for (i=0;i<nfilter;i++)
			printf("Chan %2d at %5dHz (%d - %d)\n",
				i+1,
				(int)(0.5+filtab[i].cidx*srate/fftsize),
				(int)(0.5+filtab[i].lidx*srate/fftsize),
				(int)(0.5+filtab[i].hidx*srate/fftsize));
#if 0
		for (i=0;i<nfilter;i++) {
			sum=0;
			printf("Chan %d: ",i+1);
			for (j=filtab[i].lidx;j<=filtab[i].hidx;j++) {
				sum += filtab[i].win[j];
				printf("%d-%.2f,",j,filtab[i].win[j]);
			}
			printf(",sum=%g\n",sum);
		}
#endif
	}

	/* create output channel */
	if (dofilter) {
		sfsheader(&coitem,CO_TYPE,-1,4,sfsstruct[CO_TYPE]/4+nfilter,
			spitem.frameduration,spitem.offset+1.0/frate,winsize,winsize-stpsize,0);
		sprintf(coitem.history,"%s(%d.%02d;nchannel=%d,lofreq=%g,hifreq=%g,frate=%g)",
			PROGNAME,
			spitem.datatype,spitem.subtype,
			nfilter,lofreq,hifreq,frate);
		/* channel labels are the lower edge frequencies of the filters */
		sprintf(filspec,"labels=%d",(int)(0.5+filtab[0].lidx*srate/fftsize));
		for (i=1;i<nfilter;i++) {
			sprintf(tbuf,"|%d",(int)(0.5+filtab[i].lidx*srate/fftsize));
			addlabel(filspec,sizeof(filspec),tbuf);
		}
		/* strncpy does not terminate a truncated copy, so do it here */
		strncpy(coitem.params,filspec,sizeof(coitem.params));
		coitem.params[sizeof(coitem.params)-1] = '\0';
	}
	else {
		sfsheader(&coitem,CO_TYPE,-1,4,sfsstruct[CO_TYPE]/4+nparam,
			spitem.frameduration,spitem.offset+winsize*spitem.frameduration/2,winsize,winsize-stpsize,0);
		sprintf(coitem.history,"%s(%d.%02d;ncoeff=%d,nparam=%d,lofreq=%g,hifreq=%g,frate=%g%s%s%s)",
			PROGNAME,
			spitem.datatype,spitem.subtype,
			ncoeff,nparam,lofreq,hifreq,frate,
			(doenergy)?",energy":"",
			(dodelta1)?",delta1":"",
			(dodelta2)?",delta2":"");
		/* labels follow the layout of the output vector: base
		   parameters, then first deltas, then second deltas */
		sprintf(filspec,"labels=C1");
		for (i=1;i<ncoeff;i++) {
			sprintf(tbuf,"|C%d",i+1);
			addlabel(filspec,sizeof(filspec),tbuf);
		}
		if (doenergy) addlabel(filspec,sizeof(filspec),"|E");
		if (dodelta1) {
			for (i=0;i<ncoeff;i++) {
				sprintf(tbuf,"|dC%d",i+1);
				addlabel(filspec,sizeof(filspec),tbuf);
			}
			if (doenergy) addlabel(filspec,sizeof(filspec),"|dE");
		}
		if (dodelta2) {
			for (i=0;i<ncoeff;i++) {
				sprintf(tbuf,"|DC%d",i+1);
				addlabel(filspec,sizeof(filspec),tbuf);
			}
			if (doenergy) addlabel(filspec,sizeof(filspec),"|DE");
		}
		/* strncpy does not terminate a truncated copy, so do it here */
		strncpy(coitem.params,filspec,sizeof(coitem.params));
		coitem.params[sizeof(coitem.params)-1] = '\0';
	}
	if ((ofid=sfschannel(filename,&coitem))<0)
		error("could not open output channel to '%s'",filename);

	/* get buffers */
	if ((sp=(short *)sfsbuffer(&spitem,fftsize))==NULL)
		error("could not get memory");
	if ((co=(struct co_rec *)sfsbuffer(&coitem,histlen))==NULL)
		error("could not get memory");
	if ((hamming=(float *)calloc(fftsize,sizeof(float)))==NULL)
		error("could not get memory");
	if ((dftbuf=(float *)calloc(fftsize+2,sizeof(float)))==NULL)
		error("could not get memory");
	if ((chan=(float *)calloc(nfilter,sizeof(float)))==NULL)
		error("could not get memory");
	
	/* design Hamming window */
	omega = 8.0 * atan(1.0) / (winsize - 1);
	for (i=0;i<winsize;i++)
		hamming[i] = (0.54 - (0.46 * cos(i * omega)));
	
	/* process the signal in windows */
	totframes = (spitem.numframes-winsize+stpsize)/stpsize;
	nframe=0;
	for (pos=0;sfsread(fid,pos,winsize,sp)==winsize;pos+=stpsize) {

		/* pre-emphasise and apply Hamming window; the first sample has
		   no predecessor in the buffer and is formed from sp[1] and
		   sp[0] instead; the rest of the FFT buffer is zero padded */
		if (dopreemp) {
			dftbuf[0] = (sp[1] - PREEMP * sp[0]) * hamming[0];
			for (i=1;i<winsize;i++)
				dftbuf[i] = (sp[i] - PREEMP * sp[i-1]) * hamming[i];
			for (;i<fftsize+2;i++) dftbuf[i]=0.0;
		}
		else {
			for (i=0;i<winsize;i++)
				dftbuf[i] = sp[i] * hamming[i];
			for (;i<fftsize+2;i++) dftbuf[i]=0.0;
		}

		/* if energy required, take it from the windowed signal */
		if (doenergy) {
			sumsq=0;
			for (i=0;i<winsize;i++)
				sumsq += dftbuf[i]*dftbuf[i];
			co[hpos].data[ncoeff] = mylog10(sumsq); /* deliberately in bels */
		}

		/* do forward FFT */
		REALFFT(dftbuf,fftsize/2,FORWARD);

		/* create magnitude, in place: each write lands behind the
		   pair of values still to be read */
		for (i=0;i<fftsize/2;i++)
			dftbuf[i] = sqrt(dftbuf[2*i+1]*dftbuf[2*i+1]+dftbuf[2*i+2]*dftbuf[2*i+2]);

		/* form filter outputs */
		for (i=0;i<nfilter;i++) {
			chan[i] = 0.0;
			for (j=filtab[i].lidx;j<=filtab[i].hidx;j++)
				chan[i] += filtab[i].win[j] * dftbuf[j];
		}

		/* store filter energies or cepstral coeffs */
		if (dofilter) {
			for (i=0;i<nfilter;i++)
				co[hpos].data[i] = 20.0*mylog10(chan[i]);
		}
		else {
			/* inverse DFT: cosine transform of the log channel
			   outputs, omitting the zeroth coefficient */
			for (i=0;i<ncoeff;i++) {
				omega = 4 * atan(1.0) * (i+1) / nfilter;
				sum = 0;
				for (j=0;j<nfilter;j++)
					sum += mylog10(chan[j]) * cos((j+0.5) * omega);
				co[hpos].data[i] = sum;
			}
		}
		co[hpos].posn = pos;
		co[hpos].size = winsize;
		co[hpos].flag = 0;
		co[hpos].mix = 0.0;
		co[hpos].gain = 1.0;

		/* fill buffer if first time through: the slots before the
		   first frame are padded with copies of it */
		if (pos==0) {
			for (i=0;i<hpos;i++) {
				co[i].posn = co[hpos].posn;
				co[i].size = co[hpos].size;
				co[i].flag = co[hpos].flag;
				co[i].mix = co[hpos].mix;
				co[i].gain = co[hpos].gain;
				for (j=0;j<nbase;j++)
					co[i].data[j] = co[hpos].data[j];
			}				
		}

		/* calculate first deltas for the frame two slots behind the
		   newest, from the two frames either side of it */
		if (dodelta1 && (hpos >= 4)) {
			for (i=0;i<nbase;i++) {
				sum=0;
				for (j=1;j<=2;j++)
					sum += co[hpos-2+j].data[i] - co[hpos-2-j].data[i];
				co[hpos-2].data[i+nbase] = sum/4;
			}
			if (delta1first) {
				/* first time through fill buffer */
				for (i=0;i<hpos-2;i++)
					for (j=0;j<nbase;j++)
						co[i].data[nbase+j] = co[hpos-2].data[nbase+j];
				delta1first=0;
			}
		}

		/* calculate second deltas for the frame four slots behind the
		   newest, from the first deltas either side of it */
		if (dodelta2 && (hpos >= 8)) {
			for (i=0;i<nbase;i++) {
				sum=0;
				for (j=1;j<=2;j++)
					sum += co[hpos-4+j].data[nbase+i] - co[hpos-4-j].data[nbase+i];
				co[hpos-4].data[i+2*nbase] = sum/4;
			}
			if (delta2first) {
				/* first time through fill buffer */
				for (i=0;i<hpos-4;i++)
					for (j=0;j<nbase;j++)
						co[i].data[2*nbase+j] = co[hpos-4].data[2*nbase+j];
				delta2first=0;
			}
		}

		/* write CO record: once the history is full the centre frame
		   is written and the history moved down one slot; until then
		   the insertion point just advances */
		if (dodelta2) {
			if (hpos >= 8) {
				if (sfswrite(ofid,1,&co[4])!=1)
					error("output write error on '%s'",filename);
				for (i=1;i<=8;i++) {
					co[i-1].posn = co[i].posn;
					co[i-1].size = co[i].size;
					co[i-1].gain = co[i].gain;
					co[i-1].mix = co[i].mix;
					co[i-1].flag = co[i].flag;
					for (j=0;j<nparam;j++)
						co[i-1].data[j] = co[i].data[j];
				}
			}
			else
				hpos++;
		}
		else if (dodelta1) {
			if (hpos >= 4) {
				if (sfswrite(ofid,1,&co[2])!=1)
					error("output write error on '%s'",filename);
				for (i=1;i<=4;i++) {
					co[i-1].posn = co[i].posn;
					co[i-1].size = co[i].size;
					co[i-1].gain = co[i].gain;
					co[i-1].mix = co[i].mix;
					co[i-1].flag = co[i].flag;
					for (j=0;j<nparam;j++)
						co[i-1].data[j] = co[i].data[j];
				}
			}
			else
				hpos++;
		}
		else {
			if (sfswrite(ofid,1,co)!=1)
				error("output write error on '%s'",filename);
		}

		/* progress report every 50 frames */
		++nframe;
		if (ttytest() && ((nframe%50)==0)) {
			printf("%d/%d frames\r",nframe,totframes);
			fflush(stdout);
		}
	}
	if (ttytest())
		printf("                          \r");

	/* update and exit */
	if (!sfsupdate(filename))
		error("update error on '%s'",filename);

	exit(0);
}
