/* fxcep -- fundamental frequency estimation by cepstral analysis, after Noll */

/* cep version 1.0 - L C Whitaker  20 august 1985         */
/* cep version 2.0 - D.M. Howard  March 1986              */
/* fxcep version 3.0 - M.A.Huckvale March 1991
	- SFS version
	- Non array-processor version
	- tidied code
*/

#define PROGNAME "fxcep"
#define PROGVERS "3.0"
char *progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH FXCEP 1 UCL/GEC SFS
.SH NAME
fxcep - Fx estimation by cepstrum algorithm
.SH SYNOPSIS
.B fxcep
(-I) (-t threshold) (-i item) file
.SH DESCRIPTION
.I fxcep
is a speech fundamental frequency measuring algorithm based on the cepstrum.
An FFT is performed on windows of about 40ms of the input speech to find the log spectrum
(the window length is rounded up to a power of two, at most 2048 samples, e.g. 512 points at 10kHz),
and then an FFT of that provides the cepstrum. Then the Noll rules are applied
to decide whether the input is V+ or V-, and if V+, a fundamental frequency value is determined.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and version number and exit.
.TP 11
.BI -t threshold
Threshold for cepstral peak indicating V+ or V-. 
Default set internally to 75 - value in arbitrary units 
.TP 11
.BI -i item
Select input item.
.SH INPUT ITEMS
.IP 1.xx 11
Any speech item.
.SH OUTPUT ITEMS
.IP 4.yy 11
Cepstrum Fx.
.SH VERSION/AUTHOR
.IP 1.0 11
L.C.Whitaker
.IP 2.0 11
D.M.Howard
.IP 3.0 11
M.A.Huckvale
.SH SEE ALSO
pp(SFS1), fxac(SFS1)
*/
/*--------------------------------------------------------------------------*/

#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
#include "sfs.h"		/* header structures */
#include "fft.h"

/* manifest constants */
#define FRAME     	0.04		/* frame length */
#define STEP    	0.01		/* step length */
#define MAXVEC  	2048           	/* max number of points in fft */
#define HALFMAX 	1024          	/* fft is symmetrical about HALFMAX */
#define TWICEMAX 	4096		/* buffer size */
#define MAX_SAMP_RATE	48000.0		/* max sampling rate */
#define THRESHOLD	75.0		/* cepstral peak threshold */

#define MEMORY		3		/* frame memory for peak following */
#define THIS		2		/* this frame */
#define LAST		1		/* last frame */
#define PREVLAST	0		/* frame before last */

/* global data */
char	filename[SFSMAXFILENAME];			/* input file name */
struct item_header spitem;		/* speech item header data */
struct item_header fxitem;		/* output fx header */

/* buffer space */
int	framesize;			/* # samples in frame */
int	framehalf;			/* half # samples in frame */
short	wave[TWICEMAX];			/* input buffer */
float	alpha[TWICEMAX];		/* fft buffer */
float	omega[TWICEMAX];		/* cepstrum buffer */
float	window[TWICEMAX];		/* hamming window */
int	track=0;			/* tracking Fx contour */
int	ptime[MEMORY];			/* peak tracking memory */
float	pfreq[MEMORY];
float	pamp[MEMORY];

/* cepstral analysis */
/*
 * cepstrum -- compute the power cepstrum of one windowed frame.
 *
 *   alpha  in:  windowed samples of the frame
 *          out: overwritten in place by the first REALFFT call
 *   omega  out: omega[0..framehalf-1] holds the squared magnitude of the
 *               second transform, each value divided by framesize
 *
 * Reads the globals framesize and framehalf.  The indexing below treats
 * the REALFFT output as consecutive (2*k, 2*k+1) value pairs -- presumably
 * real/imaginary parts; confirm against fft.h.
 */
void cepstrum(alpha,omega)
float	*alpha;
float	*omega;
{
	register int 	bin;
	float		re,im;
	float		logmag;

	/* first transform: spectrum of the frame */
	REALFFT(alpha,framehalf,FORWARD);

	/* clear the work buffer; omega[0] is left at zero below */
	memset(omega,0,framesize*sizeof(float));

	/* log magnitude spectrum, mirrored about omega[framehalf];
	   the 1.0E-6 offset keeps log() away from zero */
	omega[framehalf] = log(alpha[0]*alpha[0] + 1.0E-6);
	bin = 1;
	while (bin < framehalf) {
		re = alpha[2*bin];
		im = alpha[2*bin+1];
		logmag = log(re*re + im*im + 1.0E-6);
		omega[framehalf+bin] = logmag;
		omega[framehalf-bin] = logmag;
		bin++;
	}

	/* second transform: spectrum of the log spectrum */
	REALFFT(omega,framehalf,FORWARD);

	/* squared magnitude, packed in place into the first framehalf
	   entries (safe because the pair at 2*bin is never behind bin) */
	for (bin=0;bin<framehalf;bin++) {
		re = omega[2*bin];
		im = omega[2*bin+1];
		omega[bin] = (re*re + im*im)/framesize;
	}

}

/*
 * findpeak -- find the position of the largest positive value in
 * curve[lowlimit .. highlimit-1].
 *
 *   curve      data to search
 *   lowlimit   first index examined (negative values are clamped to 0)
 *   highlimit  one past the last index examined (clamped to framesize)
 *
 * Returns the index of the maximum.  Returns 0 when lowlimit lies beyond
 * framesize, when the range is empty, or when no value in the range is
 * greater than 0.0.
 */
int findpeak(curve,lowlimit,highlimit)
float 	*curve;
int 	lowlimit;
int 	highlimit;
{
	int 	i;
	int	maxpos=0;
	float	maxamp=0.0;

	if (lowlimit > framesize) return(0);
	/* a negative start would read before the beginning of the buffer */
	if (lowlimit < 0) lowlimit = 0;
	if (highlimit > framesize) highlimit = framesize;

	for (i=lowlimit;i<highlimit;i++)
		if (curve[i] > maxamp) {
			maxamp = curve[i];
			maxpos = i;
		}

	return(maxpos);
}

/* ************* */
/* * c_rules.c * */
/* ************* */
/* *************************** */
/* * (c) SPAR GROUP May 1986 * */
/* *************************** */
/******************************************************/
/*   Automatic tracking of cepstral peaks             */
/*   Using Noll's algorithm                           */
/*   L C Whitaker    24 September 1985                */
/******************************************************/

/*
 * c_rules -- apply the voicing decision rules to one cepstrum and decide
 * the pitch of the LAST frame from the THIS/LAST/PREVLAST memory.
 *
 * The cepstrum in omega[0..framehalf-1] is linearly weighted in place,
 * the largest peak between 2ms and 15ms is stored in pt[THIS] and
 * amplitude[THIS], and pfreq[LAST] (and possibly pt[LAST]) is set.
 * The global 'track' is read and updated.
 *
 * Decision code 'point':
 *   0  THIS and LAST below threshold                  -> unvoiced
 *   1  only LAST above, PREVLAST unvoiced             -> unvoiced
 *   2  only LAST above, PREVLAST voiced               -> voiced, stop tracking
 *   3  only THIS above, PREVLAST unvoiced             -> unvoiced
 *   4  only THIS above, PREVLAST voiced               -> interpolated peak
 *   5  THIS and LAST above                            -> voiced, tracking
 *
 * A peak position of zero (or less) cannot be converted to a frequency;
 * such frames are reported as 0Hz rather than dividing by zero.
 */
void c_rules(omega,pt,amplitude,pfreq,threshold,s_rate)
float   *omega;     /* cepstral data */
int     *pt;        /* position of cepstral peak */
float   *amplitude; /* amplitude of peak */
float   *pfreq;     /* pitch estimate */
float   threshold;      /* threshold for cepstral peaks */
int	s_rate;		/* sampling rate */
{
	float    gradient;  /* gradient of linear weighting */
	float    intersect; /* linear weighting at time zero */
	float    templevel; /* temporary threshold */
	int      newpeak;
	int      lowlimit,highlimit;
	int      point;
	int      j,ms_5;
	float    temp1,temp2;
	float    b;

	/* initial values */
	gradient =  4.0*1000.0/14.0;
	intersect = 10.0/14.0;
	lowlimit = (int)(2.0*s_rate/1000); /* 2ms in samples */
	highlimit = 15*s_rate/1000; /* 15 ms in samples */
	templevel = threshold;
	point = 0;

	/* linear weighting, applied directly (no temporary weight table) */
	for(j=0;j<framehalf;j++){
		omega[j] = omega[j] * ((gradient*j/(float)s_rate) + intersect);
	}

	/* detect maximum peak in cepstrum */
	pt[THIS] = findpeak(omega,lowlimit,highlimit);
	j = pt[THIS];
	amplitude[THIS] = omega[j];
	if (track == 1) {
		/* while tracking, a peak within 1ms of the last one only
		   needs to reach half the threshold */
		b = (float)(pt[THIS] - pt[LAST]);
		if (b < 0.0) b = -b;
		if (b/(float)s_rate <= 1.0/1000.0) templevel = threshold/2.0;
	}

	/* check if amplitude of cepstral peak is above threshold */
	if (amplitude[THIS] >= templevel)
		point = 3;
	else 
		point = 0;

	/* check for pitch doubling */
	if ((track == 1) && (pt[THIS] > 0) && (pt[LAST] > 0)) {
		temp1 = (float)s_rate/(float)pt[THIS];
		temp2 = (float)s_rate/(float)pt[LAST];
		if (temp2 >= 1.6*temp1) {
			/* look for a peak within 0.5ms of half the period */
			ms_5 = 5*s_rate/10000;
			newpeak = findpeak(omega,(pt[THIS]/2) -ms_5,(pt[THIS]/2) +ms_5);
			b = (float)(newpeak - pt[LAST]);
			if (b < 0.0) b = -b;
			if (b/(float)s_rate <= 1.0/(1000.0)) templevel = threshold/2.0;
			else templevel = threshold;
			if (omega[newpeak] >= templevel){
				pt[THIS] = newpeak;
				amplitude[THIS] = omega[newpeak];
			}
		}
	}

	/* does previous peak exceed threshold */
	if (amplitude[LAST] >= templevel) {
		if (point == 3)
			point += 2;
		else
			point += 1;
	}
	if ((point ==3) || (point ==1)) {
		/* is peak at PREVLAST cepstrum a pitch peak */
		if (pfreq[PREVLAST] > 20.0) point += 1;
	}

	/* use switch to determine pitch of LAST frame */
	switch(point){
		case 0 :
			pfreq[LAST] = 0.0;
			pt[LAST] = 0;
			/* unvoiced frame */
			break;
		case 1 :
			pfreq[LAST] = 0.0;
			pt[LAST] = 0;
			/* isolated pitch peak - unvoiced frame */
			break;
		case 2 :
			track = 0;
			if (pt[LAST] > 0)
				pfreq[LAST] = (float)s_rate/pt[LAST];
			else
				pfreq[LAST] = 0.0;	/* no usable peak position */
			/* voiced segment - stop pitch tracking */
			break;
		case 3 :
			pfreq[LAST] = 0.0;
			pt[LAST] = 0;
			/* unvoiced frame */
			break;
		case 4 :
			pt[LAST] = (pt[PREVLAST]+pt[THIS])/2;
			if (pt[LAST] > 0)
				pfreq[LAST] = (float)s_rate/pt[LAST];
			else
				pfreq[LAST] = 0.0;	/* no usable peak position */
			/* isolated absence of pitch peak */
			break;
		case 5 :
			if (pt[LAST] > 0)
				pfreq[LAST] = (float)s_rate/pt[LAST];
			else
				pfreq[LAST] = 0.0;	/* no usable peak position */
			track = 1;
			/* voiced frame - start or continue tracking */
			break;
		default :
			error("AARGH! error in cepstrules()");
			break;
	}
}

/* main program */
void main(argc,argv)
int argc;
char *argv[];
{
	/* option decoding */
	extern int 	optind;
	extern char	*optarg;
	int		c;
	int		errflg=0;

	/* item selection */
	int		it;			/* item specifiers */
	char		*ty;
	char		*sptype="0";		/* default input subtype = last */
	int		fid,ofid;		/* I/O descriptors */

	/* processing */
	int		i,j;
	int		overlap;		/* frame parameters */
	int		stepsize;
	double		twopi;			/* constant */
	int		samprate;		/* sampling rate */
	float           threshold=THRESHOLD;
	short		fxval;

	/* decode switches */
	while ( (c = getopt(argc,argv,"It:i:")) != EOF )
		switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Fx by cepstrum process V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 't' :	/* use preset threshold value (30) */
			sscanf(optarg,"%f",&threshold);
			break;
		case 'i' :	/* specific input item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == SP_TYPE)
					sptype = ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-t threshold) (-i item) file\n",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no database file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0)
		error("access error on '%s'",filename);

	/* find input item */
	if (!sfsitem(fid,SP_TYPE,sptype,&spitem))
		error("could not find input item in '%s'",filename);

	/* get sampling rate */
	if ((samprate = (int)(1.0/spitem.frameduration)) > MAX_SAMP_RATE)
		error("sampling rate too big (%g) in %s",samprate,filename);

	/* find framesize (power of 2) about 40ms */
	framesize = (int)(FRAME / spitem.frameduration);
	for (i=1;i<framesize;i*=2) /* loop */;
	if (i > MAXVEC) framesize = MAXVEC; else framesize = i;
	framehalf = framesize/2;

	/* make stepsize about 10ms */
	stepsize = framesize/4;
	overlap = framesize - stepsize;

	/* find hamming window coefficients */
	twopi = 8.0*atan(1.0);
	for(i=0;i<framesize;i++)
		window[i] = 0.54 -(0.46 * cos(((double)i*twopi)/(framesize-1)));
	for (;i< MAXVEC;i++) window[i] = 0.0;

	/* create output Fx header */
	sfsheader(&fxitem,FX_TYPE,0,2,1,stepsize*spitem.frameduration,
			spitem.offset+stepsize*spitem.frameduration,4,3,0);
	sprintf(fxitem.history,"%s(%d.%02d;threshold=%5.2f)",PROGNAME,SP_TYPE,
		spitem.subtype,threshold);
	if ((ofid = sfschannel(filename,&fxitem)) < 0)
		error("unable to open output channel to '%s'",filename);

	/* now process speech in windows */
	for (i=0;sfsread(fid,i,framesize,wave)==framesize;i+=stepsize) {

		/* copy into float buffer and window */
		for(j=0;j<framesize;j++) {
			alpha[j] = ((float) wave[j]) * window[j];
		}

		/* calculate cepstrum */
		cepstrum(alpha,omega);
	
		/* apply Noll's rules to cepstrum */
		ptime[THIS]=0;
		pamp[THIS]=0.0;
		pfreq[THIS]=0.0;
		c_rules(omega,ptime,pamp,pfreq,threshold,samprate);

		/* output decision on last frame */
		fxval = (short)(pfreq[LAST]);
		sfswrite(ofid,1,&fxval);

		/* shift memory */
		for (j=0;j<MEMORY-1;j++) {
			ptime[j] = ptime[j+1];
			pamp[j] = pamp[j+1];
			pfreq[j] = pfreq[j+1];
		}

		/* report progress */
		if (ttytest()) {
			if (fxval)
				printf("Frame %d/%d %3dHz\r",1+i/stepsize,
					1+(spitem.numframes-framesize+stepsize)/stepsize,fxval);
			else
				printf("Frame %d/%d Unvoi\r",1+i/stepsize,
					1+(spitem.numframes-framesize+stepsize)/stepsize);
			fflush(stdout);
		}
	}
	if (ttytest())
		printf("                                \r");

	/* write last decision */
	fxval = (short)pfreq[LAST];
	sfswrite(ofid,1,&fxval);

	/* update file */ 
	if (!sfsupdate(filename))
		error("update error on '%s'",filename);

	/* that's all folks */
	exit(0);
}
