/* fxac -- estimate FX by autocorrelation method */

/* m.a. huckvale - University College London */

/* version 1.0 - february 1988 */
/* version 1.1 - february 1996
	- eliminate filtering
	- cube waveform prior to autocorrelation
*/
/* version 2.0 - June 1998
	- add option to process Lx instead of Sp
*/
/* version 2.1 - February 2001
	- add option to save display item
*/
/* version 2.2 - November 2011
	- add option to set freq range
*/

/* program name and version: reported by the -I option and written into the
   history field of every output item */
#define PROGNAME "fxac"
#define PROGVERS "2.2"
/* global copy of the program name; not referenced again in this file --
   presumably read by the SFS library when reporting errors (TODO confirm) */
char *progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH FXAC SFS1 UCL
.SH NAME
fxac - estimate fx from sp using autocorrelation
.SH SYNOPSIS
.B fxac
(-I) (-i item) (-c) (-d) (-l lofreq) (-h hifreq) (-w wintime) (-s steptime) file
.SH DESCRIPTION
.I fxac
estimates FX from a speech or Laryngograph waveform by the following process: (i) cubing waveform
sample values, (ii) autocorrelation, and (iii) voicing & fundamental frequency
decision. By default the output FX is estimated
in 30ms windows with a repetition time of 5ms.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and exit.
.TP 11
.BI -i item
Select input item number and type.
.TP 11
.B -d
Output autocorrelation function as DISPLAY item.
.TP 11
.B -c
Use normalised cross-correlation in place of autocorrelation.
.TP 11
.BI -l lofreq
Set lowest Fx value used in search. Default 60Hz.
.TP 11
.BI -h hifreq
Set highest Fx value used in search. Default 500Hz.
.TP 11
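.BI -w wintime
Specify analysis window size in seconds. Values greater than 1 are taken to be in milliseconds. Default 0.03s.
.TP 11
.BI -s steptime
Specify analysis window step size in seconds. Values greater than 1 are taken to be in milliseconds. Default 0.005s.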
.SH INPUT ITEMS
.IP 1.xx 11
Speech waveform.
.IP 2.xx 11
Laryngograph waveform.
.SH OUTPUT ITEMS
.IP 4 11
FX estimate by autocorrelation.
.IP 9 11
Autocorrelation display.
.SH VERSION/AUTHOR
2.2 - Mark Huckvale.
.SH SEE ALSO
fxcep(SFS1)
*/
/*--------------------------------------------------------------------------*/

/* global declarations */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include "sfs.h"			/* header structures */

/* FX estimation control */
#define	STEP	0.005	/* default analysis step in seconds (5ms); -s overrides */
#define WINDOW	0.030	/* default analysis window in seconds (30ms); -w overrides */
#define RANGE	4.0	/* required dynamic range of autocorrelation: findpeak() rejects the window when min > max/RANGE */
#define ARATIO1	0.35	/* size of autocorrelation peak (relative to lag 0) for voicing start */
#define ARATIO2 0.25	/* size of autocorrelation peak (relative to lag 0) for voicing continue */
#define CRATIO1	0.70	/* size of crosscorrelation peak for voicing start */
#define CRATIO2 0.50	/* size of crosscorrelation peak for voicing continue */

/* global data */
struct item_header 	spitem;		/* header of the input item (speech or Lx waveform) */
short			*sp;		/* input waveform buffer holding one analysis window */
float			*fsp;		/* float copy of the window after mean removal and cubing */
struct item_header 	fxitem;		/* header of the output FX item */
double		sampfreq;	/* current sampling frequency, set to 1/frameduration of the input item */
float			*acoeff;	/* correlation function indexed by lag in samples; [0] holds the mean square value */
struct item_header	diitem;		/* header of the output DISPLAY item (-d only) */
struct di_rec		*di;		/* display record buffer (-d only) */
int			dodisp=0;	/* -d: save correlation function as a display item */
int			docross=0;	/* -c: use normalised cross-correlation */
double		lofreq=60;	/* -l: lowest FX searched, in Hz */
double		hifreq=500;	/* -h: highest FX searched, in Hz */
double		windowsize=WINDOW;	/* -w: analysis window length in seconds */
double		stepsize=STEP;	/* -s: analysis step in seconds */

/*
 * P3Interpolate_Extremum -- stationary point of the parabola through three points
 *
 * Fits a parabola through (xl,yl), (xc,yc) and (xu,yu) and stores the
 * co-ordinates of its stationary point in *xe and *ye.
 *
 * Return value:
 *   < 0   the stationary point is a maximum lying inside [xl,xu]
 *   > 0   the stationary point is a minimum lying inside [xl,xu]
 *   0.0   the result is unreliable: either the stationary point lies outside
 *         [xl,xu] (*xe and *ye still hold the computed point), or the three
 *         points are collinear (*xe and *ye are then set to xc and yc)
 *
 * The abscissae must be distinct; no check is made for division by zero.
 * http://www.ebyte.it/library/codesnippets/P3Interpolation.html
 */
double P3Interpolate_Extremum(double xl,double xc,double xu,double yl,double yc,double yu,double *xe,double *ye)
{
	double	slope_up = (yu-yc)/(xu-xc);	/* chord slope centre -> upper */
	double	slope_lo = (yl-yc)/(xl-xc);	/* chord slope centre -> lower */
	double	curv = 2*(slope_up-slope_lo)/(xu-xl);	/* second derivative */
	double	grad = slope_up - 0.5*curv*(xu-xc);	/* first derivative at xc */

	/* collinear points: there is no extremum to report */
	if (curv == 0.0) {
		*xe = xc;
		*ye = yc;
		return 0.0;
	}

	*xe = xc - grad/curv;
	*ye = yc + 0.5*grad*(*xe-xc);

	/* reliability test: only trust an extremum bracketed by the samples */
	if ((*xe<xl)||(*xe>xu))
		return 0.0;
	return curv;
}

/*
 * autoc -- autocorrelation of one analysis window
 *
 *   sp      input samples, sp[0..len-1]
 *   len     number of samples; acoeff[] must hold at least len values
 *   acoeff  output: acoeff[0] receives the mean square value, acoeff[lag]
 *           the mean lagged product for each requested lag, and every
 *           other entry in acoeff[0..len-1] is set to zero
 *   l1,l2   first and last lag (in samples) to evaluate
 *
 * Requested lags are limited to 1..len-1 so that every lag has at least one
 * product to average and no write falls outside acoeff[0..len-1].
 * Does nothing when len is not positive.
 */
void autoc(float *sp,int len,float *acoeff,int l1,int l2)
{
	int		lag,j;
	int		num;
	float		sum;

	if (len <= 0) return;

	/* zero autocorrelation vector */
	for (j=0;j<len;j++) acoeff[j]=(float)0.0;

	/* for zero delay */
	sum=(float)0.0;
	for (j=0;j<len;j++) sum += sp[j] * sp[j];
	acoeff[0] = sum/len;

	/* a lag of len or more has no overlapping samples (and no slot in acoeff) */
	if (l1 < 1) l1 = 1;
	if (l2 > len-1) l2 = len-1;

	/* for each delay in expected freq. range */
	for (lag=l1;lag<=l2;lag++) {
		sum=(float)0.0;
		num=len-lag;		/* number of overlapping samples, >= 1 */
		for (j=0;j<num;j++) sum += sp[j] * sp[j+lag];
		acoeff[lag] = sum/num;
	}
}

/* normalised cross-correlation */
float crossc(float *sp,int len,float *acoeff,int l1,int l2)
{
	register int	i,j;
	int		num;
	float		sum,sumsq1,sumsq2,norm;
	float		*s1,*s2;
	float		mean=0;

	/* zero autocorrelation vector */
	for (i=0;i<len;i++) acoeff[i]=(float)0.0;

	/* for zero delay */
	sum=(float)0.0;
	num=len;
	s1 = sp;
	for (j=0;j<num;j++,s1++) sum += *s1 * *s1;
	acoeff[0] = sum/len;

	/* for each delay in expected freq. range */
	for (i=l1;i<=l2;i++) {
		sumsq1=sumsq2=sum=(float)0.0;
		num = 3*i;	/* up to three possible cycles */
		if (num > (len-i)) num = len-i;
		s1 = sp;
		s2 = sp + i;
		for (j=0;j<num;j++) {
			sumsq1 += *s1 * *s1;
			sumsq2 += *s2 * *s2;
			sum += *s1++ * *s2++;
		}
		norm = (float)(sqrt(sumsq1)*sqrt(sumsq2)/num);
		acoeff[i] = (sum/num)/norm;
		mean += acoeff[i];
	}
	return(mean/(l2-l1+1));
}

/*
 * findpeak -- locate the largest correlation value strictly between two lags
 *
 *   acoeff  correlation function, acoeff[0..len-1]
 *   len     number of entries in acoeff[]
 *   l1,l2   lags bounding the search; only l1+1 .. l2-1 are candidates
 *
 * Returns the lag of the largest interior value, or 0 when no usable peak
 * exists: the range has no interior lag, the largest interior value is
 * below either end value (function still falling or rising), or the
 * interior is too flat (min > max/RANGE).
 *
 * The search limits are confined to 0..len-1 so no entry outside acoeff[]
 * is read.  The first interior lag is a valid candidate: the position is
 * initialised together with the running maximum so that a peak located
 * there is not mistaken for "no peak".
 */
int findpeak(float *acoeff,int len,int l1,int l2)
{
	int		i,pos;
	float		max,min;

	/* keep every index read below inside acoeff[] */
	if (l1 < 0) l1 = 0;
	if (l2 > len-1) l2 = len-1;
	if (l2-l1 < 2)
		return(0);	/* no interior lag to examine */

	/* find largest peak > max freq allowed */
	pos = l1+1;
	min = max = acoeff[pos];
	for (i=l1+2;i<l2;i++) {
		if (acoeff[i] > max) {
			max = acoeff[i];
			pos=i;
		}
		if (acoeff[i] < min)
			min = acoeff[i];
	}
	if ((max < acoeff[l1]) || (max < acoeff[l2]) || (min > max/RANGE))
		return(0);	/* falling, rising or too flat */
	return(pos);
}

/*
 * fxproc -- estimate fx for one analysis window by autocorrelation
 *
 *   sp      window of (already mean-removed and cubed) samples
 *   len     number of samples in the window; the global acoeff[] must hold
 *           at least len values
 *   lastfx  fx value returned for the previous window, 0 if it was unvoiced
 *
 * Returns the fx estimate, or 0 when the window is judged unvoiced.
 *
 * Reads the globals sampfreq, lofreq, hifreq, docross and dodisp, and leaves
 * the correlation function of the window in the global acoeff[].
 */
int fxproc(float *sp,int len,short lastfx)
{
	short	fxval;		/* estimated fx */
	int	lofx,hifx;	/* autocorrelation limits */
	int	firstlag;	/* first lag to compute */
	int	peak;		/* location of peak */
	int	onset;		/* voicing start or rapid change */
	double	fpeak,apeak;
	double	level;		/* peak height used for voicing decision */
	double	threshold;

	if ((len < 1) || (hifreq <= 0) || (lofreq <= 0))
		return(0);		/* nothing sensible to analyse */

	/* get autocorrelation limits, kept inside the window so that no lag
	   indexes beyond acoeff[len-1] and lag 0 is never used as a divisor */
	hifx = (int)(sampfreq/hifreq);		/* 2 ms = 500Hz */
	lofx = (int)(0.5+sampfreq/lofreq);	/* 15 ms = 60Hz */
	if (hifx < 1) hifx = 1;
	if (lofx > len-1) lofx = len-1;

	/* get autocorrelation coefficients; the display needs every lag from 1 */
	firstlag = (dodisp) ? 1 : hifx;
	if (docross)
		crossc(sp,len,acoeff,firstlag,lofx);
	else
		autoc(sp,len,acoeff,firstlag,lofx);

	/* use autocorrelation values as voicing determiner */
	if (acoeff[0] < 1000000)
		return(0);		/* no speech */
	if (lofx-hifx < 2)
		return(0);		/* no lag lies strictly inside the search range */

	/* find autocorrelation peak */
	peak = findpeak(acoeff,len,hifx,lofx);
	if (peak==0)
		return(0);		/* no peak */

	/* refine peak position by parabolic interpolation where reliable */
	if ((P3Interpolate_Extremum((double)(peak-1),(double)peak,(double)(peak+1),acoeff[peak-1],acoeff[peak],acoeff[peak+1],&fpeak,&apeak)!=0) && (fpeak > 0))
		fxval = (int)(0.5+sampfreq/fpeak);
	else
		fxval = (int)(0.5+sampfreq/peak);

	/* relative change is computed in floating point: an integer division
	   here would only exceed 0.33 once the change reached 100% */
	onset = (lastfx==0) || (2.0*abs(fxval-lastfx)/(fxval+lastfx) > 0.33);

	if (docross) {
		level = acoeff[peak];
		threshold = (onset) ? CRATIO1 : CRATIO2;
	}
	else {
		level = acoeff[peak]/acoeff[0];
		threshold = (onset) ? ARATIO1 : ARATIO2;
	}
	if (level < threshold)
		return(0);		/* peak not high enough */

	/* return fx */
	return(fxval);
}

/* scale one correlation value onto the 0..15 pixel range of a display record;
   a flat range, a value below the range or a NaN gives 0, a value above the
   range gives 15 */
static char fxac_pixel(float value,float lo,float hi)
{
	double	level;

	if (!(hi > lo)) return((char)0);		/* flat range: avoid 0/0 */
	level = 15.99 * (value-lo)/(hi-lo);
	if (!(level > 0.0)) return((char)0);		/* below range, or NaN */
	if (level > 15.99) level = 15.99;		/* above range */
	return((char)level);
}

/* main program */
/*
 * Decodes the command line, locates the input speech or Lx item, and writes
 * one FX value per analysis step to a new FX item (plus, with -d, one display
 * record per step holding the correlation function).
 */
int main(int argc,char *argv[])
{
	/* local variables */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument */
	int		errflg=0;	/* option error flag */
	int		c;		/* option char */
	int		it;
	char		*ty;
	int		iptype=SP_TYPE;
	char		*sptype = "0";	/* input sp sub-type = last */
	char		filename[SFSMAXFILENAME];
					/* database file name */
	int		fid,ofid;	/* file descriptors */
	int		dfid=-1;

	/* processing variables */
	int		stsize;		/* # samples in step */
	int		wisize;		/* # samples in window */
	int		hifx,lofx;	/* shortest and longest lag searched */
	int		i,f=0;		/* counters */
	int		nframes;	/* # output frames */
	short		fxval;		/* estimated fx value */
	short		lastfx=0;	/* previous fx value */
	int		j;
	float		sum,val,amin,amax;
	int		numpixel;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:dcl:h:w:s:")) != -1 ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Fx by autocorrelation V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* item spec */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == SP_TYPE) {
					iptype=SP_TYPE;
					sptype=ty;
				}
				else if (it == LX_TYPE) {
					iptype=LX_TYPE;
					sptype=ty;
				}
				else
					error("unsuitable item specification %s",optarg);
			}
			else
				error("illegal item specification %s",optarg);
			break;
		case 'd' :	/* create display item */
			dodisp++;
			break;
		case 'c' :	/* use normalised cross-correlation */
			docross++;
			break;
		case 'l' :	/* set low frequency */
			lofreq=atof(optarg);
			break;
		case 'h' :	/* set high frequency */
			hifreq=atof(optarg);
			break;
		case 'w':	/* window size: values > 1 are taken as milliseconds */
			windowsize=atof(optarg);
			if (windowsize > 1) windowsize /= 1000.0;
			break;
		case 's':	/* step size: values > 1 are taken as milliseconds */
			stepsize=atof(optarg);
			if (stepsize > 1) stepsize /= 1000.0;
			break;
		case '?' :	/* unknown */
			errflg++;
			break;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-c) (-d) (-l lofreq) (-h hifreq) (-w windowsize) (-s stepsize) file\n",PROGNAME);

	/* reject settings that would later divide by zero or never advance */
	if (lofreq <= 0)
		error("lowest frequency must be greater than zero",NULL);
	if (hifreq <= lofreq)
		error("highest frequency must be greater than lowest frequency",NULL);
	if (windowsize <= 0)
		error("window size must be greater than zero",NULL);
	if (stepsize <= 0)
		error("step size must be greater than zero",NULL);

	/* get filename (bounded copy into the local buffer) */
	if (optind < argc)
		snprintf(filename,sizeof(filename),"%s",sfsfile(argv[optind]));
	else
		error("no data file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0) {
		if (fid==-1)
			error("'%s': file not found",filename);
		else
			error("access error on '%s'",filename);
	}

	/* locate input speech item */
	if (!sfsitem(fid,iptype,sptype,&spitem))
		error("cannot find input item in '%s'",filename);

	/* get window parameters */
	sampfreq = 1.0/spitem.frameduration;
	stsize = (int)(0.5 + (stepsize/spitem.frameduration));
	wisize = (int)(0.5 + (windowsize/spitem.frameduration));
	numpixel = (int)(sampfreq/lofreq);
	hifx = (int)(sampfreq/hifreq);
	lofx = (int)(0.5+sampfreq/lofreq);

	/* check the analysis is possible at this sampling rate: the step must
	   advance, the search range must contain a lag, and the longest lag
	   must fit inside the window buffers */
	if (stsize < 1)
		error("step size too small for sampling rate",NULL);
	if ((hifx < 1) || (lofx-hifx < 2))
		error("frequency range unsuitable for sampling rate",NULL);
	if (wisize <= lofx)
		error("analysis window too short for lowest frequency",NULL);

	/* estimate # output frames for user info */
	nframes = (spitem.numframes/stsize)-(wisize/stsize)+1;

	/* get input buffer */
	if ((sp=(short *)sfsbuffer(&spitem,wisize)) == NULL)
		error("cannot get buffer for speech",NULL);
	if ((fsp=(float *)calloc(wisize,sizeof(float)))==NULL)
		error("cannot get buffer for speech",NULL);
	if ((acoeff=(float *)calloc(wisize,sizeof(float)))==NULL)
		error("cannot get buffer for speech",NULL);

	/* generate output item header; each value is timed at the centre of
	   the analysis window actually in use */
	sfsheader(&fxitem,FX_TYPE,0,2,1,
		stsize*spitem.frameduration,
		spitem.offset+windowsize/2,1,0,0);
	sprintf(fxitem.history,"%s(%d.%02d;lo=%g,hi=%g,win=%g,stp=%g%s)",
		PROGNAME,spitem.datatype,spitem.subtype,lofreq,hifreq,windowsize,stepsize,(docross)?";cross":"");

	/* open output channel */
	if ((ofid=sfschannel(filename,&fxitem)) < 0)
		error("cannot open temporary file",NULL);

	/* check for display output */
	if (dodisp) {
		sfsheader(&diitem,DI_TYPE,0,1,numpixel+sfsstruct[DI_TYPE],
			stsize*spitem.frameduration,
			spitem.offset+windowsize/2,1,0,0);
		sprintf(diitem.params,"minf=0,maxf=%g,sfsformat,title=Delay[s]",1.0/lofreq);
		sprintf(diitem.history,"%s/DI(%d.%02d;lo=%g,hi=%g%s)",
			PROGNAME,spitem.datatype,spitem.subtype,lofreq,hifreq,(docross)?";cross":"");

		if ((di=(struct di_rec *)sfsbuffer(&diitem,1)) == NULL)
			error("cannot get buffer for display",NULL);

		/* open output channel */
		if ((dfid=sfschannel(filename,&diitem)) < 0)
			error("cannot open temporary file",NULL);
	}

	/* perform processing */
	for (i=0;sfsread(fid,i,wisize,sp)==wisize;i+=stsize,f++) {

		/* copy waveform into float buffer */
		for (j=0,sum=0;j<wisize;j++) {
			fsp[j] = (float)(sp[j]);
			sum += fsp[j];
		}

		/* remove mean and cube */
		sum /= wisize;
		for (j=0;j<wisize;j++) {
			val = fsp[j] - sum;
			fsp[j] = val*val*val;
		}

		/* process window */
		fxval = fxproc(fsp,wisize,lastfx);

		/* save estimate */
		if (sfswrite(ofid,1,&fxval) != 1)
			error("write error on output file",NULL);

		/* save display record */
		if (dodisp) {
			di->posn=f;
			di->size=1;
			if (docross) {
				/* normalised correlation spans -1..1; lag 0 holds
				   energy, so it borrows the value of lag 1 */
				amin=-1;
				amax=1;
				for (j=1;j<numpixel;j++)
					di->pixel[j] = fxac_pixel(acoeff[j],amin,amax);
				di->pixel[0] = di->pixel[1];
			}
			else {
				/* scale to the range found in this window */
				amin=amax=acoeff[0];
				for (j=1;j<numpixel;j++) {
					if (acoeff[j]<amin) amin=acoeff[j];
					if (acoeff[j]>amax) amax=acoeff[j];
				}
				for (j=0;j<numpixel;j++)
					di->pixel[j] = fxac_pixel(acoeff[j],amin,amax);
			}
			if (sfswrite(dfid,1,di) != 1)
				error("write error on output file",NULL);
		}

		/* report progress */
		if (((f%10)==9) && ttytest()) {
			printf("\rFrame %d/%d",f+1,nframes);
			fflush(stdout);
		}

		lastfx = fxval;
	}
	if (ttytest()) printf("\r                       \r");

	/* and update file */
	if (!sfsupdate(filename))
		error("backup error on '%s'",filename);

	/* ... that's all folks */
	return(0);
}
