/* fxac -- estimate FX by autocorrelation method */

/* m.a. huckvale - february 1988 */

/* version 1.1 - february 1996
	- eliminate filtering
	- cube waveform prior to autocorrelation
*/

/* program name and version: printed by the -I option, used in the usage
   message and recorded in the output item history by main() */
#define PROGNAME "fxac"
#define PROGVERS "1.1"
/* program name as a global string.
   NOTE(review): not referenced in this file - presumably read by the SFS
   library when reporting errors; confirm before removing */
char *progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH FXAC SFS1 UCL
.SH NAME
fxac - estimate fx from sp using autocorrelation
.SH SYNOPSIS
.B fxac
(-I) (-i item) file
.SH DESCRIPTION
.I fxac
estimates FX from a speech waveform by the following process: (i) cubing waveform
sample values, (ii) autocorrelation, and (iii) voicing & fundamental frequency
decision. The output FX is estimated 
in 25ms windows with a repetition time of 5ms.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and exit.
.TP 11
.BI -i item
Select input item number.
.SH INPUT ITEMS
.IP 1.xx 11
Speech waveform.
.SH OUTPUT ITEMS
.IP 4 11
FX estimate by autocorrelation.
.SH VERSION/AUTHOR
1.1 - Mark Huckvale.
.SH SEE ALSO
fxcep(SFS1)
*/
/*--------------------------------------------------------------------------*/

/* global declarations */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <math.h>
#include "sfs.h"			/* header structures */

/* FX estimation control */
/* STEP and WINDOW are durations in seconds; main() divides each by the
   input frame duration to turn it into a sample count */
#define	STEP	0.005	/* 5ms step */
#define WINDOW	0.025	/* 25ms window */
/* findpeak() rejects a candidate whose maximum is below RANGE times the
   minimum found over the same lag range */
#define RANGE	4.0	/* required dynamic range of autocorrelation */
/* fxproc() compares acoeff[peak]/acoeff[0] with ARATIO1 when the previous
   frame was unvoiced or fx moved by more than 50, otherwise with ARATIO2 */
#define ARATIO1	0.35	/* size of autocorrelation peak for voicing start */
#define ARATIO2 0.25	/* size of autocorrelation peak for voicing continue */

/* global data */
/* all of these are set up by main(); fxproc() reads sampfreq and acoeff */
struct item_header 	spitem;		/* input speech item header */
short			*sp;		/* speech data buffer (1 x 25ms window) */
float			*fsp;		/* float speech buffer (holds the cubed samples) */
struct item_header 	fxitem;		/* output fx item header */
int			sampfreq;	/* sampling frequency in units of 100Hz, i.e. samples per 10ms */
float			*acoeff;	/* autocorrelation function, one value per lag, window-length buffer */

/* autocorrelation */
/*
 * autoc - autocorrelation of one signal window over a range of lags
 *
 *	sp	input samples
 *	len	number of samples in sp; acoeff must hold at least len values
 *	acoeff	output vector:
 *		  acoeff[0]  mean of sp[j]*sp[j] over the whole window
 *		  acoeff[i]  for l1 <= i < l2, mean of sp[j]*sp[j+i] over the
 *			     len-i samples where both terms exist
 *		  every other entry below len is set to zero
 *	l1,l2	first lag, and one past the last lag, to evaluate
 *
 * The lag range is clamped to 1..len so that a caller passing limits
 * outside the window can neither index outside the buffers nor divide by
 * a zero overlap count.  An empty window (len <= 0) leaves acoeff untouched.
 */
void autoc(sp,len,acoeff,l1,l2)
float	*sp;
int	len;
float	*acoeff;
int	l1,l2;
{
	register int	i,j;
	int		num;
	float		sum;
	float		*s1,*s2;

	/* nothing can be computed, or safely written, for an empty window */
	if (len <= 0) return;

	/* keep the lag range inside the buffers; lag 0 is handled separately */
	if (l1 < 1) l1 = 1;
	if (l2 > len) l2 = len;

	/* zero autocorrelation vector */
	for (i=0;i<len;i++) acoeff[i]=0;

	/* for zero delay */
	sum=0;
	num=len;
	s1 = sp;
	for (j=0;j<num;j++,s1++) sum += *s1 * *s1;
	acoeff[0] = sum/len;

	/* for each delay in expected freq. range */
	for (i=l1;i<l2;i++) {
		sum=0;
		num=len-i;	/* overlap shrinks with lag; >= 1 after clamping */
		s1 = sp;
		s2 = sp + i;
		for (j=0;j<num;j++) sum += *s1++ * *s2++;
		acoeff[i] = sum/num;	/* normalise by the overlap actually used */
	}

}

/* find peak */
/*
 * findpeak - locate the largest autocorrelation value for lags l1..l2-1
 *
 *	acoeff	autocorrelation vector, indexed by lag
 *	len	length of acoeff (not used by this routine)
 *	l1,l2	first lag, and one past the last lag, to search
 *
 * Returns the lag of the maximum, or 0 for "no peak" when
 *	- no value above acoeff[l1] is found (the maximum sits on the
 *	  lower edge of the range), or
 *	- the maximum is below acoeff[l2], or
 *	- the maximum is below RANGE times the minimum over the range.
 *
 * NOTE(review): acoeff[l2] is read here, but autoc() in this file only
 * fills lags below l2 and zeroes the rest, so as called from fxproc()
 * this entry looks to be always zero - confirm the boundary test is
 * doing what was intended.
 */
int findpeak(acoeff,len,l1,l2)
float	*acoeff;
int	len;
int	l1,l2;
{
	float	hi,lo,val;
	int	lag,best;

	/* start from the lower edge; best stays 0 unless something beats it */
	best = 0;
	hi = lo = acoeff[l1];

	/* track the running maximum (with its lag) and the running minimum */
	lag = l1;
	while (++lag < l2) {
		val = acoeff[lag];
		if (val > hi) {
			hi = val;
			best = lag;
		}
		if (val < lo)
			lo = val;
	}

	/* reject a maximum that is beaten just outside the range */
	if (hi < acoeff[l2])
		return(0);

	/* reject a maximum without enough dynamic range */
	if (hi < RANGE*lo)
		return(0);

	return(best);
}

/* estimate fx by autocorrelation */
/*
 * fxproc - make a voicing decision and fx estimate for one window
 *
 *	sp	input signal buffer
 *	len	buffer length in samples
 *	lastfx	fx value returned for the previous window (0 = unvoiced)
 *
 * Uses the globals sampfreq (samples per 10ms, set in main) and acoeff
 * (work buffer filled here by autoc).  Returns 0 when the window is
 * judged unvoiced, otherwise 100*sampfreq/lag for the chosen peak lag.
 */
int fxproc(sp,len,lastfx)
float	*sp;			/* input signal buffer */
int	len;			/* buffer length */
short	lastfx;			/* previous fx value */
{
	int	minlag,maxlag;	/* lag search limits */
	int	lag;		/* lag of autocorrelation peak */
	short	fx;		/* candidate fx value */
	double	threshold;	/* required relative peak height */

	/* search lags from a quarter of sampfreq up to 1.25 x sampfreq,
	   i.e. 2.5ms (400Hz) to 12.5ms (80Hz) */
	minlag = sampfreq/4;
	maxlag = 1.25*sampfreq;
	autoc(sp,len,acoeff,minlag,maxlag);
	lag = findpeak(acoeff,len,minlag,maxlag);

	/* zero-lag value too small: treat as no speech */
	if (acoeff[0] < 1000000)
		return(0);

	/* no usable peak */
	if (lag==0)
		return(0);

	fx = 100*sampfreq/lag;

	/* a voicing start, or a jump of more than 50 from the previous
	   value, needs a stronger peak than a continuation does */
	if ((lastfx==0) || (abs(fx-lastfx) > 50))
		threshold = ARATIO1;
	else
		threshold = ARATIO2;

	if ((acoeff[lag]/acoeff[0]) < threshold)
		return(0);	/* peak not high enough */

	/* return fx */
	return(fx);
}

/* main program */
/*
 * main - command line driver for fxac
 *
 * Decodes the -I and -i options, opens the named SFS file, locates the
 * input speech item, then slides a WINDOW-long window over the waveform
 * in STEP-long hops.  Each window is cubed sample by sample, passed to
 * fxproc(), and the resulting fx value is written to a new FX item which
 * is added to the file by sfsupdate().
 *
 * Failures are reported through error().
 * NOTE(review): the code relies on error() not returning (e.g. filename is
 * used after the "no data file" branch) - confirm against the SFS library.
 */
int main(argc,argv)
int argc;
char *argv[];
{
	/* local variables */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument */
	int		errflg=0;	/* option error flag */
	int		c;		/* option char */
	int		it;		/* item type decoded from -i */
	char		*ty;		/* item sub-type decoded from -i */
	char		*sptype = "0";	/* input sp sub-type = last */
	char		filename[SFSMAXFILENAME];
					/* database file name */
	int		fid,ofid;	/* file descriptors */

	/* processing variables */
	int		stsize;		/* # samples in step */
	int		wisize;		/* # samples in window */
	int		i,f=0;		/* counters: sample position, frame number */
	int		nframes;	/* # output frames */
	short		fxval;		/* estimated fx value */
	short		lastfx=0;	/* previous fx value */
	int		j;

	/* decode switches (getopt returns -1 once the options are used up) */
	while ( (c = getopt(argc,argv,"Ii:")) != -1 ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Fx by autocorrelation V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* item spec */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == SP_TYPE)
					sptype=ty;
				else
					error("unsuitable item specification %s",optarg);
			}
			else
				error("illegal item specification %s",optarg);
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) file\n",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no data file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0) {
		if (fid==-1)
			error("'%s': file not found",filename);
		else
			error("access error on '%s'",filename);
	}

	/* locate input speech item */
	if (!sfsitem(fid,SP_TYPE,sptype,&spitem))
		error("cannot find input SP item in '%s'",filename);

	/* the window sizes below divide by the frame duration */
	if (spitem.frameduration <= 0)
		error("bad sampling interval in '%s'",filename);

	/* get window parameters (rounded to the nearest whole sample) */
	sampfreq = 0.5 + (0.01/spitem.frameduration);
	stsize = 0.5 + (STEP/spitem.frameduration);
	wisize = 0.5 + (WINDOW/spitem.frameduration);

	/* a step shorter than one sample would divide by zero below and
	   leave the read loop stuck at the same position for ever */
	if ((stsize < 1) || (wisize < 1))
		error("sampling rate too low in '%s'",filename);

	/* estimate # output frames for user info */
	nframes = (spitem.numframes/stsize)-(wisize/stsize)+1;

	/* get input buffer */
	if ((sp=(short *)sfsbuffer(&spitem,wisize)) == NULL)
		error("cannot get buffer for speech",NULL);
	if ((fsp=calloc(wisize,sizeof *fsp))==NULL)
		error("cannot get buffer for speech",NULL);
	if ((acoeff=calloc(wisize,sizeof *acoeff))==NULL)
		error("cannot get buffer for speech",NULL);

	/* generate output item header: one value per step, with the time
	   origin moved on by half a window from the input offset */
	sfsheader(&fxitem,FX_TYPE,0,2,1,stsize*spitem.frameduration,spitem.offset+WINDOW/2,1,0,0);
	sprintf(fxitem.history,"%s(%d.%02d)",
		PROGNAME,spitem.datatype,spitem.subtype);

	/* open output channel */
	if ((ofid=sfschannel(filename,&fxitem)) < 0)
		error("cannot open temporary file",NULL);

	/* perform processing: stop at the first window that cannot be
	   read in full */
	for (i=0;sfsread(fid,i,wisize,sp)==wisize;i+=stsize,f++) {

		/* copy cubed waveform into float buffer */
		for (j=0;j<wisize;j++)
			fsp[j] = (float)(sp[j])*(float)(sp[j])*(float)(sp[j]);

		/* process window */
		fxval = fxproc(fsp,wisize,lastfx);

		/* save estimate, and do not carry on silently if it was lost */
		if (sfswrite(ofid,1,&fxval) != 1)
			error("write error on output file",NULL);

		/* report progress every tenth frame when on a terminal */
		if (((f%10)==9) && ttytest()) {
			printf("\rFrame %d/%d",f+1,nframes);
			fflush(stdout);
		}

		lastfx = fxval;
	}
	if (ttytest()) printf("\r                       \r");

	/* and update file */
	if (!sfsupdate(filename))
		error("backup error on '%s'",filename);

	/* ... that's all folks */
	exit(0);
}
