/* fmtrack -- track formant peaks and convert fm to sy */

/* m.a.huckvale - may 1986 */

/* version 1.0 */
/* version 1.1  - removed dropouts
		- increased formant distributions
   version 1.2  - new history access
*/
/* version 1.3 - december 1986
	- amplitude normalisation
	- "no display" flag
*/
/* version 2.0s - March 1988
	- SFS implementation, no display
*/

/*--------------------------------------------------------------------------*/
/**MAN
.TH FMTRACK 1 UCL
.SH NAME
fmtrack - track formant peaks and generate synthesizer control data
.SH SYNOPSIS
.B fmtrack
(-I) (-i item) (-v) file
.SH DESCRIPTION
.I fmtrack
converts a set of spectral peaks to synthesizer control data in which 
the frequency, amplitude and bandwidth of the first four formants are 
specified every 10ms.  Formant tracking proceeds in three passes: 
Pass 1 does primary reassignment of peaks, 
Pass 2 picks up basic tracks, Pass 3 "creeps" along existing tracks adding
to ends.  If a Fx contour is available in the file, it is used to create
the Fx parameter in the SY item, otherwise a monotone of 100Hz is inserted.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and exit.
.TP 11
.BI -i item
Select input item.
.TP 11
.B -v
Take voicing decision from FX data.  Default: use "flag" value in FM data.
.SH INPUT ITEMS
.IP 12.xx 11
Any raw formant estimates item.
.IP 4.xx 11
Any Fundamental frequency contour.
.SH OUTPUT ITEMS
.IP 7 11
Formant tracked synthesizer control data.
.SH VERSION/AUTHOR
2.0 - Mark Huckvale
.SH BUGS
Formant frequency distributions are assumed to be adult male.
*/
/*--------------------------------------------------------------------------*/

/* program name and version */
/* PROGNAME is used in the identify/usage messages and as the program
   name written into the output item history; PROGVERS is only printed
   by the -I option. */
#define	PROGNAME "fmtrack"
#define PROGVERS "2.0s"
/* global copy of the program name; not referenced elsewhere in this file
   (presumably picked up by the SFS library for error reports -- confirm) */
char	*progname=PROGNAME;

/* manifest constants */
/* Mean and spread of each formant's frequency distribution, used by fit()
   to score candidate peak-to-formant assignments (values presumably in Hz,
   adult male speaker -- see BUGS in the manual page). */
#define	F1MEAN	700.0		/* F1 mean frequency */
#define F1DEV	300.0		/* F1 standard deviation */
#define	F2MEAN	1800.0		/* F2 mean frequency */
#define	F2DEV	600.0		/* F2 standard deviation */
#define	F3MEAN	3000.0		/* F3 mean frequency */
#define	F3DEV	800.0		/* F3 standard deviation */
#define	F4MEAN	4000.0		/* F4 mean frequency */
#define	F4DEV	1000.0		/* F4 standard deviation */
#define SLEW	0.08		/* pass 2: max fractional change per frame inside a track */
#define ENDSLEW	0.1		/* pass 3: max fractional change per frame when extending a track end */
#define AMPMAX	60.0		/* maximum output amplitude */
#define LOWFX	40		/* fx values at or below this are treated as unvoiced */

/* global declarations */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>		/* strcpy, strlen */
#include <unistd.h>
#include <math.h>		/* math library */
#include "sfs.h"		/* database filing system structures */

/* global data */
struct item_header 	fmitem;		/* fm item header (input, filled in by main) */
struct item_header 	fxitem;		/* fx item header (only valid if an FX item was found) */
short			*fx;		/* fx buffer: fxitem.numframes values, smoothed in place */
struct item_header 	syitem;		/* output sy item header (built in createsy) */

/* global data tables */
/* ftab[i].pos[j] records whether formant slot j of frame i belongs to a
   track: -1 means "not tracked", otherwise it is the index of the entry in
   table[i].formant[] to use (passes 2 and 3 always store j itself). */
struct	ffrec	{
	short	pos[4];
} *ftab;
/* input FM records, one per frame; rewritten in place by proc1 */
struct fm_rec	*table;

/* best -- return the index of the largest of the first `len' entries of
   `tab'.  When several entries share the largest value the lowest index
   is returned.  tab[0] is always read, so the table must hold at least
   one value. */
int best(tab,len)
double	tab[];
int	len;
{
	int	winner = 0;	/* index of the largest value seen so far */
	int	idx = 1;

	while (idx < len) {
		/* strictly greater, so earlier entries win ties */
		if (tab[idx] > tab[winner])
			winner = idx;
		idx++;
	}
	return(winner);
}

/******************** Pass 1 - fix outrageous assignments *******************/
/* proc1 -- for every input frame, decide which of the four formant slots
   (F1..F4) each measured peak most plausibly belongs to, and rewrite the
   frame in place so that formant[0..3] hold F1..F4.

   For each frame the candidate assignments are enumerated, each is scored
   with fit(), and best() selects the highest score.  Candidate 0 is always
   "leave the peaks where they are", so the switch statements only need
   cases for the candidates that move something.  Slots with no peak are
   filled with the all-zero record `zero'.  On exit every frame has
   npeaks == 4.

   The order of the structure assignments inside each case matters: peaks
   are moved from the highest slot downwards so nothing is overwritten
   before it has been copied. */
void proc1()
{
	int		i,j,b;
	struct fm_rec	*fm;
	struct fm_rec_array zero;
	double		dist[20],fit();

	/* initialise "zero" formant */
	zero.freq = (float)0.0;
	zero.amp  = (float)0.0;
	zero.band = (float)0.0;

	/* scan each input frame */
	for (i=0;i<fmitem.numframes;i++) {
		fm = &table[i];
		switch (fm->npeaks) {
		case 0:		/* no peaks */
			break;
		case 1:		/* 1 peak */
			/* dist[k]: score with the single peak placed in slot k;
			   -1.0 marks a slot that fit() scores at its mean */
			dist[0]	= fit(fm->formant[0].freq,-1.0,-1.0,-1.0);
			dist[1]	= fit(-1.0,fm->formant[0].freq,-1.0,-1.0);
			dist[2]	= fit(-1.0,-1.0,fm->formant[0].freq,-1.0);
			dist[3]	= fit(-1.0,-1.0,-1.0,fm->formant[0].freq);
			b = best(dist,4);
			/* b == 0: peak is already in the F1 slot, nothing to move */
			switch (b) {
			case 1:	fm->formant[1] = fm->formant[0];
				fm->formant[0] = zero;
				fm->npeaks=2;
				break;
			case 2:	fm->formant[2] = fm->formant[0];
				fm->formant[1] = zero;
				fm->formant[0] = zero;
				fm->npeaks=3;
				break;
			case 3:	fm->formant[3] = fm->formant[0];
				fm->formant[2] = zero;
				fm->formant[1] = zero;
				fm->formant[0] = zero;
				fm->npeaks=4;
				break;
			}
			break;
		case 2:		/* 2 peaks */
			/* the six ways of placing two ordered peaks in four slots:
			   0:(F1,F2) 1:(F1,F3) 2:(F1,F4) 3:(F2,F3) 4:(F2,F4) 5:(F3,F4) */
			dist[0] = fit (fm->formant[0].freq,fm->formant[1].freq,-1.0,-1.0);
			dist[1] = fit (fm->formant[0].freq,-1.0,fm->formant[1].freq,-1.0);
			dist[2] = fit (fm->formant[0].freq,-1.0,-1.0,fm->formant[1].freq);
			dist[3] = fit (-1.0,fm->formant[0].freq,fm->formant[1].freq,-1.0);
			dist[4] = fit (-1.0,fm->formant[0].freq,-1.0,fm->formant[1].freq);
			dist[5] = fit (-1.0,-1.0,fm->formant[0].freq,fm->formant[1].freq);
			b = best(dist,6);
			/* in cases 1 and 2 slots between the moved peak and npeaks
			   hold stale data until the zero-fill after the switch
			   (case 1) or are left as read from the file (case 2,
			   formant[2]) */
			switch (b) {
			case 1:	fm->formant[2] = fm->formant[1];
				fm->formant[1] = zero;
				fm->npeaks=3;
				break;
			case 2:	fm->formant[3] = fm->formant[1];
				fm->formant[1] = zero;
				fm->npeaks=4;
				break;
			case 3:	fm->formant[2] = fm->formant[1];
				fm->formant[1] = fm->formant[0];
				fm->formant[0] = zero;
				fm->npeaks=3;
				break;
			case 4:	fm->formant[3] = fm->formant[1];
				fm->formant[1] = fm->formant[0];
				fm->formant[0] = zero;
				fm->npeaks=4;
				break;
			case 5:	fm->formant[3] = fm->formant[1];
				fm->formant[2] = fm->formant[0];
				fm->formant[1] = zero;
				fm->formant[0] = zero;
				fm->npeaks=4;
				break;
			}
			break;
		case 3:		/* 3 peaks */
			/* dist[k]: score with the one empty slot at F4, F3, F2, F1
			   respectively (k = 0..3) */
			dist[0] = fit(fm->formant[0].freq,fm->formant[1].freq,fm->formant[2].freq,-1.0);
			dist[1] = fit(fm->formant[0].freq,fm->formant[1].freq,-1.0,fm->formant[2].freq);
			dist[2] = fit(fm->formant[0].freq,-1.0,fm->formant[1].freq,fm->formant[2].freq);
			dist[3] = fit(-1.0,fm->formant[0].freq,fm->formant[1].freq,fm->formant[2].freq);
			b = best (dist,4);
			switch (b) {
			case 1:	fm->formant[3] = fm->formant[2];
				fm->formant[2] = zero;
				fm->npeaks=4;
				break;
			case 2:	fm->formant[3] = fm->formant[2];
				fm->formant[2] = fm->formant[1];
				fm->formant[1] = zero;
				fm->npeaks=4;
				break;
			case 3:	fm->formant[3] = fm->formant[2];
				fm->formant[2] = fm->formant[1];
				fm->formant[1] = fm->formant[0];
				fm->formant[0] = zero;
				fm->npeaks=4;
				break;
			}
			break;
		case 4:		/* 4 peaks */
			/* candidate 0 keeps all four peaks; candidates 1..3 discard
			   the highest peak and leave one slot empty.  The empty slot
			   is passed as 0.0 (not -1.0), which fit() scores at two
			   deviations below the mean rather than at the mean. */
			dist[0] = fit (fm->formant[0].freq,fm->formant[1].freq,
				       fm->formant[2].freq,fm->formant[3].freq);
			dist[1] = fit (fm->formant[0].freq,fm->formant[1].freq,
				       0.0,fm->formant[2].freq);
			dist[2] = fit (fm->formant[0].freq,0.0,
				       fm->formant[1].freq,fm->formant[2].freq);
			dist[3] = fit (0.0,fm->formant[0].freq,
				       fm->formant[1].freq,fm->formant[2].freq);

			b = best (dist,4);
			switch (b) {
			case 1:	fm->formant[3] = fm->formant[2];
				fm->formant[2] = zero;
				break;
			case 2:	fm->formant[3] = fm->formant[2];
				fm->formant[2] = fm->formant[1];
				fm->formant[1] = zero;
				break;
			case 3:	fm->formant[3] = fm->formant[2];
				fm->formant[2] = fm->formant[1];
				fm->formant[1] = fm->formant[0];
				fm->formant[0] = zero;
				break;
			}
			break;
		default:		/* 5 or more peaks */
			/* only the first five peaks are considered: dist[k] is the
			   score with peak number (4-k) left out */
			dist[0] = fit (fm->formant[0].freq,fm->formant[1].freq,
				       fm->formant[2].freq,fm->formant[3].freq);
			dist[1] = fit (fm->formant[0].freq,fm->formant[1].freq,
				       fm->formant[2].freq,fm->formant[4].freq);
			dist[2] = fit (fm->formant[0].freq,fm->formant[1].freq,
				       fm->formant[3].freq,fm->formant[4].freq);
			dist[3] = fit (fm->formant[0].freq,fm->formant[2].freq,
				       fm->formant[3].freq,fm->formant[4].freq);
			dist[4] = fit (fm->formant[1].freq,fm->formant[2].freq,
				       fm->formant[3].freq,fm->formant[4].freq);

			b = best (dist,5);
			/* close up over the discarded peak by shifting the higher
			   peaks down one slot */
			switch (b) {
			case 1:	fm->formant[3] = fm->formant[4];
				break;
			case 2:	fm->formant[2] = fm->formant[3];
				fm->formant[3] = fm->formant[4];
				break;
			case 3:	fm->formant[1] = fm->formant[2];
				fm->formant[2] = fm->formant[3];
				fm->formant[3] = fm->formant[4];
				break;
			case 4:	fm->formant[0] = fm->formant[1];
				fm->formant[1] = fm->formant[2];
				fm->formant[2] = fm->formant[3];
				fm->formant[3] = fm->formant[4];
				break;
			}
			break;
		}
		/* whatever found, set up four peaks in record */
		/* (slots from the current npeaks up to 3 are zeroed; when npeaks
		   is already 4 or more the loop does nothing) */
		for (j=fm->npeaks;j<4;j++) fm->formant[j] = zero;
		fm->npeaks = 4;
	}
}

/* fit -- score how well four frequencies match the expected F1..F4
   distributions; a larger result means a more plausible assignment.

   Two argument values are treated specially before scoring:
	 0.0	the slot is scored at (mean - 2*deviation) of its formant
	-1.0	the slot is scored exactly at the mean of its formant

   The result is the product, over the four formants, of
	exp(-(mean - f)^2 / dev^2) / dev^2

   The definition deliberately stays in old-style form with float
   parameters: callers declare it as "double fit()" and pass doubles. */
double fit(f1,f2,f3,f4)
float	f1,f2,f3,f4;
{
	double	d1,d2,d3,d4;	/* distance of each frequency from its mean */
	double	score;

	/* replace the special values (the two tests cannot both apply) */
	if (f1 == 0.0)
		f1 = F1MEAN-F1DEV*2;
	else if (f1 == -1.0)
		f1 = F1MEAN;
	if (f2 == 0.0)
		f2 = F2MEAN-F2DEV*2;
	else if (f2 == -1.0)
		f2 = F2MEAN;
	if (f3 == 0.0)
		f3 = F3MEAN-F3DEV*2;
	else if (f3 == -1.0)
		f3 = F3MEAN;
	if (f4 == 0.0)
		f4 = F4MEAN-F4DEV*2;
	else if (f4 == -1.0)
		f4 = F4MEAN;

	d1 = F1MEAN - f1;
	d2 = F2MEAN - f2;
	d3 = F3MEAN - f3;
	d4 = F4MEAN - f4;

	/* accumulate the four per-formant terms in F1..F4 order */
	score  = exp(-d1*d1/(F1DEV * F1DEV))/(F1DEV * F1DEV);
	score *= exp(-d2*d2/(F2DEV * F2DEV))/(F2DEV * F2DEV);
	score *= exp(-d3*d3/(F3DEV * F3DEV))/(F3DEV * F3DEV);
	score *= exp(-d4*d4/(F4DEV * F4DEV))/(F4DEV * F4DEV);

	return(score);
}

/******************** Pass 2 - Basic Continuity ********************/
/* proc2 -- mark the formant values that sit on a locally stable track.

   Every entry of ftab is first reset to -1 ("not tracked").  Then, for
   each interior frame and each formant slot, the value is compared with
   its neighbours in the previous and next frames.  If one neighbour is
   missing (frequency 0.0) the frame two away on that side is used
   instead; a missing "next" takes priority over a missing "last", so at
   most one hole is bridged.  The slot is marked as tracked when all three
   values are present and either
	- the current value lies between its two neighbours, or
	- it differs from each neighbour by less than SLEW as a fraction
	  of the current value.
   The first and last frames are never marked by this pass. */
void proc2()
{
	int	frame,slot;
	double	prev,here,after;

	/* clear ftable */
	for (frame=0;frame<fmitem.numframes;frame++) {
		for (slot=0;slot<4;slot++)
			ftab[frame].pos[slot] = -1;
	}

	/* triplet stability assignment */
	for (frame=1;frame<fmitem.numframes-1;frame++) {
		for (slot=0;slot<4;slot++) {
			prev  = table[frame-1].formant[slot].freq;
			here  = table[frame  ].formant[slot].freq;
			after = table[frame+1].formant[slot].freq;

			/* bridge a single hole on one side only */
			if ((after == 0.0) && (frame<fmitem.numframes-2))
				after = table[frame+2].formant[slot].freq;
			else if ((prev == 0.0) && (frame>1))
				prev = table[frame-2].formant[slot].freq;

			/* no complete triplet: leave the slot untracked */
			if ((prev == 0.0) || (here == 0.0) || (after == 0.0))
				continue;

			/* value lies between its neighbours */
			if (((prev <= here) && (here <= after)) ||
			    ((prev >= here) && (here >= after))) {
				ftab[frame].pos[slot]=slot;
				continue;
			}

			/* local extremum, but close to both neighbours */
			if ((fabs((here-prev)/here) < SLEW) &&
			    (fabs((after-here)/here) < SLEW))
				ftab[frame].pos[slot]=slot;
		}
	}
}

/******************** Pass 3 - Forward/Backward Creep  ********************/
/* proc3 -- extend the ends of the tracks found by pass 2.

   A slot that is still untracked (-1) is added to a track when the same
   slot is tracked in the two adjacent frames on one side, the slot holds
   a non-zero frequency, and that frequency differs from the nearest
   tracked frame by less than ENDSLEW as a fraction of its own value.

   The backward pass runs from the end of the data to the start and the
   forward pass from the start to the end; because each pass updates ftab
   as it goes, a track can creep by more than one frame in a single pass. */
void proc3()
{
	int	fr,slot;
	double	anchor,cand;

	/* backward pass: grow tracks towards earlier frames */
	for (fr=fmitem.numframes-3;fr>=0;fr--) {
		for (slot=0;slot<4;slot++) {
			if (ftab[fr+2].pos[slot] < 0) continue;
			if (ftab[fr+1].pos[slot] < 0) continue;
			if (ftab[fr  ].pos[slot] != -1) continue;

			anchor = table[fr+1].formant[slot].freq;
			cand   = table[fr  ].formant[slot].freq;
			if ((cand != 0.0) && (fabs((cand-anchor)/cand) < ENDSLEW))
				ftab[fr].pos[slot]=slot;
		}
	}

	/* forward pass: grow tracks towards later frames */
	for (fr=2;fr<fmitem.numframes;fr++) {
		for (slot=0;slot<4;slot++) {
			if (ftab[fr-2].pos[slot] < 0) continue;
			if (ftab[fr-1].pos[slot] < 0) continue;
			if (ftab[fr  ].pos[slot] != -1) continue;

			anchor = table[fr-1].formant[slot].freq;
			cand   = table[fr  ].formant[slot].freq;
			if ((cand != 0.0) && (fabs((cand-anchor)/cand) < ENDSLEW))
				ftab[fr].pos[slot]=slot;
		}
	}
}

/* get fx value from fx buffer */
/* getfxval -- look up the FX value nearest to a given time.

   t	time in seconds relative to the start of the FM item; it is
	shifted by the difference between the FM and FX item offsets to
	give a time relative to the start of the FX item.

   Returns the entry of the global fx[] buffer whose frame index is
   nearest to that time, or 0 when the index falls outside the buffer.
   Valid indices are 0 .. fxitem.numframes-1, so an index equal to
   numframes is rejected as well (reading fx[numframes] would run off the
   end of the buffer). */
int getfxval(t)
double	t;	/* time in seconds */
{
	int	ptr;

	/* convert to the FX item's time base, then to the nearest frame */
	t += fmitem.offset-fxitem.offset;
	ptr = (int)((t/fxitem.frameduration) + 0.5);
	if ((ptr < 0) || (ptr >= fxitem.numframes))
		return(0);
	else
		return(fx[ptr]);
}


/********************** Create SY item from tables *****************/
void createsy(filename,fxok,fxvoice)
char		*filename;
int		fxok,fxvoice;
{
	int		i,j,k,f;
	int		k1,k2;
	int		ofid;
	double		m1,m2;
	float		flast[4];
	struct fm_rec_array	f1,f2;
	int		sampno;
	short		frame[19];
	double		maxamp=0.0,ampmod=0.0,amp;

	/* find maximum amplitude and amplitude modification */
	for (i=0;i<fmitem.numframes;i++) {
		for (j=0;j<table[i].npeaks;j++) {
			if (table[i].formant[j].amp > maxamp)
				maxamp = table[i].formant[j].amp;
		}
	}
	ampmod = AMPMAX - maxamp;

	/* create output item header */
	sfsheader(&syitem,SY_TYPE,0,2,19,0.01,fmitem.offset,1,0,0);
	if (fxok && fxvoice) 
		sprintf(syitem.history,"%s(%d.%02d,%d.%02d;fxvoice)",PROGNAME,fmitem.datatype,fmitem.subtype,FX_TYPE,fxok);
	else if (fxok) 
		sprintf(syitem.history,"%s(%d.%02d,%d.%02d)",PROGNAME,fmitem.datatype,fmitem.subtype,FX_TYPE,fxok);
	else
		sprintf(syitem.history,"%s(%d.%02d)",PROGNAME,fmitem.datatype,fmitem.subtype);
	sprintf(syitem.params,"maxamp=%g",AMPMAX);

	/* open output channel */
	if ((ofid=sfschannel(filename,&syitem)) < 0)
		error("unable to open channel to '%s'",filename);

	/* find first good formant values */
	for (i=0;i<4;i++) flast[i]=(float)0.0;
	i=0;
	while (flast[0]*flast[1]*flast[2]*flast[3] == 0.0) {
		for (j=0;j<4;j++) {
			k = ftab[i].pos[j];
			if ((k >= 0) && (flast[j] == 0.0)) flast[j]=table[i].formant[k].freq;
		}
		i++;
	}

	/* produce SY item */
	for (i=0;;i++) {
		/* find adjacent formant frames */
		sampno = (int)(i * syitem.frameduration / fmitem.frameduration);
		f = 0;
		while ((table[f+1].posn < sampno) && ((f+1) < fmitem.numframes)) f++;
		if (f+1 >= fmitem.numframes) break;

		/* calculate addmixture coeffs */
		m1 = ((float)(table[f+1].posn - sampno))/(table[f+1].posn - table[f].posn);
		m2 = ((float)(sampno - table[f].posn))/(table[f+1].posn - table[f].posn);

		/* get FX */
		if (fxok)
			frame[0]=getfxval(i * syitem.frameduration);
		else
			frame[0] = 100;	/* default 100 Hz */

		/* set up SY frame parameters */
		frame[1] = 0;
		frame[2] = 0;
		for (j=0;j<4;j++) {
			k1 = ftab[f].pos[j];
			if (k1 >= 0) 
				f1 = table[f].formant[k1]; 
			else {
				f1.freq = flast[j];
				f1.amp  = table[f].formant[j].amp;
				f1.band = table[f].formant[j].band;
			}
			flast[j] = f1.freq;
			k2 = ftab[f+1].pos[j];
			if (k2 >= 0) 
				f2 = table[f+1].formant[k2]; 
			else {
				f2.freq = flast[j];
				f2.amp  = table[f+1].formant[j].amp;
				f2.band = table[f+1].formant[j].band;
			}
			frame[3*(j+1)] = (short)(m1*f1.freq + m2*f2.freq);
			amp = 10 * (m1*f1.amp + m2*f2.amp + ampmod);
			frame[3*(j+1)+1] = (short)((amp < 0.0 ) ? 0.0 : amp);
			frame[3*(j+1)+2] = (short)(m1*f1.band + m2*f2.band);
		}
		frame[15] = 250;
		frame[16] = frame[4];
		frame[17] = 0;
		frame[18] = 248 * (m1 * table[f].flag + m2 * table[f+1].flag);
		/* write to file */
		if (!sfswrite(ofid,1,frame))
			error("write error on output file",NULL);
	}
}

/* correct voicing in FM item */
/* vcorrect -- overwrite the "flag" field of every FM frame with a voicing
   decision taken from the FX data: the frame position is converted to a
   time using the FM frame duration, and the flag becomes 0 when
   getfxval() returns 0 for that time and 1 otherwise. */
void vcorrect()
{
	int	frameno;
	double	when;

	for (frameno=0;frameno<fmitem.numframes;frameno++) {
		when = table[frameno].posn * fmitem.frameduration;
		table[frameno].flag = (getfxval(when) != 0);
	}
}

/* smooth provided FX contour -- filling in holes */
/* smoothfx -- replace every run of unvoiced values in the global fx[]
   buffer (values not above LOWFX) so that the contour has no gaps.

	- a gap between two voiced stretches is filled by linear
	  interpolation from the last voiced value before it to the first
	  voiced value after it;
	- a gap at the very start, where no earlier voiced value exists, is
	  filled with the first voiced value that follows;
	- a gap that runs to the end of the buffer is filled, over its
	  whole length, with the last voiced value seen (0 if the buffer
	  holds no voiced value at all).

   The buffer is modified in place. */
void smoothfx()
{
	int	i,j;
	int	startfx=0,length=0,endfx;
	int	ingap=0,first=1;	/* first: no voiced value seen yet */
	float	slope;

	/* get rid of gaps in input FX */
	for (i=0;i<fxitem.numframes;i++) {
		if (!ingap) {
			if (fx[i] > LOWFX) {
				startfx=fx[i];
				/* a genuine start value is now known, so later
				   gaps must be interpolated, not filled flat */
				first=0;
			}
			else {
				ingap++;
				length=1;
			}
		}
		else {
			/* length counts the gap frames plus the current frame */
			length++;
			if (fx[i] > LOWFX) {
				endfx=fx[i];
				if (first) {
					/* leading gap: nothing to interpolate from */
					startfx=endfx;
					first=0;
				}
				slope = (float)(endfx-startfx)/length;
				/* i-length+1 is the first frame of the gap */
				for (j=1;j<length;j++)
					fx[i-length+j]=(short)(startfx+j*slope);
				startfx=fx[i];
				ingap=0;
			}
		}
	}
	/* trailing gap: here length is exactly the number of gap frames, so
	   the gap starts at numframes-length and j must start from 0 */
	if (ingap)
		for (j=0;j<length;j++)
			fx[fxitem.numframes-length+j]=startfx;

}

/* main program */
/* main -- decode the command line, load the FM item (and the FX item if
   one is present) from the named file, run the three tracking passes,
   write the SY item and update the file.

   Options handled here:
	-I	print program name and version, then exit
	-i item	select the input FM or FX item
	-v	take the voicing decision from the FX data
	-n	accepted and ignored (the "no display" flag of earlier
		versions; this version has no display)

   All failures are reported through error(); the code following each
   call relies on error() not returning. */
int main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	char		*fmtype = "0";	/* default FM sub-type = last */
	char		*fxtype = "0";	/* default FX sub-type = last */
	int		fxok=0;		/* fx loaded ok flag */
	int		fxvoice=0;	/* use FX voicing */
	int		c;		/* option switch */
	int		it;		/* item/sub-type specifiers */
	char		*ty;
	/* file variables */
	char		filename[SFSMAXFILENAME];	/* dbase file name */
	char		*path;		/* file name as returned by sfsfile() */
	int		fid;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:nv")) != EOF )
		switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Formant tracking V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == FM_TYPE)
					fmtype = ty;
				else if (it == FX_TYPE)
					fxtype = ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'v' :	/* voicing from FX */
			fxvoice++;
			break;
		case 'n' :	/* old "no display" flag: nothing to do */
			break;
		case '?' :	/* unknown */
			errflg++;
			break;
	}
	/* check for option decoding error */
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-v) dbase_file",PROGNAME);

	/* get filename */
	if (optind < argc) {
		path = sfsfile(argv[optind]);
		/* refuse names that would overflow the fixed-size buffer */
		if (strlen(path) >= sizeof(filename))
			error("filename too long '%s'",argv[optind]);
		strcpy(filename,path);
	}
	else
		error("no data file specified",NULL);

	/* open data file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0) {
		if (fid==-1)
			error("cannot find file '%s'",filename);
		else
			error("access error on '%s'",filename);
	}

	/* read in FM item */
	if (!sfsitem(fid,FM_TYPE,fmtype,&fmitem))
		error("cannot find input FM item in '%s'",filename);
	if ((table = (struct fm_rec *)sfsbuffer(&fmitem,fmitem.numframes))==NULL)
		error("cannot get buffer for FM item",NULL);
	if (sfsread(fid,0,fmitem.numframes,table) != fmitem.numframes)
		error("read error on '%s'",filename);

	/* load FX item if present */
	if (sfsitem(fid,FX_TYPE,fxtype,&fxitem)) {
		if ((fx=(short *)sfsbuffer(&fxitem,fxitem.numframes))==NULL)
			error("cannot get buffer for FX item",NULL);
		if (sfsread(fid,0,fxitem.numframes,fx) != fxitem.numframes)
			error("read error on '%s'",filename);
		/* voicing must be read before smoothing: smoothfx() fills the
		   unvoiced gaps that vcorrect() looks for */
		if (fxvoice) vcorrect();
		smoothfx();
		fxok++;
	}

	/* Pass 1 - fix outrageous assignments */
	proc1();

	/* Pass 2 - basic continuity */
	ftab = (struct ffrec *) calloc(fmitem.numframes,sizeof(struct ffrec));
	if (ftab == NULL)
		error("unable to create internal buffer for frequencies",NULL);
	proc2();

	/* Pass3 - forward and backward creep */
	proc3();

	/* create SY item */
	createsy(filename,fxok,fxvoice);

	/* update file */
	if (!sfsupdate(filename))
		error("update error on %s",filename);

	/* and exit */
	exit(0);
}

