/* formanal -- speech waveform to formant estimates by LPC and root-solving */

/* Program name and version; both are printed by the -I option in main(). */
#define PROGNAME "formanal"
#define PROGVERS "1.0"
/* Global copy of the program name (presumably read by the SFS library for
   its messages -- TODO confirm against sfs.h). */
char	*progname=PROGNAME;

/* Define EBUG instead to make main() also write the intermediate
   (downsampled, high-passed, LPC) items to the file. */
#undef EBUG

/*-------------------------------------------------------------------------*/
/**MAN
.TH FORMANAL SFS1 UCL
.SH NAME
formanal - formant frequency estimation with built-in tracker
.SH SYNOPSIS
.B formanal
(-I) (-i item) (-s) file
.SH DESCRIPTION
.I formanal
is a formant tracker based on LPC polynomial roots and dynamic programming.
.PP
At each frame, the LPC poles are ordered by increasing frequency.  All
   "reasonable" mappings of the poles to F1, F2, ... are performed.
   The cost of "connecting" each of these mappings with each of the mappings
   in the previous frame is computed.  The lowest cost connection is then
   chosen as the optimum one.  At each frame, each mapping has associated
   with it a cost based on the formant bandwidths and frequencies.  This
   "local" cost is finally added to the cost of the best "connection."  At
   the end of the utterance the best
   mappings for the entire utterance may be found by retracing back through
   the best candidate mappings.
.PP
This algorithm was originally used in the "formant" program of ESPS.
.SS LICENCE
Some code in this program was originally written by David Talkin and
John Shore as part of the Entropic Signal Processing System and is
used under licence from Microsoft.
.SH OPTIONS
.TP 11
.B -I
Identify program name and version number.
.TP 11
.BI -i item
Select input item number.
.TP 11
.BI -s
Output Synthesizer control data (SY) rather than Formant estimates (FM).
To generate SY data, a fundamental frequency track must be available in the file.
.SH INPUT ITEMS
.IP 1.xx
Speech waveform.
.SH OUTPUT ITEMS
.IP FM
Formant estimates
.IP SY
Synthesizer control data.  Requires FX item in file.
.SH VERSION/AUTHOR
.IP ESPS
David Talkin and John Shore
.IP SFS
Mark Huckvale
.SH SEE ALSO
fmanal(SFS1) fmtrack(SFS1)
.SH BUGS
*/
/*--------------------------------------------------------------------------*/

/* standard definitions */

#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <malloc.h>
#include "sfs.h"
#include "filter.h"

/* global data */
struct item_header	spitem;		/* speech item header */
short			*sp;		/* speech buffer */
struct item_header	opitem;		/* output speech item header */
struct item_header	pcitem;		/* output LPC item header */
struct pc_rec		*pc;		/* LPC frame buffer (allocated with sfsbuffer() in main) */
struct item_header	fmitem;		/* output formant item header */
struct fm_rec		*fm;		/* formant frame buffer (allocated with sfsbuffer() in main) */
struct item_header	fxitem;		/* input FX item */
short				*fx;		/* FX sample buffer, indexed by getfxval() */
struct item_header	syitem;		/* output SY item header */
short				syframe[20];	/* one SY frame (not referenced in the code visible here) */

#define FALSE 0
#define TRUE 1
int	debug=0;	/* debug flags; bit value 4 enables the trace output in formant() */
int dosy=0;		/* set to 1 by the -s option: output SY rather than FM */

#ifndef M_PI
#define M_PI		3.14159265358979323846
#endif

/* analysis parameters */
#define MAXORDER	30	/* maximum permissible LPC order */
#define NFORMANT	5	/* save up to 5 formants/frame */
int		lpc_ord=12;		/* LPC order passed to lpc() */
double 	lpc_stabl=70.0;		/* lpc() stabilisation setting; values > 1.0 scale down r[1..] */
double	wdur=0.049;		/* analysis window duration in seconds (x 10000 Hz = samples) */
double	frame_int=0.01;		/* frame step in seconds (x 10000 Hz = samples) */
double	preemp=0.7;		/* first-difference pre-emphasis coefficient; 0 disables it */
int		w_type=2;		/* window(): 0 = rectangular, 1 = Hamming, 2 = cos**4 */
double	nom_f1=-10;		/* nominal F1; presumably handed to dpform(), which only
				   rescales its tables when the value is > 0 -- TODO confirm */

/* NOTE(review): lpc() stores lpc_ord+1 values (a[0]..a[lpc_ord]) into lpca,
   so MAXORDER entries are only sufficient while lpc_ord < MAXORDER. */
double	lpca[MAXORDER];		/* predictor coefficients returned by lpc() */
double	frp[MAXORDER];		/* presumably pole frequencies from formant() -- use not visible here */
double	bap[MAXORDER];		/* presumably pole bandwidths from formant() -- use not visible here */

/*========================================================================*/
/* lpcfloat.c */


/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/*
 * rwindow -- rectangular window: copy n samples to dout, optionally applying
 * first-difference pre-emphasis.
 *
 *   din     input samples
 *   dout    receives n output values
 *   n       number of output values to produce
 *   preemp  pre-emphasis coefficient; 0.0 selects a plain copy
 *
 * When preemp != 0.0 each output is din[i+1] - preemp*din[i], so din must
 * hold n+1 valid samples.
 *
 * The original definition had no return type (implicit int, which is not
 * valid C since C99) and never returned a value; it is now declared void to
 * match cwindow() and hwindow().
 */
void rwindow(const short *din, double *dout, int n, double preemp)
{
  int i;

  if (preemp != 0.0) {
    for (i = 0; i < n; i++)
      dout[i] = (double)din[i + 1] - (preemp * din[i]);
  } else {
    for (i = 0; i < n; i++)
      dout[i] = din[i];
  }
}


/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
void cwindow(din, dout, n, preemp)
     register short *din;
     register double *dout, preemp;
     register int n;
{
  register int i;
  register short *p;
  static int wsize = 0;
  static double *wind=NULL;
  register double *q, co;

  if(wsize != n) {		/* Need to create a new cos**4 window? */
    register double arg;

    if(wind) wind = (double*)realloc(wind,n*sizeof(double));
    else wind = (double*)malloc(n*sizeof(double));
    wsize = n;
    for(i=0, arg=3.1415927*2.0/wsize, q=wind; i < n; ) {
      co = 0.5*(1.0 - cos(((double)i++) * arg));
      *q++ = co * co * co * co;
    }
  }
/* If preemphasis is to be performed,  this assumes that there are n+1 valid
   samples in the input buffer (din). */
  if(preemp != 0.0) {
    for(i=n, p=din+1, q=wind; i-- > 0; )
      *dout++ = *q++ * ((double)(*p++) - (preemp * *din++));
  } else {
    for(i=n, q=wind; i-- > 0; )
      *dout++ = *q++ * *din++;
  }
}


/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
void hwindow(din, dout, n, preemp)
     register short *din;
     register double *dout, preemp;
     register int n;
{
  register int i;
  register short *p;
  static int wsize = 0;
  static double *wind=NULL;
  register double *q;

  if(wsize != n) {		/* Need to create a new Hamming window? */
    register double arg;

    if(wind) wind = (double*)realloc(wind,n*sizeof(double));
    else wind = (double*)malloc(n*sizeof(double));
    wsize = n;
    for(i=0, arg=3.1415927*2.0/(wsize+1), q=wind; i < n; )
      *q++ = (.54 - .46 * cos(((double)i++) * arg));
  }
/* If preemphasis is to be performed,  this assumes that there are n+1 valid
   samples in the input buffer (din). */
  if(preemp != 0.0) {
    for(i=n, p=din+1, q=wind; i-- > 0; )
      *dout++ = *q++ * ((double)(*p++) - (preemp * *din++));
  } else {
    for(i=n, q=wind; i-- > 0; )
      *dout++ = *q++ * *din++;
  }
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/*
 * window -- apply the selected analysis window to n samples.
 *
 *   din     input samples (n+1 must be valid when preemp != 0.0)
 *   dout    receives n windowed values
 *   n       window length in samples
 *   preemp  pre-emphasis coefficient handed on to the window routine
 *   type    0 = rectangular (rwindow), 1 = Hamming (hwindow),
 *           2 = cos**4 (cwindow)
 *
 * Any other type prints a diagnostic and leaves dout untouched.
 *
 * The original old-style definition never declared `type`, relying on the
 * implicit-int rule that was removed in C99; the prototype makes all
 * parameter types explicit.
 */
void window(short *din, double *dout, int n, double preemp, int type)
{
  switch (type) {
  case 0:
    rwindow(din, dout, n, preemp);
    break;
  case 1:
    hwindow(din, dout, n, preemp);
    break;
  case 2:
    cwindow(din, dout, n, preemp);
    break;
  default:
    printf("Unknown window type (%d) requested in window()\n",type);
    break;
  }
}


/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
void autoc( windowsize, s, p, r, e )
register int windowsize, p;
register double *s, *r, *e;
/*
 * Compute the pp+1 autocorrelation lags of the windowsize samples in s.
 * Return the normalized autocorrelation coefficients in r.
 * The rms is returned in e.
 */
{
  register int i, j;
  register double *q, *t, sum, sum0;

  for ( i=0, q=s, sum0=0.; i< windowsize; q++, i++){
	sum0 += (*q) * (*q);
  }
  *r = 1.;  /* r[0] will always =1. */
  if ( sum0 == 0.){   /* No energy: fake low-energy white noise. */
  	*e = 1.;   /* Arbitrarily assign 1 to rms. */
		   /* Now fake autocorrelation of white noise. */
	for ( i=1; i<=p; i++){
		r[i] = 0.;
	}
	return;
  }
  for( i=1; i <= p; i++){
	for( sum=0., j=0, q=s, t=s+i; j < (windowsize)-i; j++, q++, t++){
		sum += (*q) * (*t);
	}
	*(++r) = sum/sum0;
  }
  *e = sqrt(sum0/windowsize);
}


/*
 * durbin -- Durbin's recursion: autocorrelation -> AR and PARCOR coefficients.
 *
 *   r   autocorrelation lags r[0..p]
 *   k   receives the p reflection (PARCOR) coefficients k[0..p-1]
 *   a   receives the p predictor coefficients a[0..p-1] in normal sign
 *       format, i.e. the leading coefficient of +1 is implied, not stored
 *   p   model order (the scratch array limits it to MAXORDER)
 *   ex  receives the final prediction error
 */
void durbin(double *r, double *k, double *a, int p, double *ex)
{
    double prev[MAXORDER];	/* copy of a[] from the previous order */
    double err, acc;
    int m, n;

    /* order 1 */
    err = r[0];
    k[0] = -r[1] / err;
    a[0] = k[0];
    err *= (1. - k[0] * k[0]);

    /* orders 2..p */
    for (m = 1; m < p; m++) {
	acc = 0;
	for (n = 0; n < m; n++)
		acc -= a[n] * r[m - n];

	k[m] = (acc - r[m + 1]) / err;
	a[m] = k[m];

	for (n = 0; n <= m; n++)
		prev[n] = a[n];
	for (n = 0; n < m; n++)
		a[n] += k[m] * prev[m - n - 1];

	err *= (1. - (k[m] * k[m]));
    }

    *ex = err;
}

/*
 * lpc -- autocorrelation-method LPC analysis of one frame.
 *
 *   lpc_ord    model order, 1..MAXORDER
 *   lpc_stabl  when > 1.0, lags r[1..lpc_ord] are scaled by
 *              1/(1 + 10^(-lpc_stabl/20)) before the recursion, which is
 *              equivalent to adding a little to the diagonal for stability
 *   wsize      window length in samples
 *   data       input samples; window() needs wsize+1 valid samples when
 *              preemp != 0.0
 *   lpca       if non-NULL, receives a[0..lpc_ord] with a[0] == 1.0
 *   ar         if non-NULL, receives the normalised autocorrelation
 *              r[0..lpc_ord] (unscaled by lpc_stabl)
 *   lpck       if non-NULL, receives the reflection coefficients
 *   normerr    if non-NULL, receives the normalised prediction error
 *   rms        if non-NULL, receives the rms of the windowed frame
 *   preemp     pre-emphasis coefficient handed to window()
 *   type       window type handed to window()
 *
 * Returns TRUE on success, FALSE on bad arguments or allocation failure.
 *
 * Changes from the original: lpc_ord < 1 is rejected (durbin() would read
 * an autocorrelation lag that autoc() never wrote), and a failed realloc no
 * longer discards the existing scratch buffer while nwind still describes it.
 */
int lpc(int lpc_ord, double lpc_stabl, int wsize, short *data, double *lpca,
	double *ar, double *lpck, double *normerr, double *rms,
	double preemp, int type)
{
  static double *dwind=NULL;	/* scratch buffer for the windowed frame */
  static int nwind=0;		/* number of doubles currently in dwind */
  double rho[MAXORDER+1], k[MAXORDER], a[MAXORDER+1],*r,*kp,*ap,en,er;

  if((wsize <= 0) || (!data) || (lpc_ord < 1) || (lpc_ord > MAXORDER))
    return(FALSE);

  if(nwind != wsize) {
    /* realloc(NULL, ...) behaves like malloc; keep the old buffer on failure */
    double *tmp = realloc(dwind, (size_t)wsize * sizeof(double));

    if(!tmp) {
      printf("Can't allocate scratch memory in lpc()\n");
      return(FALSE);
    }
    dwind = tmp;
    nwind = wsize;
  }

  window(data, dwind, wsize, preemp, type);

  /* fall back to local storage for any output the caller does not want */
  if(!(r = ar)) r = rho;
  if(!(kp = lpck)) kp = k;
  if(!(ap = lpca)) ap = a;

  autoc( wsize, dwind, lpc_ord, r, &en );
  if(lpc_stabl > 1.0) { /* add a little to the diagonal for stability */
    int i;
    double ffact;
    ffact =1.0/(1.0 + exp((-lpc_stabl/20.0) * log(10.0)));
    for(i=1; i <= lpc_ord; i++) rho[i] = ffact * r[i];
    *rho = *r;
    r = rho;		/* the recursion runs on the scaled copy */
  }
  durbin ( r, kp, &ap[1], lpc_ord, &er);
  *ap = 1.0;
  if(rms) *rms = en;
  if(normerr) *normerr = er;
  return(TRUE);
}

/*========================================================================*/
/* lbpoly.c */

/* Tuning constants for the root solver lbpoly(). */
#define MAX_ITS	100	/* Max iterations before trying new starts */
#define MAX_TRYS	100	/* Max number of times to try new starts */
#define MAX_ERR		1.e-6	/* Max acceptable error in quad factor */
#define MAXORD		30	/* Maximum allowable polynomial order */
/* TRUE and FALSE repeat the definitions made earlier in this file with the
   same values. */
#define TRUE		1
#define FALSE		0


/*
 * quad -- solve a*x**2 + b*x + c = 0.
 *
 *   a, b, c    coefficients
 *   r1r, r1i   receive the real and imaginary parts of the first root
 *   r2r, r2i   receive the real and imaginary parts of the second root
 *
 * With a == 0 the equation is linear: the single root goes to *r1r and the
 * other three outputs are set to 0.  With a == 0 and b == 0 there is no
 * root; a message is printed and FALSE is returned.  Complex roots are
 * returned as a conjugate pair.
 *
 * Returns TRUE on success, FALSE on bad coefficients.
 *
 * The original had no return type (implicit int, not valid C since C99) and
 * re-declared sqrt() locally without a prototype; <math.h> supplies it.
 */
int quad(double a, double b, double c,
	 double *r1r, double *r1i, double *r2r, double *r2i)
{
	double disc;	/* discriminant b*b - 4*a*c */
	double den, y;

	if(a == 0.0){
		if(b == 0){
		   printf("Bad coefficients to _quad().\n");
		   return(FALSE);
		}
		*r1r = -c/b;
		*r1i = *r2r = *r2i = 0;
		return(TRUE);
	}
	den = 2.0 * a;
	disc = b*b -(4.0 * a * c);
	if(disc >= 0.0){	/* two real roots */
		*r1i = *r2i = 0.0;
		y = sqrt(disc);
		*r1r = (-b + y)/den;
		*r2r = (-b - y)/den;
		return(TRUE);
	}
	/* complex conjugate pair */
	*r1i = sqrt( -disc )/den;
	*r2i = -*r1i;
	*r2r = *r1r = -b/den;
	return(TRUE);
}

/*
 * lbpoly -- find all roots of a real polynomial by repeatedly extracting
 * quadratic factors x**2 + p*x + q with an iterative p,q refinement.
 *
 *   a      order+1 coefficients in increasing order of power.  The array
 *          is OVERWRITTEN: after each factor is found it is replaced by the
 *          coefficients of the reduced polynomial.
 *   order  polynomial order, 1..MAXORD
 *   rootr  on entry, starting points (real parts) for the search;
 *          on return, the real parts of the roots
 *   rooti  likewise for the imaginary parts
 *
 * Returns TRUE on success.  Returns FALSE if order is out of range, if an
 * iteration hits a zero denominator, or if no factor converges within
 * MAX_TRYS restarts of MAX_ITS iterations each.
 *
 * Changes from the original:
 *  - explicit int return type (implicit int is not valid since C99) and no
 *    local non-prototype declaration of fabs();
 *  - the order is range-checked and the work arrays b[]/c[] are sized for
 *    index order+1, which the 1-based code below uses;
 *  - the random restart no longer evaluates 1<<31, which overflows a 32-bit
 *    int (undefined behaviour).  On the usual two's-complement targets that
 *    expression wrapped to -2^31, so the replacement divides by the same
 *    value and produces the same starting points, (2^30 - rand())/2^31.
 */
int lbpoly(double *a, int order, double *rootr, double *rooti)
{
	int ord, ordp1, ordm1, itcnt, i, k, mmk, mmkp2, mmkp1, ntrys;
	double err, p, q, delp, delq, den;
	double b[MAXORD+2], c[MAXORD+2];	/* used as [1..order+1] */

	if ((order < 1) || (order > MAXORD)) {
		/* printf("Bad ORDER parameter in _lbpoly()\n"); */
		return(FALSE);
	}

	/* kluge: "fortranify" the indices so the arrays can be used 1-based */
	a--; rootr--; rooti--;

	for (ord = order; ord > 2; ord -= 2) {
		ordp1 = ord+1;
		ordm1 = ord-1;
		/* Here is a kluge to prevent UNDERFLOW! (Sometimes the near-zero
		   roots left in rootr and/or rooti cause underflow here...) */
		if (fabs(rootr[ord]) < 1.0e-10) rootr[ord] = 0.0;
		if (fabs(rooti[ord]) < 1.0e-10) rooti[ord] = 0.0;
		p = -2.0 * rootr[ord]; /* set initial guesses for quad factor */
		q = (rootr[ord] * rootr[ord]) + (rooti[ord] * rooti[ord]);

		for (ntrys = 0; ntrys < MAX_TRYS; ntrys++) {
			for (itcnt = 0; itcnt < MAX_ITS; itcnt++) {
				/* synthetic division of a[] by the trial factor,
				   and of the quotient again for the derivatives */
				b[ordp1] = a[ordp1];
				b[ord] = a[ord] - (p * b[ordp1]);
				c[ordp1] = b[ordp1];
				c[ord] = b[ord] - (p * c[ordp1]);
				for (k = 2; k <= ordm1; k++) {
					mmk = ordp1 - k;
					mmkp2 = mmk+2;
					mmkp1 = mmk+1;
					b[mmk] = a[mmk] - (p* b[mmkp1]) - (q* b[mmkp2]);
					c[mmk] = b[mmk] - (p* c[mmkp1]) - (q* c[mmkp2]);
				}
				b[1] = a[1] - p * b[2] - q * b[3];

				/* b[1], b[2] form the remainder: zero for an exact factor */
				err = fabs(b[1]) + fabs(b[2]);
				if (err <= MAX_ERR) break;

				den = (c[3] * c[3]) - (c[4] * (c[2] - b[2]));
				if (den == 0.0) {
					/* printf("Zero den in _lbpoly.\n"); */
					return(FALSE);
				}
				delp = ((c[3] * b[2]) - (c[4] * b[1]))/den;
				delq = ((c[3] * b[1]) - (b[2] * (c[2] - b[2])))/den;

				p += delp;
				q += delq;
			}
			if (itcnt >= MAX_ITS) { /* try some new starting values */
				p = (1073741824.0 - (double)rand()) / 2147483648.0;
				q = (1073741824.0 - (double)rand()) / 2147483648.0;
			}
			else	/* we finally found the root! */
				break;
		} /* for(ntrys... */

		if ((itcnt >= MAX_ITS) && (ntrys >= MAX_TRYS)) {
			/* printf("Exceeded maximum trial count in _lbpoly.\n"); */
			return(FALSE);
		}

		if (!quad(1.0,p,q,&rootr[ord],&rooti[ord],&rootr[ordm1],&rooti[ordm1]))
			return(FALSE);

		/* Update the coefficient array with the coeffs. of the reduced
		   polynomial. */
		for (i = 1; i <= ordm1; i++) a[i] = b[i+2];
	}

	if (ord == 2) { /* Is the last factor a quadratic? */
		if (!quad(a[3],a[2],a[1],&rootr[2],&rooti[2],&rootr[1],&rooti[1]))
			return(FALSE);
		return(TRUE);
	}

	/* ord == 1 here: the last factor is linear */
	if (a[2] != 0.0) rootr[1] = -a[1]/a[2];
	else {
		rootr[1] = 100.0; /* arbitrary recovery value */
		/* printf("Numerical problems in lbpoly()\n"); */
	}
	rooti[1] = 0.0;

	return(TRUE);
}

/*========================================================================*/
/* formant.c */

/*      ----------------------------------------------------------      */
/*
 * formant -- find the roots of the LPC denominator polynomial and convert
 * the z-plane zeros to equivalent resonant frequencies and bandwidths.
 * The complex poles are then ordered by frequency.
 *
 *   lpc_order  order of the LP model (1..30, the capacity of the root store)
 *   s_freq     sampling frequency of the speech waveform data
 *   lpca       lpc_order+1 predictor coefficients; lbpoly(), as defined in
 *              this file, overwrites this array while deflating
 *   n_form     receives the number of complex poles with frequency above
 *              1 Hz and below s_freq/2 - 1 Hz (0 on failure)
 *   freq       receives candidate frequencies in Hz, complex poles first in
 *              increasing frequency, real poles after them
 *   band       receives the matching bandwidths in Hz
 *   init       non-zero: seed the root search with points spread round a
 *              circle of radius 2; zero: restart from the roots kept from
 *              the previous call (static storage)
 *
 * Returns TRUE on success, FALSE if the order is out of range or the root
 * finder fails.
 *
 * Changes from the original: explicit int return type (implicit int is not
 * valid since C99), and lpc_order is checked against the size of the static
 * root arrays before they are indexed up to [lpc_order].
 */
int formant(int lpc_order, double s_freq, double *lpca, int *n_form,
	    double *freq, double *band, int init)
{
  double  x, flo, pi2t, theta;
  static double  rr[31], ri[31];	/* roots, kept between calls */
  int	i,ii,iscomp1,iscomp2,fc,swit;
  const int maxorder = (int)(sizeof(rr)/sizeof(rr[0])) - 1;

  if((lpc_order < 1) || (lpc_order > maxorder)) {
    *n_form = 0;
    return(FALSE);
  }

  if(debug & 4) {
    printf("formant: lpc_order:%d",lpc_order);
    for(i=0;i<=lpc_order;i++) printf("%9.5f",lpca[i]);
    printf("\n");
  }

  if(init){ /* set up starting points for the root search near unit circle */
    x = M_PI/(lpc_order + 1);
    for(i=0;i<=lpc_order;i++){
      flo = lpc_order - i;
      rr[i] = 2.0 * cos((flo + 0.5) * x);
      ri[i] = 2.0 * sin((flo + 0.5) * x);
    }
  }
  if(! lbpoly(lpca,lpc_order,rr,ri)){ /* find the roots of the LPC polynomial */
    *n_form = 0;		/* was there a problem in the root finder? */
    return(FALSE);
  }

  pi2t = M_PI * 2.0 /s_freq;

  /* convert the z-plane locations to frequencies and bandwidths */
  for(fc=0, ii=0; ii < lpc_order; ii++){
    if((rr[ii] != 0.0)||(ri[ii] != 0.0)){
      theta = atan2(ri[ii],rr[ii]);
      freq[fc] = fabs(theta / pi2t);
      /* bandwidth from the squared pole radius; stored as a magnitude */
      band[fc] = 0.5 * s_freq *
	  log(((rr[ii] * rr[ii]) + (ri[ii] * ri[ii])))/M_PI;
      if(band[fc] < 0.0)
	band[fc] = -band[fc];
      fc++;			/* Count the number of real and complex poles. */

      if((rr[ii] == rr[ii+1])&&(ri[ii] == -ri[ii+1]) /* complex pole? */
	 && (ri[ii] != 0.0)) ii++; /* if so, don't duplicate */
    }
  }

  /* Now order the complex poles by frequency.  Always place the (uninteresting)
     real poles at the end of the arrays. 	*/
  theta = s_freq/2.0;		/* temporarily hold the folding frequency. */
  for(i=0; i < fc -1; i++){	/* order the poles by frequency (bubble) */
    for(ii=0; ii < fc -1 -i; ii++){
      /* Force the real poles to the end of the list. */
      iscomp1 = (freq[ii] > 1.0) && (freq[ii] < theta);
      iscomp2 = (freq[ii+1] > 1.0) && (freq[ii+1] < theta);
      swit = (freq[ii] > freq[ii+1]) && iscomp2 ;
      if(swit || (iscomp2 && ! iscomp1)){
	flo = band[ii+1];
	band[ii+1] = band[ii];
	band[ii] = flo;
	flo = freq[ii+1];
	freq[ii+1] = freq[ii];
	freq[ii] = flo;
      }
    }
  }

  /* Now count the complex poles as formant candidates.  Note the upper
     limit here is 1 Hz below the folding frequency used for the sort. */
  for(i=0, theta = theta - 1.0, ii=0 ; i < fc; i++)
    if( (freq[i] > 1.0) && (freq[i] < theta) ) ii++;
  *n_form = ii;

  if(debug & 4) {
    int j;
    printf("#poles:%4d  ",ii);
    for(j=0;j<ii;j++)
      printf("%7.0f",freq[j]);
    printf("\n             ");
    for(j=0;j<ii;j++)
      printf("%7.0f",band[j]);
    printf("\n");
  }
  return(TRUE);
}

/*========================================================================*/
/* dpform.c */

/* a formant tracker based on LPC polynomial roots and dynamic programming */
				/***/
/* At each frame, the LPC poles are ordered by increasing frequency.  All
   "reasonable" mappings of the poles to F1, F2, ... are performed.
   The cost of "connecting" each of these mappings with each of the mappings
   in the previous frame is computed.  The lowest cost connection is then
   chosen as the optimum one.  At each frame, each mapping has associated
   with it a cost based on the formant bandwidths and frequencies.  This
   "local" cost is finally added to the cost of the best "connection."  At
   end of utterance (or after a reasonable delay like .5sec) the best
   mappings for the entire utterance may be found by retracing back through
   best candidate mappings, starting at end of utterance (or current frame).
*/

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/* Structure definitions for the formant tracker.. */

/* One node of the dynamic-programming lattice: the candidate pole-to-formant
   mappings of a single analysis frame.  dpform() allocates one per frame. */
typedef struct form_latt { /* structure of a DP lattice node for formant tracking */
	short ncand; /* # of candidate mappings for this frame (0 if the frame has no poles) */
	short **cand;      /* cand[j][k]: pole index used for formant k by mapping j, or -1 if missing */
	short *prept;	 /* prept[j]: best mapping in the previous frame for mapping j, or -1 */
	double *cumerr; 	 /* cumerr[j]: cumulative cost of the best path ending in mapping j */
} FORM;

/* Raw per-frame LPC analysis data.  Not referenced by the code visible in
   this part of the file, where dpform() reads struct fm_rec instead. */
typedef struct pole_array {   /* structure to hold raw LPC analysis data */
	double rms;    /* rms for current LPC analysis frame */
	double rms2;    /* rms for current F0 analysis frame */
	double f0;     /* fundamental frequency estimate for this frame */
	double pv;		/* probability that frame is voiced */
	double change; /* spec. distance between current and prev. frames */
	short npoles; /* # of complex poles from roots of LPC polynomial */
	double *freq;  /* array of complex pole frequencies (Hz) */
	double *band;  /* array of complex pole bandwidths (Hz) */
} POLE;
/* End of structure definitions for the formant tracker. */

/* Here are the major fudge factors for tweaking the formant tracker. */
#define MAXFORMANTS 7	/* number of entries in the fnom/fmins/fmaxs tables */
#define MAXCAN	300  /* maximum number of candidate mappings allowed */
static double MISSING = 1, /* equivalent delta-Hz cost for missing formant */
	NOBAND = 1000, /* equivalent bandwidth cost of a missing formant */
	DF_FACT =  20.0, /* cost for proportional frequency changes */
	/* with good "stationarity" function:*/
/*        DF_FACT =  80.0,     cost for proportional frequency changes */
	DFN_FACT = 0.3, /* cost for proportional dev. from nominal freqs. */
	BAND_FACT = .002, /* cost per Hz of bandwidth in the poles */
/*	F_BIAS	  = 0.0004,   bias toward selecting low-freq. poles */
	F_BIAS	  = 0.000, /*  bias toward selecting low-freq. poles */
	F_MERGE = 2000.0; /* cost of mapping f1 and f2 to same frequency;
			     dpform() disables merging when this exceeds 1000 */
static double	fre[MAXORDER],	/* pole frequencies of the frame being mapped (set by get_fcand) */
		fnom[]  = {  500, 1500, 2500, 3500, 4500, 5500, 6500},/*  "nominal" freqs.*/
		fmins[] = {   50,  400, 1000, 2000, 2000, 3000, 3000}, /* lower frequency bounds */
		fmaxs[] = { 1500, 3500, 4500, 5000, 6000, 6000, 8000}; /* upper bounds; one entry per formant, MAXFORMANTS in all */

/* Working state shared by get_fcand(), candy() and canbe(). */
static int	maxp,	/* number of poles to consider */
		maxf,	/* number of formants to find */
		ncan,  domerge = TRUE;	/* ncan: index of the last candidate while mapping, count afterwards;
					   domerge: allow F1 and F2 to share one pole */

static short **pcand;	/* candidate array being filled: pcand[candidate][formant] = pole index or -1 */

/* canbe -- can pole pnumb be formant fnumb?  Returns 1 when the pole's
   frequency lies within [fmins[fnumb], fmaxs[fnumb]], otherwise 0. */
static int canbe(int pnumb, int fnumb)
{
	double polefreq = fre[pnumb];

	if (!(polefreq >= fmins[fnumb]))
		return 0;
	return (polefreq <= fmaxs[fnumb]);
}

/* This does the real work of mapping frequencies to formants. */
static void candy(cand,pnumb,fnumb)
     int	cand, /* candidate number being considered */
       pnumb, /* pole number under consideration */
       fnumb;	/* formant number under consideration */
{
	int i,j;

	if (fnumb < maxf) pcand[cand][fnumb] = -1;
	if ((pnumb < maxp)&&(fnumb < maxf)) {
		/*   printf("\ncan:%3d  pnumb:%3d  fnumb:%3d",cand,pnumb,fnumb); */
		if (canbe(pnumb,fnumb)) {
			pcand[cand][fnumb] = pnumb;
			if (domerge && (fnumb==0) && (canbe(pnumb,fnumb+1))) {
				/* allow for f1,f2 merger */
				ncan++;
				pcand[ncan][0] = pcand[cand][0];
				candy(ncan,pnumb,fnumb+1); /* same pole, next formant */
			}
			candy(cand,pnumb+1,fnumb+1); /* next formant; next pole */
			if (((pnumb+1) < maxp) && canbe(pnumb+1,fnumb)) {
				/* try other frequencies for this formant */
				ncan++;			/* add one to the candidate index/tally */
				/*		printf("\n%4d  %4d  %4d",ncan,pnumb+1,fnumb); */
				for (i=0; i<fnumb; i++)	/* clone the lower formants */
					 pcand[ncan][i] = pcand[cand][i];
				candy(ncan,pnumb+1,fnumb);
			}
		}
		else {
			candy(cand,pnumb+1,fnumb);
		}
	}

	/* If all pole frequencies have been examined without finding one which
     will map onto the current formant, go on to the next formant leaving the
     current formant null. */
	if ((pnumb >= maxp) && (fnumb < maxf-1) && (pcand[cand][fnumb] < 0)) {
	    if (fnumb) {
			j=fnumb-1;
			while ((j>0) && pcand[cand][j] < 0) j--;
			i = ((j=pcand[cand][j]) >= 0)? j : 0;
		}
		else
			i = 0;
		candy(cand,i,fnumb+1);
	}
}

/* Given a set of pole frequencies and allowable formant frequencies
   for nform formants, calculate all possible mappings of pole frequencies
   to formants, including, possibly, mappings with missing formants. */
get_fcand(npole,fab,nform,pcan)
     int	npole, nform;
     short **pcan;
     struct fm_rec_array *fab; /* poles ordered by increasing FREQUENCY */
{
	int i;
	ncan = 0;
	pcand = pcan;
	for (i=0;i<npole;i++) fre[i]=fab[i].freq;
	for (;i<nform;i++) fre[i]=0;
	maxp = npole;
	maxf = nform;
	candy(ncan, 0, 0);
	ncan++;	/* (converts ncan as an index to ncan as a candidate count) */
}

/*
 * set_nominal_freqs -- rebuild the nominal formant frequency table and the
 * allowed range for each formant from a nominal F1.
 *
 *   f1  nominal first-formant frequency in Hz
 *
 * For formant i (0-based): fnom = (2i+1)*f1, fmins = fnom - (i+1)*f1 + 50,
 * fmaxs = fnom + i*f1 + 1000.  All MAXFORMANTS entries are overwritten.
 *
 * The original definition had no return type (implicit int, not valid since
 * C99) and returned nothing; it is now declared void.
 */
void set_nominal_freqs(double f1)
{
	int i;

	for (i = 0; i < MAXFORMANTS; i++) {
		fnom[i] = ((i * 2) + 1) * f1;
		fmins[i] = fnom[i] - ((i+1) * f1) + 50.0;
		fmaxs[i] = fnom[i] + (i * f1) + 1000.0;
	}
}

dpform(struct fm_rec *fm, int nframe, double frate, int nform, double nom_f1)
{
	double pferr, conerr, minerr, dffact, ftemp, berr, ferr, bfact, ffact,
         rmsmax=0, fbias, **fr, **ba, **am, rmsdffact, merger, merge_cost,
         FBIAS, get_stat_max();
	register int	i, j, k, l, ic, ip, mincan;
	short	**pcan;
	FORM	**fl;
	int dmaxc,dminc,dcountc,dcountf;

	if(nom_f1 > 0.0) set_nominal_freqs(nom_f1);
	rmsmax=fm[0].gain;
	for (i=1;i<nframe;i++)
		if (fm[i].gain > rmsmax)
			rmsmax = fm[i].gain;
    FBIAS = F_BIAS /(.01 * frate);

    /* Setup working values of the cost weights. */
    dffact = (DF_FACT * .01) * frate; /* keep dffact scaled to frame rate */
    bfact = BAND_FACT /(.01 * frate);
    ffact = DFN_FACT /(.01 * frate);
    merge_cost = F_MERGE;
    if(merge_cost > 1000.0) domerge = FALSE;

    /* Allocate space for the formant and bandwidth arrays to be passed back. */
    fr = (double **)calloc(nform * 3,sizeof(double *));
    ba = fr + nform;
    am = fr + 2*nform;
    for(i=0;i < nform*3; i++){
      fr[i] = (double *)calloc(nframe,sizeof(double));
    }
    /* allocate space for candidate array */
	pcan = (short **)calloc(MAXCAN,sizeof(short *));
	for(i=0;i<MAXCAN;i++) pcan[i] = (short*)calloc(nform, sizeof(short));

	/* Allocate space for the dp lattice */
	fl = (FORM **)calloc(nframe,sizeof(FORM *));
	for(i=0;i<nframe; i++) fl[i] = (FORM *)calloc(1,sizeof(FORM));

	/*******************************************************************/
	/* main formant tracking loop */
	/*******************************************************************/
	for(i=0; i < nframe; i++){	/* for all analysis frames... */

		ncan = 0;		/* initialize candidate mapping count to 0 */

		/* moderate the cost of frequency jumps by the relative amplitude */
		rmsdffact = fm[i].gain;
		rmsdffact = rmsdffact/rmsmax;
		rmsdffact = rmsdffact * dffact;

		/* Get all likely mappings of the poles onto formants for this frame. */
		if (fm[i].npeaks > 0) {	/* if there ARE pole frequencies available... */

			get_fcand(fm[i].npeaks,fm[i].formant,nform,pcan);

			/* Allocate space for this frame's candidates in the dp lattice. */
			fl[i]->prept =  (short *)calloc(ncan, sizeof(short));
			fl[i]->cumerr = (double *)calloc(ncan,sizeof(double));
			fl[i]->cand =   (short **)calloc(ncan,sizeof(short *));
			for(j=0;j<ncan;j++){	/* allocate cand. slots and install candidates */
				fl[i]->cand[j] = (short*)calloc(nform,sizeof(short));
			    for(k=0; k<nform; k++)
			      fl[i]->cand[j][k] = pcan[j][k];
			}
		}
		fl[i]->ncand = ncan;

		/* compute the distance between the current and previous mappings */
		for(j=0;j<ncan;j++){	/* for each CURRENT mapping... */
			if (i) {		/* past the first frame? */
				minerr = 0;
				if (fl[i-1]->ncand) minerr = 2.0e30;
				mincan = -1;
				for (k=0; k < fl[i-1]->ncand; k++) { /* for each PREVIOUS map... */
					for (pferr=0.0, l=0; l<nform; l++) {
						ic = fl[i]->cand[j][l];
						ip = fl[i-1]->cand[k][l];
						if ((ic >= 0)	&& (ip >= 0)) {
							ftemp = 2.0 * fabs(fm[i].formant[ic].freq - fm[i-1].formant[ip].freq)/
								(fm[i].formant[ic].freq + fm[i-1].formant[ip].freq);
							/* cost prop. to SQUARE of deviation to discourage large jumps */
							pferr += ftemp * ftemp;
						}
						else pferr += MISSING;
					}
					/* scale delta-frequency cost and add in prev. cum. cost */
					conerr = (rmsdffact * pferr) + fl[i-1]->cumerr[k];
					if (conerr < minerr) {
						minerr = conerr;
						mincan = k;
					}
				}			/* end for each PREVIOUS mapping... */
			}
			else {		/* (i.e. if this is the first frame... ) */
				minerr = 0;
			}

			fl[i]->prept[j] = mincan; /* point to best previous mapping */
			/* (Note that mincan=-1 if there were no candidates in prev. fr.) */
			/* Compute the local costs for this current mapping. */
			for (k=0, berr=0, ferr=0, fbias=0; k<nform; k++) {
				ic = fl[i]->cand[j][k];
				if (ic >= 0) {
					if ( !k ) {		/* F1 candidate? */
						ftemp = fm[i].formant[ic].freq;
						merger = (domerge && (ftemp == fm[i].formant[fl[i]->cand[j][1]].freq))? merge_cost: 0.0;
					}
					berr += fm[i].formant[ic].band;
					ferr += (fabs(fm[i].formant[ic].freq-fnom[k])/fnom[k]);
					fbias += fm[i].formant[ic].freq;
				}
				else {		/* if there was no freq. for this formant */
					fbias += fnom[k];
					berr += NOBAND;
					ferr += MISSING;
				}
			}

			/* Compute the total cost of this mapping and best previous. */
			fl[i]->cumerr[j] = (FBIAS * fbias) + (bfact * berr) + merger +
	                     (ffact * ferr) + minerr;
		}			/* end for each CURRENT mapping... */

	}				/* end for all analysis frames... */

	/**************************************************************************/

	/* Pick the candidate in the final frame with the lowest cost. */
	/* Starting with that min.-cost cand., work back thru the lattice. */
	dmaxc = 0;
	dminc = 100;
	dcountc = dcountf = 0;
	for (mincan = -1, i=nframe - 1; i>=0; i--) {
		if (mincan < 0)		/* need to find best starting candidate? */
			if (fl[i]->ncand) {	/* have candidates at this frame? */
				minerr = fl[i]->cumerr[0];
				mincan = 0;
				for (j=1; j<fl[i]->ncand; j++)
					if( fl[i]->cumerr[j] < minerr ) {
						minerr = fl[i]->cumerr[j];
						mincan = j;
					}
			}
		if(mincan >= 0){	/* if there is a "best" candidate at this frame */
			if ((j = fl[i]->ncand) > dmaxc)
				dmaxc = j;
			else if( j < dminc)
				dminc = j;
			dcountc += j;
			dcountf++;
			for (j=0; j<nform; j++) {
				k = fl[i]->cand[mincan][j];
				if (k >= 0) {
					fr[j][i] = fm[i].formant[k].freq;
					ba[j][i] = fm[i].formant[k].band;
					am[j][i] = fm[i].formant[k].amp;
				}
				else {		/* IF FORMANT IS MISSING... */
					if (i < nframe - 1) {
						fr[j][i] = fr[j][i+1]; /* replicate backwards */
						ba[j][i] = ba[j][i+1];
						am[j][i] = am[j][i+1];
					}
					else {
						fr[j][i] = fnom[j]; /* or insert neutral values */
						ba[j][i] = NOBAND;
						am[j][i] = 0.0;
					}
				}
			}
			mincan = fl[i]->prept[mincan];
		}
		else {		/* if no candidates, fake with "nominal" frequencies. */
			for (j=0; j < nform; j++) {
				fr[j][i] = fnom[j];
				ba[j][i] = NOBAND;
				am[j][i] = 0.0;
			}
		}			/* note that mincan will remain =-1 if no candidates */
	}				/* end unpacking formant tracks from the dp lattice */

	/* Deallocate all the DP lattice work space. */
	for (i=nframe - 1; i>=0; i--) {
		if (fl[i]->ncand) {
			if (fl[i]->cand) {
				for (j=0; j<fl[i]->ncand; j++) free(fl[i]->cand[j]);
				free(fl[i]->cand);
				free(fl[i]->cumerr);
				free(fl[i]->prept);
			}
		}
	}
	for(i=0; i<nframe; i++)	free(fl[i]);
	free(fl);
	fl = 0;

	/* Deallocate space for the raw candidate aray. */
	for(i=0;i<MAXCAN;i++) free(pcan[i]);
	free(pcan);

	/* return formant and bandwidth values in fm */
	for (i=0;i<nframe;i++) {
		fm[i].npeaks = nform;
		for (j=0;j<nform;j++) {
			fm[i].formant[j].freq = (float)fr[j][i];
			fm[i].formant[j].band = (float)ba[j][i];
			fm[i].formant[j].amp = (float)am[j][i];
		}
	}

	/* free formant array */
    for (i=0;i < nform*3; i++) free(fr[i]);
    free(fr);
}

/*========================================================================*/

/* downsample to 10000Hz */
int downsample(struct item_header *item,short *sp)
{
	float	*fsp;
	double	srate=1.0/item->frameduration;
	int		interp;
	FILTER	*lpf;
	int		i,j,numf;
	double	m1,m2;

	if (srate < 20000)
		interp=4;
	else if (srate < 30000)
		interp=3;
	else if (srate < 40000)
		interp=2;
	else
		interp=1;

	fsp=(float *)calloc(interp*item->numframes,sizeof(float));

	lpf=filter_design(FILTER_LOW_PASS,4,4750,srate*interp/2,interp*srate);
	for (i=0,j=0;i<item->numframes;i++) {
		fsp[j++] = filter_sample(lpf,sp[i]);
		if (interp>3) fsp[j++] = filter_sample(lpf,0);
		if (interp>2) fsp[j++] = filter_sample(lpf,0);
		if (interp>1) fsp[j++] = filter_sample(lpf,0);
	}

	numf=(int)(10000.0*item->numframes/srate);

	for (i=0;i<numf;i++) {
		j=(int)(i*interp*srate/10000);
		m1=1.0-(i*interp*srate/10000-j);
		m2=1.0-m1;
		sp[i] = (short)(m1*fsp[j]+m2*fsp[j+1]);
	}
	return(numf);
}

/*
 * calcamplitude -- amplitude in dB of an LPC frame's all-pole filter at a
 * given frequency.
 *
 *   lpcrec  LPC frame; data[0..ncoeff-1] are the predictor coefficients and
 *           gain is the frame gain
 *   ncoeff  number of predictor coefficients
 *   tau     sampling interval in seconds
 *   freq    frequency in Hz
 *
 * Evaluates A = 1 + sum data[k]*exp(j*omega*(k+1)) with omega = 2*pi*freq*tau
 * and returns 20*log10(gain/|A|).
 */
float calcamplitude(struct pc_rec *lpcrec,int ncoeff,double tau, double freq)
{
	double	omega = 8.0 * atan(1.0) * freq * tau;	/* 2.pi.f.t */
	double	imag = 0.0;	/* sum of sine products */
	double	real = 1.0;	/* sum of cosine products, plus the leading 1 */
	double	mag;
	int	k = 0;

	while (k < ncoeff) {
		double	phase = omega * (k+1);

		imag += lpcrec->data[k] * sin(phase);
		real += lpcrec->data[k] * cos(phase);
		k++;
	}

	/* magnitude of the inverse filter at freq */
	mag = sqrt(imag*imag + real*real);

	return((float)(20.0*log10(lpcrec->gain/mag)));
}

/*
 * getfxval -- look up the Fx value nearest to time t.
 *
 * The time is converted to a frame index of the global fx buffer using the
 * offset and frame duration in fxitem, rounding to the nearest frame.
 * Returns 0 when the index falls outside the buffer.
 */
short getfxval(double t)
{
	int	frame = (int)(0.5+(t-fxitem.offset)/fxitem.frameduration);

	if ((frame >= 0)&&(frame < fxitem.numframes))
		return(fx[frame]);
	return(0);
}

/*
 * main program
 *
 * Processing steps, in order:
 *   1. decode the command line (-I, -i item, -s) and pick up the file name;
 *   2. load the speech item, downsample it when its rate exceeds 10000Hz,
 *      and high-pass filter it in place;
 *   3. run LPC analysis over successive windows of the signal, storing one
 *      PC frame per window;
 *   4. turn each set of LPC coefficients into raw formant candidates
 *      (frequency, amplitude, bandwidth); unless OLD_CODE is defined,
 *      candidates with a bandwidth of 1000 or more are dropped;
 *   5. hand the candidates to dpform() for tracking;
 *   6. write either SY frames (-s) or the tracked FM item to the file.
 *
 * Failures are reported through error(), which is relied upon not to
 * return.  The program leaves through exit(0) on success.
 */
int main(int argc,char *argv[])
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int		it;		/* item selections */
	char	*ty;
	char	*sptype="0";	/* default sub-type = last */
	char	*fxtype="0";	/* default sub-type = last */
	/* file variables */
	char	filename[SFSMAXFILENAME];	/* dbase file name */
	int		ofid;
	/* data variables */
	double	srate;
	int		numf,nframe;
	FILTER	*hpf;
	int		wisize,stsize;
	double	normerr,energy;
	int		i,j,k,n;
	int		init,nform;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:s")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Formant analysis V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == SP_TYPE)
					sptype = ty;
				else if (it == FX_TYPE)
					fxtype = ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 's' :	/* output SY data */
			dosy=1;
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-s) file",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no database file specified",NULL);

	/* load speech into memory */
	getitem(filename,SP_TYPE,sptype,&spitem,&sp);

	/* downsample to 10000Hz */
	/* NOTE(review): input sampled below 10000Hz is left as it is, yet every
	   later stage uses a fixed 10000Hz rate -- confirm such input is not
	   expected here */
	srate = 1.0/spitem.frameduration;
	if (srate > 10000) {
		numf=downsample(&spitem,sp);
#ifdef EBUG
		sfsheader(&opitem,SP_TYPE,0,2,1,1.0/10000,0.0,1,0,0);
		sprintf(opitem.history,"%s/SP(%d.%02d;downsampled)",
			PROGNAME,spitem.datatype,spitem.subtype);
		putitem(filename,&opitem,numf,sp);
#endif
	}
	else
		numf=spitem.numframes;

	/* highpass filter at 75Hz */
	hpf = filter_design(FILTER_HIGH_PASS,4,75.0,5000.0,10000.0);
	filter_signal(hpf,sp,sp,numf);
#ifdef EBUG
	sfsheader(&opitem,SP_TYPE,0,2,1,1.0/10000,0.0,1,0,0);
	sprintf(opitem.history,"%s/SP(%d.%02d;high-pass)",
			PROGNAME,spitem.datatype,spitem.subtype);
	putitem(filename,&opitem,numf,sp);
#endif

	/* window parameters: window length and step, in samples at 10000Hz */
	wisize = (int)(wdur*10000);
	stsize = (int)(frame_int*10000);

	/* a signal shorter than one window yields no frames at all, and the
	   frame count computed below would be zero or negative */
	if (numf < wisize)
		error("signal too short for analysis in '%s'",filename);

	/* make PC header */
	sfsheader(&pcitem,PC_TYPE,-1,4,(sfsstruct[PC_TYPE]+lpc_ord*sizeof(float))/sizeof(float),1.0/10000,wdur/2,wisize,wisize-stsize,0);
	sprintf(pcitem.history,"%s/PC(%d.%02d)",
			PROGNAME,spitem.datatype,spitem.subtype);

	/* get buffer for LPC coefficients; n keeps the expected frame total
	   for the progress display */
	n=nframe = 1+(numf-wisize)/stsize;
	pc = (struct pc_rec *)sfsbuffer(&pcitem,nframe);

	/* calculate LPC coefficients, one frame per window position */
	nframe=0;
	for (i=0;(i+wisize)<=numf;i+=stsize) {

		/* lpc analysis */
		lpc(lpc_ord,lpc_stabl,wisize,sp+i,lpca,NULL,NULL,&normerr,&energy,preemp,w_type);

		/* copy into PC frame; lpca[0] is skipped and set back to 1.0
		   when the coefficients are read again below */
		pc[nframe].posn = i;
		pc[nframe].size = wisize;
		pc[nframe].gain = (float)energy;
		pc[nframe].mix = 0;
		pc[nframe].flag = 0;
		for (j=0;j<lpc_ord;j++)
			pc[nframe].data[j] = (float)lpca[j+1];
		nframe++;

		/* progress every 100 frames; i is a sample offset here, so the
		   report is driven by the frame counter instead */
		if (((nframe%100)==0)&&ttytest()) {
			fprintf(stderr,"Frame %d/%d\r",nframe,n);
			fflush(stderr);
		}
	}
#ifdef EBUG
	putitem(filename,&pcitem,nframe,pc);
#endif

	/* make FM item header, with room for 2*NFORMANT raw candidates */
	sfsheader(&fmitem,FM_TYPE,-1,4,(sfsstruct[FM_TYPE]+2*NFORMANT*sizeof(struct fm_rec_array))/4,
			1.0/10000.0, wdur/2, wisize,wisize-stsize,0);
	sprintf(fmitem.history,"%s/FM(%d.%02d;raw)",PROGNAME,spitem.datatype,spitem.subtype);

	/* get FM buffer */
	fm=(struct fm_rec *)sfsbuffer(&fmitem,nframe);

	/* find formants */
	init=FALSE;
	for (i=0;i<nframe;i++) {

		/* get autoregressive coeffs back */
		lpca[0]=1.0;
		for (j=0;j<lpc_ord;j++)
			lpca[j+1]=pc[i].data[j];

		/* get formant frequencies and bandwidths; init is FALSE for the
		   first frame only */
		formant(lpc_ord,10000.0,lpca,&nform,frp,bap,init);
		init=TRUE;

		/* copy into FM frame */
		fm[i].posn = pc[i].posn;
		fm[i].size = pc[i].size;
		fm[i].flag = pc[i].flag;
		fm[i].gain = pc[i].gain;
#ifdef OLD_CODE
		fm[i].npeaks = (nform > 2*NFORMANT) ? 2*NFORMANT : nform;
		for (j=0;j<fm[i].npeaks;j++) {
			fm[i].formant[j].freq = (float)frp[j];
			fm[i].formant[j].amp = calcamplitude(&pc[i],lpc_ord,1.0/10000,frp[j]);
			fm[i].formant[j].band = (float)bap[j];
		}
#else
		/* keep at most 2*NFORMANT candidates, and of those only the ones
		   with bandwidth below 1000; k counts the survivors */
		fm[i].npeaks = (nform > 2*NFORMANT) ? 2*NFORMANT : nform;
		for (j=0,k=0;j<fm[i].npeaks;j++) if ((float)bap[j]<1000) {
			fm[i].formant[k].freq = (float)frp[j];
			fm[i].formant[k].amp = calcamplitude(&pc[i],lpc_ord,1.0/10000,frp[j]);
			fm[i].formant[k].band = (float)bap[j];
			k++;
		}
		fm[i].npeaks = k;
#endif

		if (((i%100)==99)&&ttytest()) {
			fprintf(stderr,"Frame %d/%d\r",i+1,nframe);
			fflush(stderr);
		}
	}
	fprintf(stderr,"                       \n");

#ifdef EBUG
	putitem(filename,&fmitem,nframe,fm);
#endif

	/* formant tracking */
	dpform(fm, nframe, 10000/stsize, NFORMANT, nom_f1);

	/* SY data required ? */
	if (dosy) {
		/* get FX data */
		/* NOTE(review): the result of getitem() is not checked; confirm
		   that it reports a missing FX item itself */
		getitem(filename,FX_TYPE,fxtype,&fxitem,&fx);

		/* make SY item header */
		sfsheader(&syitem,SY_TYPE,0,2,19,0.01,wdur/2,1,0,0);
		sprintf(syitem.history,"%s/SY(%d.%02d)",PROGNAME,spitem.datatype,spitem.subtype);

		/* open output channel */
		if ((ofid=sfschannel(filename,&syitem))<0)
			error("unable to open output channel to '%s'",filename);

		/* one 19-value SY frame per analysis frame */
		for (i=0;i<nframe;i++) {
			/* Fx looked up at the frame time */
			syframe[0] = getfxval(wdur/2 + i * syitem.frameduration);
			syframe[1] = 0;
			syframe[2] = 0;
			/* slots 3..14: frequency, 10*amplitude and bandwidth of the
			   first four formant entries */
			/* NOTE(review): entries are read without reference to
			   fm[i].npeaks -- presumably dpform() fills them; confirm */
			for (j=0;j<4;j++) {
				syframe[3*(j+1)] = (short)fm[i].formant[j].freq;
				syframe[3*(j+1)+1] = (short)(10*fm[i].formant[j].amp);
				syframe[3*(j+1)+2] = (short)fm[i].formant[j].band;
			}
			syframe[15] = 250;
			syframe[16] = syframe[4];	/* copy of the first formant's amplitude slot */
			syframe[17] = 0;
			syframe[18] = (syframe[0] > 0) ? 248 : 0;	/* non-zero only when an Fx value is present */

			/* write frame */
			sfswrite(ofid,1,syframe);
		}

		/* update file */
		if (!sfsupdate(filename))
			error("update error on '%s'",filename);
	}
	else {

		/* make FM item header, now sized for NFORMANT tracked formants */
		sfsheader(&fmitem,FM_TYPE,-1,4,(sfsstruct[FM_TYPE]+NFORMANT*sizeof(struct fm_rec_array))/4,
			1.0/10000.0, wdur/2, wisize,wisize-stsize,0);
		sprintf(fmitem.history,"%s/FM(%d.%02d)",PROGNAME,spitem.datatype,spitem.subtype);

		/* save tracked formants */
		putitem(filename,&fmitem,nframe,fm);
	}

	/* that's all folks ... */
	exit(0);
	return(0);	/* not reached */
}
