/* txanal - pitch epoch locator */

/* Mark Huckvale - University College London */

/* version 1.0 - December 2003 */

/* program identity: name and version, as printed by the -I option */
#define PROGNAME	"txanal"
#define PROGVERS	"1.0"

/* program name as a global string (initialised from PROGNAME) */
char	*progname = PROGNAME;

#undef EBUG

/*-------------------------------------------------------------------------*/
/**MAN
.TH TXANAL SFS1 UCL
.SH NAME
txanal -- pitch epoch detector from Speech or Lx
.SH SYNOPSIS
.B txanal
(-I) (-i item) (-l) (-f|-t|-v) (-p polarity) file
.SH DESCRIPTION
.I txanal
is a program to find the locations of larynx excitation points
in the voiced regions of a speech signal.  The output is saved as
a TX item.  Options allow the input of an FX item or a TR item
to act as a voicing indicator.
.PP
The process is essentially: (i) High-pass the signal, (ii) pre-emphasise,
(iii) find residual after LPC analysis, (iv) find all local maxima,
(v) perform a DP search for peak sequences.
.PP
This algorithm was originally used in the "epochs" program of ESPS.
.SS LICENCE
Some code in this program was originally written by David Talkin and
Derek Lin as part of the Entropic Signal Processing System and is
used under licence from Microsoft.
.SH OPTIONS
.TP 11
.B -I
Identify program name and version number.
.TP 11
.BI -i item
Select input item number.
.TP 11
.B -l
Use LX signal as source.  Some processing is modified for LX signals.
.TP 11
.B -f
Use FX item to define voiced regions.  Voiced regions are taken
to be regions with a FX value greater than 0.  The fxanal(SFS1)
or fxrapt(SFS1) programs are a suitable source.
.TP 11
.B -t
Use TR item to define voiced regions.  Voiced regions are taken
to be regions where the track is greater than 0.5.  The vdegree(SFS1)
program is a suitable source.
.TP 11
.B -v
Treat whole signal as voiced.  Default is to apply a simple voicing
decision based on energy and zero-crossing rate.
.TP 11
.BI -p polarity
Specify the signal polarity as positive (1) or negative (-1).  By default
the polarity is automatically detected.  Specifying the polarity
when known increases the speed of operation.
.SH INPUT ITEMS
.IP SPEECH
Any speech signal
.IP LX
Any Lx signal
.SH OUTPUT ITEMS
.IP TX
Larynx excitation points.
.SH HISTORY
.SH VERSION/AUTHOR
.IP ESPS
David Talkin and Derek Lin
.IP SFS
Mark Huckvale
.SH SEE ALSO
HQtx(SFS1), vtx(SFS1), pp(SFS1)
*/
/*--------------------------------------------------------------------------*/

#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include "sfs.h"
#include "filter.h"
#include "complex.h"
#include "lsys.h"
#include "lpca.h"

#ifndef M_PI
#define M_PI		3.14159265358979323846
#endif

/* ---- global item data shared between main() and epochfinder() ---- */

/* SFS data file name */
char			filename[SFSMAXFILENAME];

/* input signal (speech or Lx) */
struct item_header	spitem;		/* header of input item */
short			*sp;		/* input samples */

/* optional voicing sources */
struct item_header	fxitem;		/* FX item header */
short			*fx;		/* FX values */
struct item_header	tritem;		/* TR item header */
float			*tr;		/* track values */

/* intermediate signals */
struct item_header	hpitem;		/* filtered speech */
float			*fsp;		/* filtered (then pre-emphasised) samples */
struct item_header	ppitem;		/* pre-emphasised speech */
struct item_header	rpitem;		/* speech residual */
float			*rsp;		/* residual samples */
struct item_header	rmsitem;	/* running rms */
struct item_header	positem;	/* positive peak pulses */
struct item_header	negitem;	/* negative peak pulses */

/* output */
struct item_header	txitem;		/* output TX item header */
short			*pulsp;		/* pulse signal selected by epochfinder() */

/* analysis state */
double			maxenergy;	/* maximum frame energy (dB), set in main() */
double			vtime;		/* total duration of voiced regions */
double			dtime=0;	/* duration of voiced regions processed so far */
int				npeak=1;	/* search negative peaks? */
int				ppeak=1;	/* search positive peaks? */
int				polarity=0;	/* polarity chosen by epochfinder(): +1 or -1 */

/*=============================================================================*/

/*
 * This material contains unpublished, proprietary software of
 * Entropic Research Laboratory, Inc. Any reproduction, distribution,
 * or publication of this work must be authorized in writing by Entropic
 * Research Laboratory, Inc., and must bear the notice:
 *
 *    "Copyright (c) 1990-1996 Entropic Research Laboratory, Inc.
 *                   All rights reserved"
 *
 * The copyright notice above does not evidence any actual or intended
 * publication of this source code.
 *
 * Written by:  David Talkin, Derek Lin
 * Checked by:
 * Revised by:
 *
 * Brief description: Estimate the F0 and voicing state, using
 *     dynamic programming to
 *     find the major points of periodic excitation in a voiced speech
 *     waveform.
 *
 *
 */

/* The algorithm is as follows:

   (1) A variety of signals, including the original PCM data may be used.
   For optimal selection of the exact point of excitation the signal may
   be prepared as follows:

    (a) Highpass the signal with a symmetric, non-causal FIR to remove
           DC, breath noise, etc.
       (b) Filter the result with an autoregressive filter of order
           int(2 + Fs/1000) after preemphasizing with 1st order filter (.97).
       (c) If necessary, apply phase correction to restore peakiness of
           "glottal flow derivative" (e.g. for TIMIT).

   (2)  Find all local maxima in the signal.  The number
   of these may be reduced while maintaining accuracy by constraining
   their absolute values to be greater than some running threshold (like
   the local rms).

   (3) Associate with each peak: its time location, its polarity, its
   absolute value and first derivative.

   (4) For each peak of a given polarity:
       (a) Compute a local cost based on the relative local amplitude of
           the peak and the degree to which it fits a glottal flow
	   derivative model. (E.g. amplitude/(local rms) or
	   amplitude/(highest amp. in region); add in (s(t+.2ms) - s(t)).
       (b) For all preceding peaks of the same polarity in an interval
            near the estimated F0 interval:
	      (i) Compute the transition cost based on the closeness of
	          the interpeak interval (IPI) to the estimated period
		  of the previous best candidate (E.g. abs(log(IPI*F0))
		  and on the similarity of the peak amplitudes
		  (E.g. abs(log(val1/val2))).
	      (ii) Save a backpointer to the previous peak which has the
	           lowest sum of accumulated cost and transition cost.
	      (iii) Assign the cost of the current peak selection to be
	            the sum of its local cost, transition cost and best
		    connection cost.  Save the interval to the best previous
		    peak for subsequent iterations.
(5) Starting at the end of the signal (or at the end of each voiced
    interval), examine the peaks in a voiced interval corresponding to the
    F0 range of interest and select the lowest cost peak in that interval
    as the starting point for backtracking to select the most likely set
    of peaks.
*/

/* amplitude written into the pulse signals at each selected epoch */
#define AMP 10000

/* Minimum / maximum of two values.  Every argument is parenthesised so
   that expressions containing low-precedence operators (e.g. "a & b")
   expand correctly.  NOTE: arguments are still evaluated more than once,
   so do not pass expressions with side effects. */
#define min(x,y) (((x) > (y)) ? (y) : (x))
#define max(x,y) (((x) > (y)) ? (x) : (y))

/* One entry in the singly-linked list of voiced intervals. */
typedef struct vlist {
  double start;		/* interval start time */
  double end;		/* interval end time */
  int type;		/* interval type (not set by code in this file) */
  struct vlist *next;	/* next interval, NULL at end of list */
} Vlist;

/* head of the voiced interval list; NULL means "no list supplied" */
Vlist *vl = NULL;

/* DP fudge factors (weights used by do_dp() and epochfinder()): */
double CLIP = 0.5;	/* clipping level for local RMS*/
double PEAK = 1.0;	/* weight given to peak quality */
double TSIM = .4;	/* cost of period dissimilarities */
double PSIM = .05;	/* cost for peak quality dissimilarity */
double PQUAL = .35;	/* relative contribution of shape to peak quality */
double FDOUB = .7;	/* cost of each frequency doubling/halving */
double AWARD = .4;	/* award for selecting a peak */
double VOFFCOST = 0.2;	/* cost for V-UV transition */
double VONCOST = 0.2;	/* cost for UV-V transition */
double VONOFF = 0.3;	/* cost for rms rise/fall appropriateness */
double UVCOST = 0.7;	/* cost of unvoiced classification */
double VUCOST;		/* derived in epochfinder() from PEAK, UVCOST, AWARD, VOFFCOST */
double JITTER = .1;	/* reasonable inter-period variation */

/* parameter file name (not referenced elsewhere in this file) */
char *parfile = NULL;

/* A signal peak, held in a doubly-linked list in sample order. */
typedef struct peak {
  int sample;			/* sample index of the peak */
  float value;			/* peak value passed to new_peak() */
  float rms;			/* rms figure passed to new_peak() */
  struct pcand *cands;		/* DP candidates ending at this peak */
  struct peak *next;		/* following peak */
  struct peak *prev;		/* preceding peak */
} Peak;

/* most recent negative and positive peaks (tails of the two lists) */
Peak *neg = NULL;
Peak *pos = NULL;

float srate;			/* sampling rate, set in epochfinder() */
float fratio;
float range = (float)0.7;
float ln2;			/* log(2), set in do_dp() */

/* positive and negative pulse signals built by epochfinder() */
short *ppul;
short *npul;

int imin;			/* srate/f0min, set in epochfinder() */
int imax;			/* srate/f0max, set in epochfinder() */
int debug_level = 0;
int peaks;			/* count maintained by do_dp() */
int tcands;			/* total candidates created by do_dp() */

static char *voiced[] = {"D","H","V","M","F",NULL};

/* Allocate a new Peak and append it to the list whose current tail is *pp.
   On return *pp points at the new peak.
     pp   - address of the list tail pointer (updated)
     loc  - sample index of the peak
     val  - value stored in the peak
     rms  - rms figure stored in the peak
   Prints a message and exits if memory cannot be obtained. */
void new_peak(pp, loc, val, rms)
     register Peak **pp;
     register int loc;
     register float val;
     register short rms;
{
  Peak *tail = *pp;
  Peak *node = (Peak*)malloc(sizeof(Peak));

  if(node == NULL) {
    printf("Can't allocate a Peak.\n");
    exit(-1);
  }

  /* fill in the new record: no candidates yet, nothing after it */
  node->sample = loc;
  node->value = (float)val;
  node->rms = rms;
  node->cands = NULL;
  node->next = NULL;

  /* hook it on behind the old tail and make it the new tail */
  node->prev = tail;
  if(tail != NULL)
    tail->next = node;
  *pp = node;
}

/* A DP candidate: one hypothesis (voiced or unvoiced) for a peak, linked
   back to the best previous candidate. */
typedef struct pcand {
  Peak *this;			/* parent peak at head of this list */
  struct pcand *best;		/* best previous candidate resulting from DP */
  int inter;			/* interval (in samples) to "best" */
  int type;			/* voiced=1 or unvoiced=0 */
  float cost;			/* cost of choosing "best" */
  struct pcand *next;		/* next candidate for this peak */
} Pcand;

/* Initial candidates for the negative and positive peak searches.  Their
   "this" field is NULL, which is what terminates backtracking. */
Pcand neg_p = {NULL,NULL,0,0,(float)0.0,NULL};
Pcand pos_p = {NULL,NULL,0,0,(float)0.0,NULL};

/*------------------------------------------------------*/
/* Create a new candidate for peak pp that connects back to candidate pc,
   and push it on the front of pp's candidate list.
     pp     - peak that owns the new candidate (may be NULL, in which case
              the candidate is not attached to any list)
     pc     - previous candidate this one connects to (must not be NULL)
     linter - interval in samples between the two peaks
     tcost  - transition cost; the stored cost is tcost + pc->cost
     type   - 1 for a voiced hypothesis, 0 for unvoiced
   Returns the new candidate, or NULL if pc is NULL or memory is exhausted. */
Pcand *link_new_cand(Peak *pp, Pcand *pc, int linter, float tcost, int type)
{
  Pcand *p;

  /* without a predecessor there is no accumulated cost to build on;
     refuse rather than dereference a NULL pointer */
  if(!pc)
    return(NULL);

  if(!(p = (Pcand*)malloc(sizeof(Pcand))))
    return(NULL);

  if(pp) {
    p->next = pp->cands;
    pp->cands = p;
  } else
    p->next = NULL;
  p->inter = linter;
  p->best = pc;
  p->cost = (float)(tcost + pc->cost);
  p->type = type;
  if(debug_level == 256)
    printf("%f %f %d %p\n",p->cost,tcost,p->inter,(void*)pc->this);
  p->this = pp;
  return(p);
}

/*------------------------------------------------------*/
/* Return the lowest-cost candidate attached to peak pp.  When several
   candidates share the lowest cost the one nearest the head of the list
   is returned.  Returns NULL if pp is NULL, if it has no candidates, or
   if its first candidate has no parent peak. */
Pcand *get_best_track(pp)
     Peak *pp;
{
  Pcand *cand, *winner;

  if(pp == NULL)
    return(NULL);
  winner = pp->cands;
  if(winner == NULL)
    return(NULL);
  if(winner->this == NULL)
    return(NULL);

  for(cand = winner->next; cand != NULL; cand = cand->next) {
    if(cand->cost < winner->cost)
      winner = cand;
  }
  return(winner);
}

/*------------------------------------------------------*/
/* Return the lowest cost candidate from all peaks within the maximum plausible
   pitch period. */
/* Starting at peak pp and walking backwards, examine every peak that lies
   within imin samples of pp and return the lowest-cost candidate found.
   The walk stops early at the first earlier peak that has no usable track.
   Returns NULL if pp is NULL or no peak in the window has a track. */
Pcand *get_best_peak(Peak *pp)
{
  Pcand *pc, *pmin;
  int low;

  if(!pp)
    return(NULL);

  low = pp->sample - imin;
  pmin = get_best_track(pp);
  while((pp = pp->prev) && (pp->sample >= low)) {
    pc = get_best_track(pp);
    if(!(pc && pc->this))
      break;
    /* pmin can be NULL if the newest peak had no track; in that case
       adopt the first track found instead of dereferencing NULL */
    if(!pmin || (pc->cost < pmin->cost))
      pmin = pc;
  }
  return(pmin);
}

/*------------------------------------------------------*/
/* Dynamic programming step for one newly added peak.
     pp     - the new peak (the tail of its list, see new_peak())
     pcinit - initial candidate used when pp has no earlier peak far enough
              back to connect to
   Earlier peaks lying between imax and imin samples before pp are
   considered as predecessors.  For each of them a voiced candidate is
   linked to pp using the cheapest connection found; at most one unvoiced
   candidate is linked as well.  The globals peaks, tcands and ln2 are
   updated. */
void do_dp(pp,pcinit)
     register Peak *pp;
     register Pcand *pcinit;
{
  register Peak *p, *pstart, *phold, *bestp;
  register Pcand *pc, *pmin, *uvpmin, *bestuv;
  register int  low, high, cands=0;
  register float cmin, tcmin, uvcmin, uvtcmin, tc, cc, linter, ttemp,
           ftemp, ft1, tcost, vcost, vutcost, uvtcost,  ucost, maxcost;

  /* search window: previous peaks with low <= sample <= high */
  low = pp->sample - imin;
  high = pp->sample - imax;
  p = pp->prev;
  while(p && (p->sample > high)) p = p->prev;
  if(!(pstart = p)) {
    /* nothing to connect to: seed voiced and unvoiced candidates from
       pcinit at zero transition cost */
    link_new_cand(pp,pcinit,0,0.0,1);
    link_new_cand(pp,pcinit,0,0.0,0);
    return;
  }
  peaks++;
  ln2 = (float)log((double)2.0);
  /* local costs of calling this peak voiced / unvoiced */
  vcost = (float)(PEAK * pp->value - AWARD);
  ucost = (float)(PEAK * (UVCOST - pp->value) - AWARD);
  pmin = NULL;
  uvpmin = NULL;
  bestuv = NULL;
  bestp = NULL;
  maxcost =  uvcmin = (float)1.0e30;
  phold = p;
  /* the peak selected here is the one with the smallest value field */
  while(p && (p->sample >= low)) { /* find highest peak as UV cand. */
    if(p->value < maxcost){
      maxcost = p->value;
      bestp = p;
    }
    p = p->prev;
  }
  /* There are always plenty of low-quality peaks;  question is:
     Is the BEST previous peak still better classified as UNVOICED? */
  if(bestp && (pc = bestp->cands)) {
    while(pc) {
      if(! pc->type) {		/* get UV-V transition cost */
	bestuv = pc;
	uvtcost =  (float)(vcost + VONCOST + VONOFF*log(bestp->rms/pp->rms));
	break;
      }
      pc = pc->next;
    }
  } /* (Now have the unvoiced hypothesis of the HIGHEST peak.) */
  p = phold;
  while(p && (p->sample >= low)) { /* for each possible candidate */
    linter = (float)(pp->sample - p->sample); /* local time interval */
    tcost =  (float)(vcost + (PSIM * fabs(log(pp->value/p->value))));
    vutcost =  (float)(VUCOST + VONOFF*log(pp->rms/p->rms));
    if((pc = p->cands)) {
      /* cmin is reset for every previous peak; pmin and tcmin keep the
         values from the most recent improvement */
      cmin = (float)1.0e30;
      while(pc) {		/* for each of its candidates */
	if(pc->type) {		/* is it a voiced hypothesis? */
	  if(pc->inter && (pc->best->type)) { /* prev. per. available? */
	    /* period mismatch, or FDOUB plus the mismatch measured
	       against a factor of two, whichever is smaller */
	    ttemp = (float)fabs(log(linter/pc->inter));
	    ftemp = (ttemp > (ft1 = (float)(FDOUB + fabs(ttemp - ln2))))?
	      (float)ft1 : (float)ttemp;
	  } else ftemp = (float)JITTER;
	  if((cc = (float)((tc = (float)(tcost + (TSIM * ftemp))) + pc->cost)) <
	     cmin) {
	    cmin = cc;
	    tcmin = tc;
	    pmin = pc;
	  }
	  /* Now compute previous voiced to current unvoiced cost. */
	  if((cc = vutcost + pc->cost) < uvcmin) {
	    uvcmin = cc;
	    uvtcmin = vutcost;
	    uvpmin = pc;
	  }
	} else {    /* Check for unvoiced-to-voiced transition as best. */
	  if(pc == bestuv) { /* is it the LEAST likely unvoiced candidate? */
	    if((cc = uvtcost + pc->cost) < cmin) {
	      cmin = cc;
	      tcmin = uvtcost;
	      pmin = pc;
	    }
	  }
	}
	pc = pc->next;
      }
    } else {
      printf("Peak with no candidates in dp_dp()!\n");
    }
    /* NOTE(review): pmin is passed on unchecked here; if no candidate was
       ever selected it is still NULL at this point */
    if(!link_new_cand(pp,pmin,(int)linter,tcmin,1)) { /* voiced cands. */
      printf("Problems with link_new_cand()\n");
      exit(-1);
    }
    cands++;
    p = p->prev;
  }	/* finished all previous peaks in F0 range */

  /* get the cost of the unvoiced-unvoiced transition */
  if(bestuv && ((cc = ucost + bestuv->cost) < uvcmin)) {
    uvcmin = cc;
    uvtcmin = ucost;
    uvpmin = bestuv;
  }

  /* linter still holds the interval to the last previous peak visited */
  if(uvpmin) { /* record the best connection for the unvoiced hypothesis  */
    if(!link_new_cand(pp,uvpmin,(int)linter,uvtcmin,0)) { /* unvoiced cand. */
      printf("Problems with link_new_cand()\n");
      exit(-1);
    }
    cands++;
  }
  if(!pmin)		/* find a bogus best to maintain continuity */
    if((pc = get_best_track(pstart))) {
      link_new_cand(pp,pc,pc->inter,0.0,1); /* equal cost V and UV cands. */
      link_new_cand(pp,pc,pc->inter,0.0,0);
      cands += 2;
    } else
      /* printf("No prev. candidates and no track at T=%f)\n",PTIME(pp)) */;
  tcands += cands;
  return;

}

/*------------------------------------------------------*/
/* Free every peak reachable backwards from *tail, together with each
   peak's candidate list, and leave *tail set to NULL. */
static void free_peak_list(Peak **tail)
{
  Peak *pk, *prev;
  Pcand *pc, *pcn;

  for(pk = *tail; pk; pk = prev) {
    for(pc = pk->cands; pc; pc = pcn) {
      pcn = pc->next;
      free(pc);
    }
    prev = pk->prev;
    free(pk);
  }
  *tail = NULL;
}

/* Discard all peaks and candidates accumulated in the global negative
   and positive peak lists (neg and pos), leaving both lists empty. */
void clobber_history(void)
{
  free_peak_list(&neg);
  free_peak_list(&pos);
}


/*=============================================================================*/

void epochfinder(short *sp)
{
	int i, j, k;
	short *p, *q, *r, s, t, pm2, pm1, *ppm, thresh;
	Pcand *pk1, *pk2;
	double ssq;
	float wsize=(float)0.02, amax, maxrms, f0min=(float)50, f0max=(float)800, val;
	double outf=100.0;
	short *rms, *scrp;
	Vlist *v;
	int off, npoints, msec, outd=3;

	srate = (float)(1.0/spitem.frameduration);
    imin = (int)(srate/f0min);
    imax = (int)(srate/f0max);
    tcands = 0;
    peaks = 0;
    VUCOST = PEAK*UVCOST - AWARD + VOFFCOST;
    JITTER = log(1.0 + JITTER);

	rms = (short *)calloc(spitem.numframes,sizeof(short));
	scrp = (short *)calloc(spitem.numframes,sizeof(short));
	ppul = (short *)calloc(spitem.numframes,sizeof(short));
	npul = (short *)calloc(spitem.numframes,sizeof(short));

	/* Compute a running estimate of the rms using a rectangular window
	 of duration wsize seconds. */
	for (k=(int)(wsize*0.5/spitem.frameduration), i=0, ssq=0.0, q = p = sp; i<k; i++, p++)
		ssq += *p * *p;
	for(k *= 2, r=rms, maxrms=0.0; i < k; i++, p++) {
		*r++ = (short)(amax = (float)sqrt(ssq/i));
		if(amax > maxrms) maxrms = amax;
		ssq += *p * *p;
	}
	for(j = spitem.numframes - i; j-- > 0;q++, p++) {
		ssq -= *q * *q;
		*r++ = (short)(amax = (float)((ssq > 0.0)? sqrt(ssq/i) : 0.0));
		if(amax > maxrms) maxrms = amax;
		ssq += *p * *p;
	}
	for(ssq = (ssq > 0.0)? sqrt(ssq/i) : 0.0, k /= 2; k-- > 0;)
		*r++ = (short)ssq;

#ifdef EBUG
	sfsheader(&rmsitem,SP_TYPE,0,1,2,spitem.frameduration,spitem.offset,1,0,0);
	sprintf(rmsitem.history,"%s(%d.%02d;rms)",PROGNAME,
		spitem.datatype,spitem.subtype);
	putitem(filename,&rmsitem,spitem.numframes,rms);
#endif

	if (!vl) {		/* in case no V/UV label file is present */
		vl = (Vlist*)malloc(sizeof(Vlist));
		vl->start = 0.0;
		vl->end = spitem.numframes*spitem.frameduration;
		vl->next = NULL;
		vtime=vl->end;
	}

	/* vl is a linked list of voiced intervals (non-overlapping).
	For each voiced interval, find the optimum set of pitch peaks. */
	for (v = vl; v && (v->end <= 0.0); ) v = v->next;
    for (neg_p.cost = 0.0, pos_p.cost = 0.0 ; v; v = v->next) {
		if (v->start < spitem.numframes*spitem.frameduration) {
			if (v->start < 0.0) v->start = 0.0;
			off = (int)((v->start - 0.0) / spitem.frameduration);
			if (v->end > spitem.numframes*spitem.frameduration)
			    npoints = (int)(1 + ((spitem.numframes*spitem.frameduration - v->start) / spitem.frameduration));
			else
				npoints = (int)(1 + ((v->end - v->start) / spitem.frameduration));

			if (!(msec = (int)(0.5 + .0002 / spitem.frameduration)))	/* ~number of samples in .2ms */
				msec = 1;
			maxrms /= 4.0;
			/* Find all peaks with absolute value greater than the local rms*clip. */
			for (p= sp + off, pm2 = *p++, ppm = p + msec, pm1 = *p++, j = off + npoints - 2,
				i = off + 1, q=rms + off;	i < j ; i++, ppm++) {
				scrp[i] = 0;
				s = *p++;
				t = *q / 3;
				thresh = (short)(CLIP * *q++);
				if (!t) t = 1;
				if (ppeak && (pm1 > thresh)) {	/* large pos. peak possible? */
					if ((s < pm1) && (pm1 >= pm2)) { /* it's a positive peak.*/
						val = (float)((1.0 - PQUAL) * pm1 + PQUAL * (pm1 - *ppm));
						if (val > 0.0) {
							val = ((float)t)/val;
							scrp[i] = (short)(100.0/val);
							new_peak(&pos,i,val,t);
							do_dp(pos,&pos_p);
						}
					}
				}
				else {		/* maybe it's a large neg. peak... */
					if (npeak && (-pm1 > thresh) && (s > pm1) && (pm1 <= pm2)) {
						val = (float)(-((1.0 - PQUAL) * pm1 + PQUAL * (pm1 - *ppm)));
						if (val > 0.0) {
							val = ((float)t)/val;
							scrp[i] = (short)(-100.0/val);
							new_peak(&neg,i,val,t);
							do_dp(neg, &neg_p);
						}
					}
				}
				pm2 = pm1;
				pm1 = s;
			}

			/* Now backtrack to find optimum voicing track. */
			pk1 = get_best_peak(pos);
			if (pk1) {
				pos_p.cost = pk1->cost;
		    }
			pk2 = get_best_peak(neg);
			if (pk2) {
				neg_p.cost = pk2->cost;
			}
			while (pk1 && pk1->this) {
				if (pk1->type) ppul[pk1->this->sample] = AMP;
				pk1 = pk1->best;
			}
			while (pk2 && pk2->this) {
				if (pk2->type) npul[pk2->this->sample] = -AMP;
				pk2 = pk2->best;
		    }
			clobber_history();
		}
		dtime += (v->end - v->start);
		if (ttytest()) {
			fprintf(stderr,"%.1f%% done.\r",100.0*dtime/vtime);
			fflush(stderr);
		}
	}			/* done with all "voiced" segments */
	if (ttytest()) {
		fprintf(stderr,"                            \r");
		fflush(stderr);
	}

#ifdef EBUG
	sfsheader(&positem,SP_TYPE,0,1,2,spitem.frameduration,spitem.offset,1,0,0);
	sprintf(positem.history,"%s(%d.%02d;positive_peaks)",PROGNAME,
		spitem.datatype,spitem.subtype);
	putitem(filename,&positem,spitem.numframes,ppul);
	sfsheader(&negitem,SP_TYPE,0,1,2,spitem.frameduration,spitem.offset,1,0,0);
	sprintf(negitem.history,"%s(%d.%02d;negative_peaks)",PROGNAME,
		spitem.datatype,spitem.subtype);
	putitem(filename,&negitem,spitem.numframes,npul);
#endif

	/* return signal of pulses */
	if (pos_p.cost < neg_p.cost) {
		if (ppeak && npeak && ttytest()) fprintf(stderr,"Positive peaks chosen (use -p1)\n");
		pulsp = ppul;
		polarity=1;
	}
	else {
		if (ppeak && npeak && ttytest()) fprintf(stderr,"Negative peaks chosen (use -p-1)\n");
		pulsp = npul;
		polarity=-1;
	}
}

/*==========================================================================*/

/* Zero crossing rate: the number of transitions from a negative sample
   to a non-negative sample in sp[0..len-1], divided by the duration of
   len samples (len * spitem.frameduration). */
float zeroc(float *sp,int len)
{
	int		idx;
	int		crossings=0;
	float	prev;
	float	cur;

	for (idx=1, prev=sp[0]; idx<len; idx++, prev=cur) {
		cur = sp[idx];
		if ((prev<0)&&(cur>=0))
			crossings++;
	}

	return((float)(crossings/(len*spitem.frameduration)));
}

/* get a voicing decision */
int isvoiced(float *nsp,int len)
{
	float	*s1,*s2;
	float	mean,sum,sumsq,val;
	int		num;
	float	z,e,v,r;
	float	zp,ep,rp;
	int		i;

	/* get energy in unfiltered signal */
	sum=(float)0.0;
	num=len;
	s1 = nsp;
	for (i=0;i<num;i++,s1++) sum += (float)*s1;
	mean = sum/len;
	s1 = nsp;
	sumsq=(float)0.0;
	for (i=0;i<num;i++,s1++) {
		val = (float)(*s1-mean);
		sumsq += val * val;
	}
	e = sumsq/len;

	/* get first reflection coefficient for normal signal */
	sumsq=(float)0.0;
	num = len-1;
	s1 = nsp;
	s2 = nsp+1;
	for (i=0;i<num;i++,s1++,s2++) {
		sumsq += (float)(*s1-mean) * (float)(*s2-mean);
	}
	r = (sumsq/len)/e;
	rp = (float)(1.0/(1+exp(-(r-0.6)/0.2)));

	/* get zero crossing rate */
	z=zeroc(nsp,len);
	zp = (float)(1.0/(1+exp((z-1000)/200)));

	/* get energy */
	ep = (float)(1.0/(1+exp(-((10.0*log10(e))-maxenergy+30)/5)));

	/* combine scores */
	v = zp * ep * rp;

#ifdef EBUG
printf("%.3f\t%.3f\t%.3f\t%.3f\n",zp,ep,rp,v);
#endif

	return(v > 0.5);
}

/* main program */
void main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int32		it;		/* item selection */
	char		*ty;		/* item sub type */
	/* file variables */
	int32		ipitem=SP_TYPE;
	char		*iptype="0";
	char		*fxtype="0";
	char		*trtype="0";
	int			ofid;
	int		srate;
	int		i,j,k;
	FILTER	*hpfilt;
	float	s1,s2;
	int		wisize,stsize;
	int		ncoeff,nframe;
	LTIState	ltis;
	float	*wsp,*win,*xp;
	double	pred,omega,power;
	int		dofx=0,dotr=0,dovall=0;
	Vlist	*v;
	short	lastfx;
	double	stime,etime;
	int		lasttx,txval;
	float	lasttr;
	int		lastvoice,thisvoice;
	float	val,sum,sumsq;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:lftvp:")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Pitch epoch location V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if ((it == SP_TYPE) || (it == LX_TYPE)) {
					ipitem = it;
					iptype = ty;
				}
				else if (it==FX_TYPE) {
					fxtype=ty;
					dofx=1;
					dotr=0;
				}
				else if (it==TR_TYPE) {
					trtype=ty;
					dofx=0;
					dotr=1;
				}
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'l' :	/* input LX data as signal */
			ipitem=LX_TYPE;
			break;
		case 'f' :	/* input FX data for voicing */
			dofx=1;
			dotr=0;
			break;
		case 't' :	/* input TR data for voicing */
			dofx=0;
			dotr=1;
			break;
		case 'v' :	/* reat all voiced */
			dovall++;
			break;
		case 'p' :	/* search peak tpes */
			i=atoi(optarg);
			if (i<0) {
				ppeak=0;
				npeak=1;
			}
			else if (i>1) {
				ppeak=1;
				npeak=0;
			}
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-l) (-f|-t|-v) (-p pol) file",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no database file specified",NULL);

	/* get speech signal */
	getitem(filename,ipitem,iptype,&spitem,&sp);
	srate = (int)(0.5+1.0/spitem.frameduration);

	/* analysis parameters */
	wisize = (int)(0.5 + 0.03/spitem.frameduration);
	stsize = (int)(0.5 + 0.01/spitem.frameduration);
	ncoeff = (int)(2 + 0.001/spitem.frameduration);
	nframe = 1+(spitem.numframes-wisize)/stsize;

	/* get float buffers */
	fsp = (float *)calloc(spitem.numframes,sizeof(float));
	rsp = (float *)calloc(spitem.numframes,sizeof(float));

	if (ttytest()) fprintf(stderr,"Filtering ...\n");

	/* create a high-pass filter */
	hpfilt = filter_design(FILTER_HIGH_PASS,2,50,srate/2,srate);

	/* forward pass */
	for (i=0;i<spitem.numframes;i++)
		fsp[i] = filter_sample(hpfilt,(float)sp[i]);

	/* backward pass */
	filter_clear(hpfilt);
	for (i=spitem.numframes-1;i>=0;i--)
		fsp[i] = filter_sample(hpfilt,fsp[i]);

#ifdef EBUG
	sfsheader(&hpitem,ipitem,0,1,2,spitem.frameduration,spitem.offset,1,0,0);
	sprintf(hpitem.history,"%s(%d.%02d;highpass)",PROGNAME,
		spitem.datatype,spitem.subtype);
	for (i=0;i<spitem.numframes;i++)
		sp[i] = (short)(fsp[i]);
	putitem(filename,&hpitem,spitem.numframes,sp);
#endif

	/* get voiced regions */
	if (dofx) {
		getitem(filename,FX_TYPE,fxtype,&fxitem,&fx);
		lastfx=0;
		stime=0;
		for (i=0;i<fxitem.numframes;i++) {
			if ((lastfx==0)&&(fx[i]>0)) {
				stime=fxitem.offset+i*fxitem.frameduration;
			}
			else if ((lastfx>0)&&(fx[i]==0)) {
				etime=fxitem.offset+(i+1)*fxitem.frameduration;
				v = (Vlist *)calloc(1,sizeof(Vlist));
				v->start = stime;
				v->end = etime;
				v->next = vl;
				vl = v;
				vtime += (etime-stime);
			}
			lastfx = fx[i];
		}
		if (fx[fxitem.numframes-1]>0) {
			etime=fxitem.offset+fxitem.numframes*fxitem.frameduration;
			v = (Vlist *)calloc(1,sizeof(Vlist));
			v->start = stime;
			v->end = etime;
			v->next = vl;
			vl = v;
			vtime += (etime-stime);
		}
	}
	else if (dotr) {
		getitem(filename,TR_TYPE,trtype,&tritem,&tr);
		lasttr=0;
		stime=0;
		for (i=0;i<tritem.numframes;i++) {
			if ((lasttr < 0.5)&&(tr[i]>=0.5)) {
				stime=tritem.offset+i*tritem.frameduration;
			}
			else if ((lasttr>=0.5)&&(tr[i]<0.5)) {
				etime=tritem.offset+(i+1)*tritem.frameduration;
				v = (Vlist *)calloc(1,sizeof(Vlist));
				v->start = stime;
				v->end = etime;
				v->next = vl;
				vl = v;
				vtime += (etime-stime);
			}
			lasttr = tr[i];
		}
		if (tr[tritem.numframes-1]>=0.5) {
			etime=tritem.offset+tritem.numframes*tritem.frameduration;
			v = (Vlist *)calloc(1,sizeof(Vlist));
			v->start = stime;
			v->end = etime;
			v->next = vl;
			vl = v;
			vtime += (etime-stime);
		}
	}
	else if (!dovall) {
		/* do our own voice decision */
		if (ttytest()) fprintf(stderr,"Voicing detection ...\n");

		/* get max energy */
		maxenergy=0;
		for (i=0;(i+wisize) < spitem.numframes;i+=wisize) {

			/* calculate mean */
			for (j=0,sum=0;j<wisize;j++) sum += (float)(fsp[i+j]);
			sum /= wisize;

			/* remove mean and calculate energy */
			for (j=0,sumsq=0;j<wisize;j++) {
				val = (float)fsp[i+j] - sum;
				sumsq += val * val;
			}
			if (sumsq > maxenergy) maxenergy=sumsq;

		}
		maxenergy = 10.0 * log10(maxenergy/wisize);

		/* find voiced regions */
		lastvoice=0;
		for (i=0;(i+wisize)<=spitem.numframes;i+=stsize) {
			thisvoice = isvoiced(fsp+i,wisize);
			if (!lastvoice && thisvoice) {
				stime=spitem.offset+i*spitem.frameduration;
			}
			else if (lastvoice && !thisvoice) {
				etime=spitem.offset+(i+wisize)*spitem.frameduration;
				v = (Vlist *)calloc(1,sizeof(Vlist));
				v->start = stime;
				v->end = etime;
				v->next = vl;
				vl = v;
				vtime += (etime-stime);
#ifdef EBUG
printf("V+: %.3f %.3f\n",stime,etime);
#endif
			}
			lastvoice=thisvoice;
		}
		if (lastvoice) {
			etime=spitem.offset+spitem.numframes*spitem.frameduration;
			v = (Vlist *)calloc(1,sizeof(Vlist));
			v->start = stime;
			v->end = etime;
			v->next = vl;
			vl = v;
			vtime += (etime-stime);
		}
	}

	/* pre-emphasis */
	if (ipitem==SP_TYPE) {
		s1=0;
		for (i=0;i<spitem.numframes;i++) {
			s2=(float)(fsp[i] - 0.97*s1);
			s1=fsp[i];
			fsp[i]=s2;
		}

#ifdef EBUG
		sfsheader(&ppitem,ipitem,0,1,2,spitem.frameduration,spitem.offset,1,0,0);
		sprintf(ppitem.history,"%s(%d.%02d;pre-emphasis)",PROGNAME,
			spitem.datatype,spitem.subtype);
		for (i=0;i<spitem.numframes;i++)
			sp[i] = (short)(fsp[i]);
		putitem(filename,&ppitem,spitem.numframes,sp);
#endif
	}

	/* get window */
	wsp = (float *)calloc(wisize,sizeof(float));
	win = (float *)calloc(wisize,sizeof(float));
	omega = 2.0*M_PI/(wisize-1);
	for (i=0;i<wisize;i++)
		win[i] = (float)(0.54 - 0.46*cos(i*omega));

	/* do LPC analysis to get residual */
	if (ttytest()) fprintf(stderr,"LPC Analysis ...\n");
	for (i=ncoeff;(i+wisize)<=spitem.numframes;i+=stsize) {

		/* window */
		for (j=0;j<wisize;j++)
			wsp[j] = fsp[i+j] /* * win[j] */;

		/* get AR coeffs */
		LPCAutocorrelation(wsp,wisize,ncoeff,&ltis,&power);

		/* calculate (integrated) residual */
		xp = fsp+i;
		for (j=0;j<stsize;j++) {
			pred = 0.0;
			for (k=1;k<=ncoeff;k++) pred -= ltis.b[k] * xp[j-k];
			rsp[i+j] = (float)(xp[j] - pred + 0.95*rsp[i+j-1]);
		}
	}

#ifdef EBUG
	sfsheader(&rpitem,ipitem,0,1,2,spitem.frameduration,spitem.offset,1,0,0);
	sprintf(rpitem.history,"%s(%d.%02d;residual)",PROGNAME,
		spitem.datatype,spitem.subtype);
	for (i=0;i<spitem.numframes;i++)
		sp[i] = (short)(rsp[i]);
	putitem(filename,&rpitem,spitem.numframes,sp);
#endif

	/* find pitch epochs */
	if (ttytest()) fprintf(stderr,"Find Epochs ...\n");
	for (i=0;i<spitem.numframes;i++)
		sp[i] = (short)(rsp[i]);
	epochfinder(sp);

	/* create output Tx item */
	sfsheader(&txitem,TX_TYPE,0,4,1,spitem.frameduration,spitem.offset,0,0,1);
	if (dofx)
		sprintf(txitem.history,"%s(%d.%02d;%d.%02d;polarity=%d%s)",
			PROGNAME,spitem.datatype,spitem.subtype,
			fxitem.datatype,fxitem.subtype,polarity,(dovall)?",allvoiced":"");
	else if (dotr)
		sprintf(txitem.history,"%s(%d.%02d;%d.%02d;polarity=%d%s)",
			PROGNAME,spitem.datatype,spitem.subtype,
			tritem.datatype,tritem.subtype,polarity,(dovall)?",allvoiced":"");
	else
		sprintf(txitem.history,"%s(%d.%02d;polarity=%d%s)",
			PROGNAME,spitem.datatype,spitem.subtype,polarity,(dovall)?",allvoiced":"");

	if ((ofid=sfschannel(filename,&txitem))<0)
		error("could not open output channel to '%s'",filename);

	/* save Tx */
	lasttx=0;
	for (i=1;i<spitem.numframes;i++) {
		if (pulsp[i]) {
			txval = i - lasttx;
			sfswrite(ofid,1,&txval);
			lasttx = i;
		}
	}
	if (lasttx < spitem.numframes-1) {
		txval = spitem.numframes - lasttx;
		sfswrite(ofid,1,&txval);
	}

	/* update */
	if (!sfsupdate(filename))
		error("update error on '%s'",filename);

	/* that's all folks */
	exit(0);
}

