/* vdegree -- estimate a 0-1 value for voicing in a signal */

/* M.A. Huckvale - University College London */

/* version 1.0 - July 2003 */

/* version 1.1 - December 2003
	- add options to view individual parameters
	- add calculation of first reflection coefficient
*/

#define PROGNAME "vdegree"
#define PROGVERS "1.1"
char *progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH VDEGREE SFS1 UCL
.SH NAME
vdegree - estimate degree of voicing in a speech signal
.SH SYNOPSIS
.B vdegree
(-I) (-i item) (-a) (-t threshold) (-N) (-A|-E|-R|-Z) file
.SH DESCRIPTION
.I vdegree
uses a number of short-time features to estimate the degree of voicing
in a speech signal.  Measures include: energy distribution, zero-crossing
rate and autocorrelation.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and exit.
.TP 11
.BI -i item
Select input item number and type.
.TP 11
.B -a
Output annotations calculated from thresholded track, rather
than the track itself.
.TP 11
.BI -t threshold
Supply threshold for annotation output.  Default 0.5.
.TP 11
.B -A
Output track is size of peak in autocorrelation function.
.TP 11
.B -E
Output track is energy.
.TP 11
.B -R
Output track is first reflection coefficient.
.TP 11
.B -Z
Output track is zero-crossing rate.
.TP 11
.B -N
Output normalised track values rather than raw scores.
.SH INPUT ITEMS
.IP 1.xx 11
Speech waveform.
.IP 2.xx 11
Lx waveform.
.SH OUTPUT ITEMS
.IP 16 11
Voicing degree.
.SH VERSION/AUTHOR
1.1 - Mark Huckvale.
.SH SEE ALSO
noisanal(SFS1)
*/
/*--------------------------------------------------------------------------*/

/* global declarations */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include "sfs.h"			/* header structures */
#include "filter.h"		/* filtering */

/* output track selection: values taken by the global 'output' */
#define DECISION	0	/* combined voicing score (default) */
#define AUTOC		1	/* autocorrelation peak (-A) */
#define	ENERGY		2	/* energy (-E) */
#define	REFLECT		3	/* first reflection coefficient (-R) */
#define	ZEROC		4	/* zero-crossing rate (-Z) */

/* analysis control */
#define	STEP	0.005	/* 5ms step between successive windows */
#define WINDOW	0.030	/* 30ms window */
#define LOWFX	50	/* 50 Hz: sets longest autocorrelation lag, sampfreq/LOWFX */
#define HIGHFX	400	/* 400 Hz: sets shortest autocorrelation lag, sampfreq/HIGHFX */
#define LOFILT_DEF	20.0	/* high pass frequency */
double LOFILT=LOFILT_DEF;	/* lower edge of band-pass filter applied to speech input */
#define HIFILT_DEF	1200.0	/* low-pass frequency */
double HIFILT=HIFILT_DEF;	/* upper edge of band-pass filter applied to speech input */

/* global data */
struct item_header 	spitem;		/* input item header data */
double			wtime=WINDOW;	/* window size (seconds) */
short			*sp;		/* raw input sample buffer (one analysis window) */
float			*fsp;		/* filtered speech buffer (one window, mean removed) */
float			*nsp;		/* normal (unfiltered) speech buffer (one window, mean removed) */
struct item_header 	opitem;		/* output item header data */
struct	an_rec	*an;			/* single annotation record used when writing annotations */
int			sampfreq;	/* current sampling frequency */
float			*acoeff;	/* autocorrelation function, indexed by lag 0..sampfreq/LOWFX */
float			smoothmem[5]={0.0,0.0,0.0,0.0,0.0};	/* last five track values seen by smooth() */
int				smoothinit=0;	/* set once smoothmem has been primed with the first value */
float			maxenergy;	/* largest window energy found by the first pass in main() */
int				output=DECISION;	/* which measure process() returns */
int				donorm=0;	/* -N: return normalised (0-1) measure rather than raw value */
int				doannot=0;	/* -a: write annotations rather than a track */
double			threshold=0.5;	/* -t: track value at which annotations switch V+/V- */

/* autocorrelation */
float autoc(float *sp,int len,float *acoeff,int l1,int l2)
{
	register int	i,j;
	int		num;
	float		sum,sumsq1,sumsq2,norm;
	float		*s1,*s2;
	float		mean=0;

	/* zero autocorrelation vector */
	for (i=0;i<=l2;i++) acoeff[i]=(float)0.0;

	/* for zero delay */
	sum=(float)0.0;
	num=len;
	s1 = sp;
	for (j=0;j<num;j++,s1++) sum += *s1 * *s1;
	acoeff[0] = sum/len;

	/* for unit delay */
	sum=(float)0.0;
	num = len-1;
	s1 = sp;
	s2 = sp+1;
	for (j=0;j<num;j++,s1++,s2++) sum += *s1 * *s2;
	acoeff[1] = sum/len;

	/* for each delay in expected freq. range */
	for (i=l1;i<=l2;i++) {
		sumsq1=sumsq2=sum=(float)0.0;
		num = 3*i;	/* up to three possible cycles */
		if (num > (len-i)) num = len-i;
		s1 = sp;
		s2 = sp + i;
		for (j=0;j<num;j++) {
			sumsq1 += *s1 * *s1;
			sumsq2 += *s2 * *s2;
			sum += *s1++ * *s2++;
		}
		norm = (float)(sqrt(sumsq1)*sqrt(sumsq2)/num);
		acoeff[i] = (sum/num)/norm;
		mean += acoeff[i];
	}
	return(mean/(l2-l1+1));
}

/*
 * zeroc -- zero crossing rate
 *
 *	sp	window of samples
 *	len	number of samples in the window
 *
 * Counts the places where a negative sample is followed by a non-negative
 * one, and divides that count by the global window duration 'wtime'.
 */
float zeroc(float *sp,int len)
{
	int	crossings=0;
	int	k=1;
	float	prev;
	float	cur;

	/* walk the window comparing each sample with its predecessor */
	prev = sp[0];
	while (k < len) {
		cur = sp[k];
		if (prev < 0) {
			if (cur >= 0) crossings++;
		}
		prev = cur;
		k++;
	}

	return((float)(crossings/wtime));
}

/* energy */
float energy(float *sp,int len)
{
	register int	i;
	float			s,sumsq=1;

	for (i=0;i<len;i++) {
		s = sp[i];
		sumsq += (float)(s*s);
	}
	return((float)(10.0*log10(sumsq/len)));
}

/*
 * smooth -- 5-point median smoother
 *
 *	val	newest track value
 *
 * Shifts the global history 'smoothmem' along by one, appends val, and
 * returns the median of the five values now held.
 *
 * The median is taken from a sorted copy of the history.  The previous
 * implementation discarded the two largest values by overwriting them with
 * 0, which gave the wrong answer whenever the history held negative values
 * (raw reflection coefficients or raw dB energies can be negative).
 */
float smooth(float val)
{
	int	i,j;
	float	key;
	float	temp[5];

	/* shift history and append the new value */
	for (i=0;i<4;i++) smoothmem[i] = smoothmem[i+1];
	smoothmem[4]=val;

	/* sort a copy into ascending order (insertion sort), leaving the
	   history itself in time order */
	for (i=0;i<5;i++) temp[i]=smoothmem[i];
	for (i=1;i<5;i++) {
		key = temp[i];
		for (j=i-1;(j>=0)&&(temp[j]>key);j--) temp[j+1]=temp[j];
		temp[j+1]=key;
	}

	/* middle element of five is the median */
	return(temp[2]);
}

/*
 * process -- compute the voicing measures for one window
 *
 *	nsp	window used for the reflection coefficient and zero-crossing rate
 *	fsp	window used for the autocorrelation peak and energy
 *	len	number of samples in each window
 *
 * Four raw measures are computed and each is mapped through a logistic
 * function to a 0-1 score:
 *	r/rp	first reflection coefficient acoeff[1]/acoeff[0] of nsp
 *	a/ap	largest normalised autocorrelation of fsp over lags
 *		sampfreq/HIGHFX .. sampfreq/LOWFX
 *	z/zp	zero-crossing rate of nsp (score falls as rate rises)
 *	e/ep	energy of fsp in dB, relative to the global maxenergy
 * The combined score v is the product of the four scores with the smallest
 * one left out.
 *
 * Returns v, or the measure selected by the global 'output' (its score if
 * 'donorm' is set, otherwise its raw value).
 *
 * A silent window no longer yields NaN: the reflection coefficient is taken
 * as 0 when acoeff[0] is 0, and when the smallest score has underflowed to 0
 * the other three are multiplied directly instead of dividing by it.
 * The raw energy e is still -infinity for a window with acoeff[0]==0.
 */
float process(float *nsp,float *fsp,int len)
{
	float	a,z,e,v,r;
	float	ap,zp,ep,rp;
	float	min;
	float	score[4];
	int		imin;
	int		tlo=(int)(sampfreq/HIGHFX);
	int		thi=(int)(sampfreq/LOWFX);
	int		i;

	/* get first reflection coefficient for normal signal */
	autoc(nsp,len,acoeff,tlo,tlo);
	if (acoeff[0] > 0)
		r = acoeff[1]/acoeff[0];
	else
		r = 0;	/* no energy: avoid 0/0 */
	rp = (float)(1.0/(1+exp(-(r-0.6)/0.2)));

	/* get autocorrelation peak from filtered signal */
	autoc(fsp,len,acoeff,tlo,thi);
	a=0;
	for (i=tlo;i<=thi;i++) if (acoeff[i] > a) a=acoeff[i];
	ap = (float)(1.0/(1+exp(-(a-0.75)/0.1)));

	/* get zero crossing rate */
	z=zeroc(nsp,len);
	zp = (float)(1.0/(1+exp((z-1000)/200)));

	/* get energy: taken from the lag-0 autocorrelation of the filtered
	   signal rather than from energy() */
	e = 10.0*log10(acoeff[0]);
	ep = (float)(1.0/(1+exp(-(e-maxenergy+30)/5)));

	/* combine scores: find the weakest one ... */
	score[0]=ap;
	score[1]=zp;
	score[2]=ep;
	score[3]=rp;
	imin=0;
	for (i=1;i<4;i++) if (score[i] < score[imin]) imin=i;
	min=score[imin];

	/* ... and take the product of the remaining three */
	if (min > 0)
		v = ap * zp * ep * rp / min;
	else {
		/* weakest score is zero, so it cannot be divided back out */
		v = 1;
		for (i=0;i<4;i++) if (i!=imin) v *= score[i];
	}

	/* diagnostic dump of raw measures and scores, written to stdout for every window */
	printf("a0=%.3f\ta1=%.3f\tamax=%.3f\tz=%.3f\te=%.3f\n",acoeff[0],acoeff[1],a,z,e);
	printf("ap=%.3f\tzp=%.3f\tep=%.3f\trp=%.3f\tv=%.3f\n",ap,zp,ep,rp,v);

	/* return the individual measure if one was requested */
	switch (output) {
	case AUTOC: return((donorm)?ap:a);
	case ENERGY: return((donorm)?ep:e);
	case REFLECT: return((donorm)?rp:r);
	case ZEROC:	return((donorm)?zp:z);
	default: break;
	}

	return(v);
}

/*
 * main program
 *
 * Decodes the command line, opens the file and locates the input speech or
 * Lx item, then makes two passes over the waveform in windows of 'wtime'
 * seconds stepped by STEP seconds:
 *	pass 1	finds the largest window energy (maxenergy)
 *	pass 2	filters each window, calls process(), median-smooths the
 *		result and writes it to the output item, either as a track
 *		value per window or, with -a, as "V+"/"V-" annotations where
 *		the smoothed value crosses 'threshold'
 * Finally the file is updated with the new item.
 *
 * Failures (bad options, missing item, allocation, filter design, write
 * errors) are reported through error().
 */
void main(int argc,char *argv[])
{
	/* local variables */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument */
	int		errflg=0;	/* option error flag */
	int		c;		/* option char */
	int		it;
	char		*ty;
	char		*endp;		/* end of parsed threshold text */
	int		iptype=SP_TYPE;
	char		*sptype = "0";	/* input sp sub-type = last */
	char		filename[SFSMAXFILENAME];
					/* database file name */
	int		namelen;	/* length of expanded file name */
	int		fid,ofid;	/* file descriptors */

	/* processing variables */
	int		stsize;		/* # samples in step */
	int		wisize;		/* # samples in window */
	int		i,f=0;		/* counters */
	int		nframes;	/* # output frames */
	int		j;
	float	trval,lastrval=0;
	float	e;
	float	sum;
	FILTER		*bpfilt;

	/* decode switches (getopt returns -1 when the options are exhausted) */
	while ( (c = getopt(argc,argv,"Ii:at:AERZN")) != -1 ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: estimate degree of voicing V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* item spec */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == SP_TYPE) {
					iptype=SP_TYPE;
					sptype=ty;
				}
				else if (it == LX_TYPE) {
					iptype=LX_TYPE;
					sptype=ty;
				}
				else
					error("unsuitable item specification %s",optarg);
			}
			else
				error("illegal item specification %s",optarg);
			break;
		case 'a' :	/* output annotations */
			doannot=1;
			break;
		case 't' :	/* threshold */
			threshold=strtod(optarg,&endp);
			/* reject an argument that does not start with a number */
			if (endp==optarg)
				error("illegal threshold value '%s'",optarg);
			break;
		case 'A' :	/* output autocorrelation peak */
			output=AUTOC;
			break;
		case 'E' :	/* output energy peak */
			output=ENERGY;
			break;
		case 'R' :	/* output reflection coeff */
			output=REFLECT;
			break;
		case 'Z' :	/* output zero crossing rate */
			output=ZEROC;
			break;
		case 'N' :	/* output normalised values */
			donorm=1;
			break;
		case '?' :	/* unknown */
			errflg++;
			break;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-a) (-t threshold) (-N) (-A|-E|-R|-Z) file\n",PROGNAME);

	/* get filename, refusing one that will not fit the buffer */
	if (optind < argc) {
		namelen = snprintf(filename,sizeof(filename),"%s",sfsfile(argv[optind]));
		if ((namelen < 0) || (namelen >= (int)sizeof(filename)))
			error("file name too long '%s'",argv[optind]);
	}
	else
		error("no data file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0) {
		if (fid==-1)
			error("'%s': file not found",filename);
		else
			error("access error on '%s'",filename);
	}

	/* locate input speech item */
	if (!sfsitem(fid,iptype,sptype,&spitem))
		error("cannot find input item in '%s'",filename);

	/* get window parameters */
	sampfreq = (int)(0.5 + (1.0/spitem.frameduration));
	stsize = (int)(0.5 + (STEP/spitem.frameduration));
	wisize = (int)(0.5 + (wtime/spitem.frameduration));

	/* a step of zero samples would divide by zero below and never advance */
	if (stsize < 1)
		error("sampling rate of input item too low for analysis",NULL);

	/* estimate # output frames for user info */
	nframes = (spitem.numframes/stsize)-(wisize/stsize)+1;

	/* get input buffer */
	if ((sp=(short *)sfsbuffer(&spitem,wisize)) == NULL)
		error("cannot get buffer for speech",NULL);
	if ((fsp=(float *)calloc(wisize,sizeof(float)))==NULL)
		error("cannot get buffer for speech",NULL);
	if ((nsp=(float *)calloc(wisize,sizeof(float)))==NULL)
		error("cannot get buffer for speech",NULL);
	if ((acoeff=(float *)calloc(1+sampfreq/LOWFX,sizeof(float)))==NULL)
		error("cannot get buffer for speech",NULL);

	/* generate output item header */
	if (doannot) {
		sfsheader(&opitem,AN_TYPE,-1,1,-1,spitem.frameduration,spitem.offset,1,0,0);
		sprintf(opitem.history,"%s/AN(%d.%02d;threshold=%g)",PROGNAME,spitem.datatype,spitem.subtype,threshold);

		if ((an = (struct an_rec *)sfsbuffer(&opitem,1)) == NULL)
			error("cannot get buffer for annotation",NULL);
	}
	else {
		sfsheader(&opitem,TR_TYPE,1,4,1,stsize*spitem.frameduration,spitem.offset,1,0,0);
		switch (output) {
		case DECISION:
			sprintf(opitem.history,"%s/TR(%d.%02d)",PROGNAME,spitem.datatype,spitem.subtype);
			break;
		case AUTOC:
			sprintf(opitem.history,"%s/TR(%d.%02d;output=autoc%s)",PROGNAME,spitem.datatype,spitem.subtype,(donorm)?"-norm":"");
			break;
		case ENERGY:
			sprintf(opitem.history,"%s/TR(%d.%02d;output=energy%s)",PROGNAME,spitem.datatype,spitem.subtype,(donorm)?"-norm":"");
			break;
		case REFLECT:
			sprintf(opitem.history,"%s/TR(%d.%02d;output=reflect%s)",PROGNAME,spitem.datatype,spitem.subtype,(donorm)?"-norm":"");
			break;
		case ZEROC:
			sprintf(opitem.history,"%s/TR(%d.%02d;output=zeroc%s)",PROGNAME,spitem.datatype,spitem.subtype,(donorm)?"-norm":"");
			break;
		}
	}

	/* open output channel */
	if ((ofid=sfschannel(filename,&opitem)) < 0)
		error("cannot open temporary file",NULL);

	/* design filter: high-pass for Lx input, band-pass LOFILT..HIFILT for speech */
	if (iptype==LX_TYPE)
		bpfilt = filter_design(FILTER_HIGH_PASS,8,50,0.5/spitem.frameduration,1.0/spitem.frameduration);
	else
		bpfilt = filter_design(FILTER_BAND_PASS,4,LOFILT,HIFILT,1.0/spitem.frameduration);
	if (bpfilt == NULL)
		error("could not design filter",NULL);

	/* find max energy window */
	/* NOTE(review): maxenergy starts from its static initial value of 0, so
	   a file whose every window is below 0 dB leaves it at 0 - confirm that
	   floor is intended */
	for (i=0;sfsread(fid,i,wisize,sp)==wisize;i+=stsize) {

		/* copy waveform into float buffer */
		for (j=0,sum=0;j<wisize;j++) {
			fsp[j] = (float)(sp[j]);
			sum += fsp[j];
		}
		sum /= wisize;
		for (j=0;j<wisize;j++) fsp[j] -= sum;

		e = energy(fsp,wisize);
		if (e > maxenergy) maxenergy = e;
	}
printf("maxenergy=%g\n",maxenergy);

	/* perform processing */
	for (i=0;sfsread(fid,i,wisize,sp)==wisize;i+=stsize,f++) {

		/* copy waveform into float buffer */
		for (j=0,sum=0;j<wisize;j++) {
			nsp[j] = (float)(sp[j]);
			sum += nsp[j];
		}
		sum /= wisize;
		for (j=0;j<wisize;j++) nsp[j] -= sum;

		/* do filtering: each input sample passes through the filter
		   exactly once, so only the newly arrived step is filtered
		   after the first window */
		if (i==0) {
			/* first time - filter whole window */
			for (j=0,sum=0;j<wisize;j++) {
				fsp[j] = filter_sample(bpfilt,(double)sp[j]);
				sum += fsp[j];
			}
		}
		else {
			/* take copy from previous window */
			/* NOTE(review): these retained samples already had the
			   previous window's mean subtracted below */
			for (j=stsize,sum=0;j<wisize;j++) {
				fsp[j-stsize] = fsp[j];
				sum += fsp[j-stsize];
			}
			/* and append filtered version of new part */
			for (j=wisize-stsize;j<wisize;j++) {
				fsp[j] = filter_sample(bpfilt,(double)sp[j]);
				sum += fsp[j];
			}
		}

		/* remove mean */
		sum /= wisize;
		for (j=0;j<wisize;j++) fsp[j] -= sum;

		/* process window: Lx input uses the filtered signal for every measure */
		if (iptype==LX_TYPE)
			trval = process(fsp,fsp,wisize);
		else
			trval = process(nsp,fsp,wisize);

		/* smooth: prime the history with the first value so the median
		   is not pulled towards the initial zeros */
		if (!smoothinit) {
			for (j=0;j<5;j++) smoothmem[j]=trval;
			smoothinit=1;
		}
		trval = smooth(trval);

		/* save estimate */
		if (doannot) {
			if ((lastrval < threshold) && (trval >= threshold)) {
				/* rising through threshold */
				an->posn = i;
				an->size = 0;
				strcpy(an->label,"V+");
				if (sfswrite(ofid,1,an) != 1)
					error("write error on output file",NULL);
			}
			else if ((lastrval >= threshold) && (trval < threshold)) {
				/* falling through threshold */
				an->posn = i;
				an->size = 0;
				strcpy(an->label,"V-");
				if (sfswrite(ofid,1,an) != 1)
					error("write error on output file",NULL);
			}
		}
		else {
			if (sfswrite(ofid,1,&trval) != 1)
				error("write error on output file",NULL);
		}
		lastrval = trval;

		/* report progress */
		if (((f%100)==99) && ttytest()) {
			printf("\rFrame %d/%d",f+1,nframes);
			fflush(stdout);
		}
	}
	if (ttytest()) printf("\r                       \r");

	/* and update file */
	if (!sfsupdate(filename))
		error("backup error on '%s'",filename);

	/* ... that's all folks */
	exit(0);
}
