/* HQsynth -- High Quality formant synthesis based on Melvyn Hunt's procedure */

/* M.A.Huckvale - January 1987 */

#define PROGNAME "HQsynth"
#define PROGVERS "1.2s"
char	*progname=PROGNAME;

/* version 1.1 - february 1987
	- gain matching included in this module instead of HQanal
*/
/* version 1.2 - february 1987
	- user-supplied voiced excitation prototype
*/
/* version 1.2s - November 1987
	- SFS version
*/

/*--------------------------------------------------------------------------*/
/**MAN
.TH HQSYNTH SFS1 UCL
.SH NAME
HQsynth - High quality formant synthesis
.SH SYNOPSIS
.B HQsynth
(-i item) (-c) (-r|-e) file
.SH DESCRIPTION
.I HQsynth
performs excitation-synchronous formant synthesis from a set of formant
estimates using an LPC synthesis model using the procedure of Melvyn Hunt.
Input should be an FM item produced by
.I HQanal
(originally), and a residual waveform ("-r" option) if required.
A prototype voiced excitation waveform may be supplied ("-e" option),
preferably generated by
.I HQrsav.
Output is
a speech waveform.  De-emphasis is applied to the voiced regions only.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and version number.
.TP 11
.BI -i item
Select input item number.
.TP 11
.B -c
Use LPC coefficients as input
.I instead of formant estimates.
.TP 11
.B -r
Use residual waveform for excitation, rather than a synthetic excitation
waveform.
.TP 11
.B -e
Use a prototype excitation waveform included in file (LX item of 200 samples)
instead of default pulse.
.SH INPUT ITEMS
.IP 12.xx 11
HQanal analysed formant estimates.
.IP 2.xx 11
Optional residual waveform (matched to output waveform).
.IP 2.xx 11
Optional prototype excitation waveform (200 samples @ 10kHz).
.IP 14.xx 11
Optional LPC coefficients.
.SH OUTPUT ITEMS
.IP 1 11
High Quality formant resynthesized speech.
.SH VERSION/AUTHOR
1.2 - Mark Huckvale (from
.I fmconv
and
.I syn
by Lynn Whitaker)
.SH SEE ALSO
"Generation of controlled speech stimuli by pitch-synchronous LPC analysis of
natural utterances", M.J. Hunt & C.E. Harvenberg, 12th Intl. Congress on Acoustics,
Toronto Canada, 1986.
.SH BUGS
.I HQsynth
is not compatible with formant items produced by
.I rootlpc
or from ILS.  Analysis must be performed by
.I HQanal.
*/
/*--------------------------------------------------------------------------*/

/* standard definitions */
#include <math.h>

#include "HQ.h"

/* global data */
struct item_header	spitem;	/* output speech item header */
short			*sp;	/* output speech buffer, filled frame by frame by lpcsyn() */
struct item_header	rsitem;	/* header of the LX item loaded as residual or excitation prototype */
short			*rsp;	/* residual waveform buffer (NULL when excitation is synthetic) */
short			*esp;	/* excitation prototype buffer */
struct item_header	pcitem;	/* LPC item header (input item, or template for the coefficient buffer) */
struct item_header	fmitem;	/* input FM item header */

/* lpc excitation */
/* lpcfilter holds the most recent synthesis filter outputs, newest first;
   it is carried over between calls of lpcsyn() unless a flush is requested */
float	lpcfilter[20] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
/* lpcexcite is the voiced excitation prototype that lpcsyn() scales by the
   frame gain: a unit impulse by default (remaining entries are zero), and
   overwritten by main() when a prototype waveform is supplied */
float	lpcexcite[MAXWINDOW] = {1};
/* an earlier revision spelled the default out as an explicit table of
   200 values: 1.01 followed by 199 zeros */

/* LPC synthesis */
void lpcsyn(struct co_rec *lpcrec,int ncoeff,short *rsp,short *sp,int mode)
/* LPC coefficients */
/* # LPC coeffs */
/* residual waveform (or NULL) */
/* output speech waveform */
/* filter mode:
		3 = de-emp, flush filter
		2 = de-emp, keep filter
		1 = no de-emp, flush filter
		0 = no de-emp, keep filter */
{
	double		u[MAXWINDOW];	/* excitation waveform */
	double		pred;		/* predictor output */
	double		gain;		/* frame gain */
	double		ran;		/* random number */
	double		sqrt();
	short		*s;		/* speech buffer pointer */
	int		i,j;

	/* load excitation waveform */
	if (rsp != NULL) {		/* use residual */
		for (i=0;i<lpcrec->size;i++) u[i] = rsp[lpcrec->posn+i];
	}
	else if (lpcrec->flag) {	/* voiced section */
		for (i=0;i<lpcrec->size;i++)
			u[i] = lpcrec->gain * lpcexcite[i];
	}
	else {				/* unvoiced section */
		gain = lpcrec->gain / sqrt((double)lpcrec->size);
		for (i=0;i<lpcrec->size;i++) {
			/* gaussian amplitude noise */
			ran = 0.0;
			for (j=0;j<12;j++)
				ran += (float)(rand()%65536)/32768.0; /* 0.0..2.0 */
			u[i] = gain * (ran/12.0 - 1.0);	/* -gain .. +gain */
		}
	}

	/* initialise old output samples for filter */
	if (mode & FLUSHFILTER)
		for (i=0;i<ncoeff;i++)
			lpcfilter[i] = 0.0;

	/* filter excitation using LPC coeffs */
	for (i=0;i<lpcrec->size;i++) {
		pred = 0.0;
		for (j=0;j<ncoeff;j++) pred -= lpcrec->data[j] * lpcfilter[j];
		pred += u[i];
		for (j=ncoeff-1;j>0;j--) lpcfilter[j] = lpcfilter[j-1];
		lpcfilter[0] = (float)pred;
		sp[lpcrec->posn+i] = (short)pred;
	}

	/* de-emphasize voiced regions */
	if ((mode & DEEMP) && lpcrec->flag) {
		if (lpcrec->posn==0) {
			s = &sp[lpcrec->posn+1];
			i = 1;
		}
		else {
			s = &sp[lpcrec->posn];
			i = 0;
		}
		for (;i<lpcrec->size;i++,s++) *s = (short)(*s + PREEMP * *(s-1));
	}

}

/* total sum-squared signal */
/*
 * Return the sum of the squares of `count` consecutive samples of `sig`
 * starting at index `start`; zero when count is not positive.
 *
 *	sig	pointer to signal
 *	start	start position
 *	count	# samples to sum
 *
 * Each square is formed in double precision: a float product cannot hold
 * the square of a large 16-bit sample exactly (32767*32767 needs 30 bits),
 * which would inject a rounding error into every term of the sum.
 */
double sumsq(short *sig,int start,int count)
{
	double	sum = 0.0;
	double	v;
	int	i;

	for (i=0;i<count;i++) {
		v = (double)sig[start+i];
		sum += v * v;
	}

	return(sum);
}


/* calculate frame energy for formant record, given speech */
/*
 * Work out the excitation gain that makes the synthetic frame carry the
 * energy recorded for it, store it in lpcrec->gain and return it.
 *
 *	lpcrec		LPC coeffs; on entry lpcrec->gain holds the frame
 *			energy stored by HQanal on a logarithmic scale
 *	ncoeff		LPC order
 *	lastvoice	voicing decision on previous frame
 *
 * Two trial signals are synthesised over 90% of the frame:
 *	sig2 - zero excitation, filter memory as left by the previous frame
 *	sig1 - excitation of gain 100, filter memory cleared
 * The frame is modelled as g*sig1 + sig2, whose energy is
 *	g*g*ens1 + 2*g*cross + ens2
 * and this is set equal to the target energy and solved for g.  If that
 * quadratic cannot be solved the filter memory is discarded and the gain is
 * recomputed without it.  The result is never below 1.0.
 *
 * Side effects: the global lpcfilter[] is restored to its value on entry
 * (or cleared on the fallback path), and the random generator is left
 * seeded with the frame position so that the following lpcsyn() call
 * reproduces the noise used here.  lpcrec->posn and lpcrec->size are
 * restored before returning.
 */
float calcenergy(struct co_rec *lpcrec,int ncoeff,int lastvoice)
{
	short		sig1[MAXWINDOW];
					/* derived signal 1 */
	short		sig2[MAXWINDOW];
					/* derived signal 2 */
	float		oldfilter[20];	/* storage for LPC filter */
	double		ens1;		/* energy in signal 1 */
	double		ens2;		/* energy in signal 2 */
	double		ensp;		/* energy in speech */
	double		cross;		/* cross product sig1.sig2 */
	double		a,b,c;
	int		i,start,fullen,len;

	/* get parameters of calculation */
	start = lpcrec->posn;
	fullen= lpcrec->size;
	len   = (int)(0.90 * fullen);			/* analyse over 90% of window */
	if (len > MAXWINDOW) len = MAXWINDOW;		/* never overrun sig1/sig2 */
	ensp  = exp(LOG10E*lpcrec->gain/10.0);		/* stored there by HQanal */

	/* keep a record of lpc filter */
	for (i=0;i<ncoeff;i++) oldfilter[i]=lpcfilter[i];

	/* generate signal with pre-loading but no excitation */
	lpcrec->posn = 0;
	lpcrec->size = len;
	lpcrec->gain = 0;
	lpcsyn(lpcrec,ncoeff,NULL,sig2,
			(lastvoice==lpcrec->flag) ? DEEMP : DEEMP | FLUSHFILTER);

	/* generate signal with excitation, but no pre-loading */
	lpcrec->gain = 100.0;
	srand(start);	/* initialise random number generator */
	lpcsyn(lpcrec,ncoeff,NULL,sig1,FLUSHFILTER | DEEMP);
	srand(start);	/* reset for next time */

	/* get energy in window from signals; products are formed in double
	   because a float cannot hold the product of two large samples */
	ens1 = sumsq(sig1,0,len);
	ens2 = sumsq(sig2,0,len);
	cross = 0.0;
	for (i=0;i<len;i++) cross += (double)sig1[i] * (double)sig2[i];

	/* calculate new frame gain */
	a = ens1;
	b = 2.0 * cross;
	c = (ens2 - ensp);

	if ((4*a*c < b*b) && (a != 0.0)) {	/* quadratic has solution */
		/* find root of quadratic */
		lpcrec->gain = (float)(100.0 * (-b + sqrt(b*b - 4*a*c)) / (2.0*a));

		/* restore LPC filter */
		for (i=0;i<ncoeff;i++) lpcfilter[i] = oldfilter[i];
	}
	else {					/* failed to calculate gain */
		/* recalculate on basis of zero memory */
		/* NOTE(review): the linear term here is the plain sum of
		   sig1, not a cross product; with zero memory one would
		   expect it to vanish - kept as found, confirm intent */
		cross=0;
		for (i=0;i<len;i++) cross += sig1[i];
		a = ens1;
		b = 2.0 * cross;
		c = -ensp;
		if (a > 0.0)
			lpcrec->gain = (float)(100.0 * (-b + sqrt(b*b - 4*a*c)) / (2.0*a));
		else
			/* silent trial signal (or empty window): nothing to
			   scale, and dividing by 2*a would give NaN */
			lpcrec->gain = 1.0;

		/* clear LPC filter */
		for (i=0;i<ncoeff;i++) lpcfilter[i] = 0.0;
		fprintf(stderr,"posn=%d, filter flushed\n",start);
	}

	/* enforce minimum gain; written so that NaN is caught as well */
	if (!(lpcrec->gain >= 1.0)) lpcrec->gain = 1.0;

	/* restore LPC record parameters */
	lpcrec->posn = start;
	lpcrec->size = fullen;

	/* and return gain */
	return(lpcrec->gain);
}

/*
 * Convert a formant record into an LPC coefficient record.
 *
 *	formrec	input formant record (posn, size, flag, gain, npeaks and
 *		the freq/band of each formant are read)
 *	lpcrec	output coefficient record; data[0..ncoeff-1] are written
 *	ncoeff	number of coefficients to store
 *
 * Each formant with non-zero frequency contributes one second-order
 * section 1 + p*z^-1 + q*z^-2, and the sections are multiplied together to
 * give the predictor polynomial.  Coefficients beyond twice the number of
 * formants used are set to zero.  At most NFORMANT formants are used, and a
 * record with no formants yields all-zero coefficients.
 */
void fmtopc(struct fm_rec *formrec,struct co_rec *lpcrec,int ncoeff)
{
	COMPLEX		c[NFORMANT];	/* per formant: re = z^-1 coeff, im = z^-2 coeff */
	double		aa[40],a[40];	/* autoregressive working data */
	double		twopi;
	double 		x,w,zr;
	int		npeaks;		/* number of formants actually used */
	int		i,ii,j;

	twopi = 8.0 * atan(1.0);

	/* never use more formants than c[] has room for */
	npeaks = formrec->npeaks;
	if (npeaks > NFORMANT) npeaks = NFORMANT;

	/* calculate second-order section for each formant */
	/* NOTE(review): the skip over zero-frequency entries is bounded by
	   ncoeff, not by the number of formant slots in the record - confirm */
	j=0;
	for (i=0; i<npeaks; i++){
		while ((j<ncoeff) && (formrec->formant[j].freq==0)) j++;
		x = exp(-twopi * formrec->formant[j].band / SAMPFREQ);
		w = formrec->formant[j].freq * twopi / SAMPFREQ;
		zr = x * cos(w);
		c[i].re = -2.0*zr;
		c[i].im = x*x;
		j++;
	}

	/* calculate AR coefficients from complex poles */
	/* - code from /usr/dbase/rel2/gec/fmconv.src/subproc.c */
	a[0] = 1.0;
	ii = 0;				/* current polynomial order */
	if (npeaks > 0) {
		/* start from the first section; with no formants c[0] is
		   unset and the polynomial stays at order zero */
		a[1] = c[0].re;
		a[2] = c[0].im;
		ii = 2;
	}
	for (i=1; i<npeaks; i++) {
		/* multiply the order-ii polynomial by section i */
		aa[0] = 1.0;
		aa[1] = a[1] + c[i].re;
		for (j=2; j<=ii; j++)
			aa[j] = a[j] + a[j-1] * c[i].re + a[j-2] * c[i].im;
		aa[ii+1] = a[ii] * c[i].re + a[ii-1] * c[i].im;
		aa[ii+2] = a[ii] * c[i].im;
		ii += 2;
		for (j=1; j<=ii; j++) a[j] = aa[j];
	}
	for (ii++; ii<=ncoeff; ii++) a[ii]=0.0;	/* clear unused coeffs */

	/* create LPC record */
	lpcrec->posn = formrec->posn;
	lpcrec->size = formrec->size;
	lpcrec->flag = formrec->flag;
	lpcrec->mix  = (float)formrec->flag;
	lpcrec->gain = formrec->gain;
	for (i=0;i<ncoeff;i++) lpcrec->data[i] = (float)a[i+1];

}


/* main program */
main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int		it;		/* item selections */
	char		*ty;
	char		*rstype = "0";	/* default sub-type = last */
	char		*fmtype = "0";	/* default sub-type = last */
	char		*pctype = "0";	/* default sub-type = last */
	int		pcin = 0;	/* lpc coeff input required */
	int		rsin = 0;	/* input residual waveform */
	int		exin = 0;	/* excitation prototype input */
	/* file variables */
	char		filename[SFSMAXFILENAME];	/* dbase file name */
	int		fid;
	/* data variables */
	struct co_rec	*lpcrec;		/* standard coefficient record */
	struct fm_rec	*formrec;	/* standard formant record */
	int		i;
	int		nframes;	/* length of output speech waveform */
	int		wmsize;		/* number of analysis windows */
	int		inptype;	/* input item type */
	int		inpsubtype;	/* input item subtype */
	int		ncoeff;		/* # lpc coefficients */
	int		npole;		/* # lpc coefficients in frame */
	int		lastvoice;	/* frame voicing flag */
	double		exsumsq;	/* sum energy in prototype */
	double		exscale;	/* excitation scaling */
	double		sqrt();

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:cre")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: High Quality formant synthesis V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == LX_TYPE) {
					rstype = ty;
					rsin++;
				}
				else if (it == FM_TYPE)
					fmtype = ty;
				else if (it == PC_TYPE) {
					pctype = ty;
					pcin++;
				}
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'c' :	/* LPC coefficients required */
			pcin++;
			break;
		case 'r' :	/* residual waveform required */
			rsin++;
			break;
		case 'e' :	/* prototype excitation waveform required */
			exin++;
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-c) (-r|-e) file",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no database file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0)
		error("access error on %s",filename);

	/* locate input item and get length */
	if (pcin) {
		/* locate data set */
		if (!sfsitem(fid,PC_TYPE,pctype,&pcitem))
			error("cannot find input PC item in %s",filename);
		if (100*(int)(0.01/pcitem.frameduration) != SAMPFREQ)
			error("coefficients not sampled at 10kHz",NULL);

		/* get lpc buffer area */
		if ((lpcrec=(struct co_rec *)sfsbuffer(&pcitem,1))==NULL)
			error("could not get memory buffer",NULL);

		/* get last record */
		if (sfsread(fid,pcitem.numframes-1,1,lpcrec) != 1)
			error("read error on input data",NULL);
		nframes = lpcrec->posn+lpcrec->size;
		inptype = PC_TYPE;
		inpsubtype = pcitem.subtype;
		wmsize = pcitem.numframes;
		ncoeff = pcitem.framesize - 5;
	}
	else {
		/* locate data set */
		if (!sfsitem(fid,FM_TYPE,fmtype,&fmitem))
			error("could not find input FM item in %s",filename);
		if (100*(int)(0.01/fmitem.frameduration) != SAMPFREQ)
			error("formants not sampled at 10kHz",NULL);

		/* get fm buffer area */
		if ((formrec=(struct fm_rec *)sfsbuffer(&fmitem,1))==NULL)
			error("could not get memory buffer",NULL);

		/* get last record */
		if (sfsread(fid,fmitem.numframes-1,1,formrec) != 1)
			error("read error on input item",NULL);
		nframes = formrec->posn+formrec->size;
		inptype = FM_TYPE;
		inpsubtype = fmitem.subtype;
		wmsize = fmitem.numframes;
		ncoeff = 10;

		/* get lpc buffer area */
		pcitem.datatype=PC_TYPE;
		pcitem.floating= -1;
		pcitem.datasize=4;
		pcitem.framesize=ncoeff+sfsstruct[PC_TYPE]/4;
		if ((lpcrec=(struct co_rec *)sfsbuffer(&pcitem,1))==NULL)
			error("could not get memory buffer",NULL);

	}

	/* load residual waveform if required  */
	if (rsin || exin) {
		getitem(filename,LX_TYPE,rstype,&rsitem,&rsp);
		if (100*(int)(0.01/rsitem.frameduration) != SAMPFREQ)
			error("residual not sampled at 10kHz",NULL);
		if (rsitem.numframes == 200) {
			rsin=0;
			exin=1;
			esp = rsp;
			rsp = NULL;
		}
		else if (exin)
			error("excitation prototype not 200 samples",NULL);
		else if (rsitem.numframes != nframes)
			error("residual not of correct length",NULL);
	}
	else
		/* inform process to generate own excitation waveform */
		rsp = NULL;

	/* create prototype excitation for LPC resynthesis */
	if (exin) {
		/* find energy and scale factor */
		exsumsq=0.0;
		for (i=0;i<200;i++) exsumsq += (float)esp[i] * esp[i];
		exscale=1.0/sqrt(exsumsq);
		/* copy and scale */
		for (i=0;i<200;i++)
			lpcexcite[i] = (float)(exscale * (float)esp[i]);
	}

	/* set up speech item header */
	sfsheader(&spitem,SP_TYPE,0,2,1,(double)1.0/SAMPFREQ,0.0,1,0,0);
	if (rsin || exin)
		sprintf(spitem.history,"%s(%d.%02d,%d.%02d)",
			PROGNAME,inptype,inpsubtype,
			rsitem.datatype,rsitem.subtype);
	else
		sprintf(spitem.history,"%s(%d.%02d)",
			PROGNAME,inptype,inpsubtype);

	/* allocate buffer for output speech */
	sp = (short *) sfsbuffer(&spitem,nframes);
	if (sp == NULL)
		error("cannot allocate space for speech buffer",NULL);
	for (i=0;i<nframes;i++) sp[i] = 0;

	/* produce speech frame by frame */
	lastvoice=0;
	for (i=0;i<wmsize;i++) {
		/* get LPC coefficients */
		if (!pcin) {
			/* get formant data */
			if (sfsread(fid,i,1,formrec) != 1)
				error("read error on %s",filename);

			/* convert formant data to LPC coefficients */
			npole = formrec->npeaks * 2;
			fmtopc(formrec,lpcrec,npole);

			/* re-estimate gain for artificial excitation */
			if (!rsin) lpcrec->gain = calcenergy(lpcrec,npole,lastvoice);
		}
		else {
			/* read LPC data from file */
			if (sfsread(fid,i,1,lpcrec) != 1)
				error("read error on %s",filename);
			npole=ncoeff;
			while ((npole>0) && (lpcrec->data[npole-1]==0.0)) npole--;
		}

		/* perform LPC resynthesis in this window */
		lpcsyn(lpcrec,npole,rsp,sp,
			(lastvoice==lpcrec->flag) ? DEEMP : DEEMP | FLUSHFILTER);

		/* print progress */
		if (ttytest()) {
			if ((i+1==wmsize) || ((i % 5)==4)) printf("\rFrame %d/%d",i+1,wmsize);
			fflush(stdout);
		}

		/* keep record of voicing decision */
		lastvoice = lpcrec->flag;
	}

	/* close input file and update */
	sfsclose(fid);
	putitem(filename,&spitem,nframes,sp);

	/* that's all folks ... */
	printf("\r                     \r");fflush(stdout);
	exit(0);
}

