/* vocsynth -- general purpose filterbank synthesizer */

/* M.A.Huckvale - University College London */

/* version 1.0 - July 2003
	- from vocsyn 1.3 sources
*/

/* program name and version: printed by the -I option, and PROGNAME is also
   used in the usage message and in the history field of the output item */
#define PROGNAME "vocsynth"
#define PROGVERS "1.0"
/* global copy of the program name
   NOTE(review): presumably picked up by the SFS library when reporting
   errors -- confirm against the library sources */
char	*progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH VOCSYNTH SFS1 UCL SFS
.SH NAME
vocsynth - general purpose filterbank synthesizer
.SH SYNOPSIS
.B vocsynth
(-I) (-i item) (-w|-m|-x spitem) (-s srate) file
.SH DESCRIPTION
.I vocsynth
is a general-purpose means for converting filterbank analyses
back into speech signals.  Energies are taken from a COEFF item
analysed using voc8, voc19, voc26 or filtbank.  Fundamental frequency
is taken from a FX item (none in 'whisper' mode),
while voicing mixtures can be optionally supplied
in a TRACK item.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and version number.
.TP 11
.BI -i item
Select input item.
.TP 11
.B -w
"Whisper" data, when no FX is present.
.TP 11
.BI -x spitem
Use external excitation from given speech item (must match in
sampling rate).
.TP 11
.B -m
Use voice mix track to control degree of periodic excitation.
Designed for use with output of vdegree(SFS1) program.
.TP 11
.BI -s samplerate
Specify synthesis output sampling rate (should be bigger than
twice top of highest filter).  A requested rate that is not greater
than the default rate for the filterbank in use is ignored.
.SH INPUT ITEMS
.IP FX 11
Fundamental frequency contour, used for voice-switch and Fx.
.IP CO 11
Filterbank energies generated by voc8, voc19, voc26 or filtbank.
.IP SP 11
(External excitation mode) Input signal to filter.
.IP TR 11
(Optional) Voicing mixture track (see vdegree).
.SH OUTPUT ITEMS
.IP SP 11
Speech waveform.
.SH VERSION/AUTHOR
1.0 - Mark Huckvale
.SH SEE ALSO
voc8(SFS1), voc19(SFS1), voc26(SFS1), filtbank(SFS1), vocsyn(SFS1)
*/
/*--------------------------------------------------------------------------*/

/* standard definitions */
#include "SFSCONFG.h"
#include <stdio.h>		/* standard io library */
#include <stdlib.h>
#include <string.h>		/* strcpy, strcmp */
#include <unistd.h>
#include <math.h>
#include "sfs.h"		/* database structures */
#include "filter.h"

/* manifest constants */
#define MAXSYNBUF	2000	/* maximum re-synth window size (samples per CO frame) */

/* global data */
struct item_header	fxitem;		/* header of input FX item (filled in by main) */
short			*fx;		/* FX values, one per FX frame, loaded by main */
struct item_header	coitem;		/* header of input CO (filterbank energy) item */
struct item_header	spitem;		/* header of output SP item created by main */
int			whisper=0;	/* set by -w: no FX used, noise excitation only */
int			extinp=0;	/* set by -x: excitation taken from an SP item */
struct item_header	exitem;		/* header of external excitation SP item */
short			*exsp;		/* external excitation samples, loaded by main */
struct item_header 	tritem;		/* header of voice mix TR item */
float			*mix;		/* voice mix values, one per TR frame, loaded by main */
int				domix=0;	/* set by -m or by selecting a TR item with -i */
int				skiplo=0;	/* index of first channel summed in process();
						   never changed from 0 in this file */

/* filter coefficients */
FILTER	**chan;			/* one resonator per filterbank channel (nchan entries) */
int		nchan;		/* number of filterbank channels */
FILTER	*uvchan=NULL;		/* alternative filter for the top channel, used by
				   process() on frames flagged unvoiced; NULL if none */

/* re-synthesis data */
float	ex[MAXSYNBUF];		/* floating excitation */
float	fsp[MAXSYNBUF];		/* floating speech */
short	sp[MAXSYNBUF];		/* integer speech */

/* convert an Fx value in Hz into a pulse spacing in output samples;
   never returns less than 1 so that pulse-placement loops always advance */
static int getex_period(int fxval)
{
	int	period;

	period = (int)(1.0/ (fxval * spitem.frameduration));
	if (period < 1) period = 1;
	return(period);
}

/* get one frame of excitation
 *
 * Fills the global buffer ex[0..len-1] with the excitation for the frame
 * that begins at time 'start' (seconds).  The source depends on the mode
 * flags set up by main():
 *   extinp  - samples are copied from the external SP item (scaled by
 *             1/800); samples beyond the end of that item are left at zero
 *   whisper - white noise only
 *   domix   - noise and a pulse train weighted by the voice mix track
 *   default - pulse train where FX >= 40, white noise otherwise
 * The position of the next pitch pulse is carried over between calls in a
 * static variable, so calls must be made in frame order.
 *
 * The caller must ensure 0 <= len <= MAXSYNBUF.
 *
 * Returns 1 if the frame should be treated as voiced, 0 otherwise.
 */
int getex(double start,int len)
{
	int		i,idx,ncopy;
	int		fxval=0,period;
	static int	fxoffset = 0;	/* offset of next pulse into next frame */
	float	vmix;

	/* zero excitation */
	for (i=0;i<len;i++) ex[i] = 0.0;

	/* check for external input */
	if (extinp) {
		idx = (int)((start-exitem.offset)/exitem.frameduration);
		if (idx < 0) idx = 0;
		/* copy as much of the frame as the excitation signal covers,
		   so a final partial frame is not dropped altogether */
		ncopy = exitem.numframes - idx;
		if (ncopy > len) ncopy = len;
		for (i=0;i<ncopy;i++)
			ex[i] = exsp[idx+i]/800.0;
		return(0);
	}

	/* get fx value for frame (an empty FX item leaves fxval at 0 = unvoiced) */
	if (!whisper && (fxitem.numframes > 0)) {
		idx = (int)((start-fxitem.offset)/fxitem.frameduration);
		if (idx < 0) idx = 0;
		if (idx >= fxitem.numframes) idx = fxitem.numframes - 1;
		fxval = fx[idx];
	}

	/* check voiced or unvoiced */
	if (whisper || (fxval < 40)) {
		/* unvoiced -- put in white noise */
		for (i=0;i<len;i++) {
			ex[i] = ((rand() % 2000) - 1000)/40.0;
		}
		/* restart pulse train at beginning of next voiced frame */
		fxoffset = 0;
		return(0);
	}
	else if (domix) {
		/* voiced with mix track -- blend noise and pulses */
		idx = (int)((start-tritem.offset)/tritem.frameduration);
		if (idx >= tritem.numframes) idx = tritem.numframes - 1;
		if (idx < 0) idx = 0;
		/* with no mix data behave as the plain voiced case */
		vmix = (tritem.numframes > 0) ? mix[idx] : 1.0;
		for (i=0;i<len;i++) {
			ex[i] = (1-vmix)*((rand() % 2000) - 1000)/40.0;
		}
		period = getex_period(fxval);
		for (i=fxoffset;i<len;i+=period) {
			ex[i] += vmix*100.0;
		}
		fxoffset = i - len;
		return((vmix > 0.5)?1:0);
	}
	else {
		/* voiced -- put in pulses */
		period = getex_period(fxval);
		for (i=fxoffset;i<len;i+=period) {
			ex[i] = 100.0;
		}
		fxoffset = i - len;
		return(1);
	}

}

/* re-synthesis routine */
void process(int fid,int ofid)
{
	int		i,j,k;
	struct co_rec	*co1,*co2,*cotmp;
	int		len,vflag;
	int		lastposn=0;
	float	sv,suv,smax=0;
	int		first=1;

	/* get buffer for coefficients */
	if ((co1 = (struct co_rec *)sfsbuffer(&coitem,1))==NULL)
		error("could not get memory buffer for CO",NULL);
	if ((co2 = (struct co_rec *)sfsbuffer(&coitem,1))==NULL)
		error("could not get memory buffer for CO",NULL);

	/* process speech frame by frame */
	for (i=0;sfsread(fid,i,1,co2)==1;i++) {

		/* get length of waveform */
		len = (int)(0.5 + (co2->posn+co2->size-lastposn) * coitem.frameduration/spitem.frameduration);
		if (len >= MAXSYNBUF)
			error("frame too long in CO item",NULL);

		/* convert from dB */
		for (k=0;k<nchan;k++)
			co2->data[k] = pow(10.0,co2->data[k]/20);
		if (first) {
			for (k=0;k<nchan;k++)
				co1->data[k] = co2->data[k];
			first=0;
		}

		/* get basic excitation */
		vflag = getex(lastposn*coitem.frameduration+coitem.offset,len);

		/* filter excitation */
		for (j=0;j<len;j++) {
			fsp[j]=0.0;
			for (k=skiplo;k<nchan-1;k++)
				if (k&1)
					fsp[j] -= ((len-j)*co1->data[k]+j*co2->data[k]) * filter_sample(chan[k],ex[j])/len;
				else
					fsp[j] += ((len-j)*co1->data[k]+j*co2->data[k]) * filter_sample(chan[k],ex[j])/len;

			if (uvchan) {
				sv = ((len-j)*co1->data[nchan-1]+j*co2->data[nchan-1]) * filter_sample(chan[nchan-1],ex[j]) / len;
				suv = ((len-j)*co1->data[nchan-1]+j*co2->data[nchan-1]) * filter_sample(uvchan,ex[j]) / len;
			}
			else {
				suv = sv = ((len-j)*co1->data[nchan-1]+j*co2->data[nchan-1]) * filter_sample(chan[nchan-1],ex[j]) / len;
			}
			if (vflag) {
				if ((nchan-1)&1)
					fsp[j] -= sv;
				else
					fsp[j] += sv;
			}
			else {
				if ((nchan-1)&1)
					fsp[j] -= suv;
				else
					fsp[j] += suv;
			}

			if (fsp[j] > smax)
				smax = fsp[j];
			else if (fsp[j] < -smax)
				smax = -fsp[j];
		}

		/* write out speech waveform */
		for (j=0;j<len;j++) sp[j]=(short)fsp[j];
		if (sfswrite(ofid,len,sp) != len)
			error("write error on temporary file",NULL);

		/* update old position */
		lastposn = co2->posn + co2->size;
		cotmp = co1;
		co1 = co2;
		co2 = cotmp;

		/* report progress */
		if (ttytest()) {
			printf("\rFrame %d/%d",i+1,coitem.numframes);
			fflush(stdout);
		}
	}

	return;
}

/* main program */
void main(int argc,char **argv)
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int		it;		/* item type selection */
	char		*ty;		/* item match selection */
	char		*fxtype="0";	/* default FX = last */
	char		*cotype="0";	/* default CO = last */
	char		*sptype="0";	/* default SP = last */
	char		*trtype="0";	/* default TR = last */
	/* input file variables */
	char		filename[SFSMAXFILENAME]; /* sfs file name */
	int		fid;		/* input file descriptor */
	/* output file variables */
	int		ofid;		/* output file descriptor */
	int			reqrate=0;
	int			srate;
	int			i,j,cf,f,bw;
	char		*labs;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:wx:ms:")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: filterbank synthesizer V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == FX_TYPE)
					fxtype = ty;
				else if (it == CO_TYPE)
					cotype = ty;
				else if (it == TR_TYPE) {
					trtype = ty;
					domix=1;
				}
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'w' :	/* whisper */
			whisper++;
			extinp=0;
			break;
		case 'x' :	/* external excitation */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == SP_TYPE)
					sptype = ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			extinp++;
			whisper=0;
			break;
		case 'm':	/* use voice mix track */
			domix=1;
			break;
		case 's':	/* request different output rate */
			reqrate=atoi(optarg);
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-w|-m|-x spitem) (-s srate) file",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no database file specified",NULL);

	/* open data file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0) {
		if (fid==-1)
			error("cannot find file: %s",filename);
		else
			error("access error on %s",filename);
	}

	/* locate input fx and load it */
	if (!whisper && !extinp) {
		if (!sfsitem(fid,FX_TYPE,fxtype,&fxitem))
			error("cannot find FX data in %s",filename);
		if ((fx = (short *)sfsbuffer(&fxitem,fxitem.numframes))==NULL)
			error("cannot create memory buffer for FX",NULL);
		if (sfsread(fid,0,fxitem.numframes,fx) != fxitem.numframes)
			error("read error on input FX",NULL);
	}

	/* locate input sp and load it */
	if (extinp) {
		if (!sfsitem(fid,SP_TYPE,sptype,&exitem))
			error("cannot find SP data in %s",filename);
		if ((exsp = (short *)sfsbuffer(&exitem,exitem.numframes))==NULL)
			error("cannot create memory buffer for SP",NULL);
		if (sfsread(fid,0,exitem.numframes,exsp) != exitem.numframes)
			error("read error on input SP",NULL);
	}

	/* locate input mix track and load it */
	if (domix) {
		if (!sfsitem(fid,TR_TYPE,trtype,&tritem))
			error("cannot find TR data in %s",filename);
		if ((mix = (float *)sfsbuffer(&tritem,tritem.numframes))==NULL)
			error("cannot create memory buffer for TR",NULL);
		if (sfsread(fid,0,tritem.numframes,mix) != tritem.numframes)
			error("read error on input TR",NULL);
	}

	/* locate input coefficients */
	if (!sfsitem(fid,CO_TYPE,cotype,&coitem))
		error("cannot find input CO item in %s",filename);

	/* work out what system we're using */
	labs = params(coitem.params,"labels","");
	if (strcmp(labs,"280|455|675|970|1370|1900|2600|3500")==0) {
		/* voc8 program */
		nchan=8;
		srate=10000;
		if (reqrate > srate) srate=reqrate;
		chan = (FILTER **)calloc(nchan,sizeof(FILTER *));
		chan[ 0] = filter_resonator( 280, 60,srate);
		chan[ 1] = filter_resonator( 455, 60,srate);
		chan[ 2] = filter_resonator( 675, 60,srate);
		chan[ 3] = filter_resonator( 970, 60,srate);
		chan[ 4] = filter_resonator(1370, 60,srate);
		chan[ 5] = filter_resonator(1900, 60,srate);
		chan[ 6] = filter_resonator(2600, 60,srate);
		chan[ 7] = filter_resonator(3500, 60,srate);
		uvchan   = filter_resonator(3500,400,srate);
	}
	else if (strcmp(labs,"240|360|480|600|720|840|1000|1150|1300|1450|1600|1800|2000|2200|2400|2700|3000|3300|3750")==0) {
		/* voc19 program */
		nchan=19;
		srate=10000;
		if (reqrate > srate) srate=reqrate;
		chan = (FILTER **)calloc(nchan,sizeof(FILTER *));
		chan[ 0] = filter_resonator( 240, 40,srate);
		chan[ 1] = filter_resonator( 360, 40,srate);
		chan[ 2] = filter_resonator( 480, 40,srate);
		chan[ 3] = filter_resonator( 600, 40,srate);
		chan[ 4] = filter_resonator( 720, 40,srate);
		chan[ 5] = filter_resonator( 840, 40,srate);
		chan[ 6] = filter_resonator(1000, 40,srate);
		chan[ 7] = filter_resonator(1150, 40,srate);
		chan[ 8] = filter_resonator(1300, 40,srate);
		chan[ 9] = filter_resonator(1450, 40,srate);
		chan[10] = filter_resonator(1600, 40,srate);
		chan[11] = filter_resonator(1800, 60,srate);
		chan[12] = filter_resonator(2000, 60,srate);
		chan[13] = filter_resonator(2200, 60,srate);
		chan[14] = filter_resonator(2400, 60,srate);
		chan[15] = filter_resonator(2700, 60,srate);
		chan[16] = filter_resonator(3000, 60,srate);
		chan[17] = filter_resonator(3300, 60,srate);
		chan[18] = filter_resonator(3600, 60,srate);
		uvchan   = filter_resonator(3800,400,srate);


#if 0
		for (i=0;i<nchan;i++) {
			printf("Channel %d:\n",i+1);
			printf(" a=");
			for (j=0;j<3;j++) printf("%.6f ",chan[i]->section[0].acoeff[j]);
			printf("\n");
			printf(" b=");
			for (j=0;j<3;j++) printf("%.6f ",chan[i]->section[0].bcoeff[j]);
			printf("\n");
		}
#endif

	}
	else if (strcmp(labs,"x2|120|360|603|859|1136|1445|1796|2198|2670|3315|4277|5700|7728")==0) {
		/* voc26 program */
		nchan=26;
		srate=20000;
		if (reqrate > srate) srate=reqrate;
		chan = (FILTER **)calloc(nchan,sizeof(FILTER *));
		chan[ 0] = filter_resonator( 120, 40,srate);
		chan[ 1] = filter_resonator( 240, 40,srate);
		chan[ 2] = filter_resonator( 360, 40,srate);
		chan[ 3] = filter_resonator( 480, 40,srate);
		chan[ 4] = filter_resonator( 603, 40,srate);
		chan[ 5] = filter_resonator( 731, 40,srate);
		chan[ 6] = filter_resonator( 859, 40,srate);
		chan[ 7] = filter_resonator(1000, 40,srate);
		chan[ 8] = filter_resonator(1136, 40,srate);
		chan[ 9] = filter_resonator(1290, 40,srate);
		chan[10] = filter_resonator(1445, 40,srate);
		chan[11] = filter_resonator(1620, 40,srate);
		chan[12] = filter_resonator(1796, 60,srate);
		chan[13] = filter_resonator(2000, 60,srate);
		chan[14] = filter_resonator(2198, 60,srate);
		chan[15] = filter_resonator(2434, 60,srate);
		chan[16] = filter_resonator(2670, 60,srate);
		chan[17] = filter_resonator(3000, 80,srate);
		chan[18] = filter_resonator(3315, 80,srate);
		chan[19] = filter_resonator(3800,100,srate);
		chan[20] = filter_resonator(4277,100,srate);
		chan[21] = filter_resonator(4988,100,srate);
		chan[22] = filter_resonator(5700,100,srate);
		chan[23] = filter_resonator(6714,100,srate);
		chan[24] = filter_resonator(7728,100,srate);
		chan[25] = filter_resonator(8742,100,srate);
	}
	else {
		/* assume filtbank program */
		nchan=coitem.framesize-5;
		if ((labs[0]=='x')&&(labs[2]=='|')) labs += 3;
		cf = 2*(atoi(labs)-100);
		srate = 200*(((nchan+1)*cf+99)/100);
		if (reqrate > srate) srate=reqrate;
		chan = (FILTER **)calloc(nchan,sizeof(FILTER *));
		for (i=0;i<nchan;i++) {
			f = 100 + cf/2 + i*cf;
			if (f < 1800)
				bw=40;
			else if (f < 3000)
				bw=60;
			else if (f < 4000)
				bw=80;
			else
				bw=100;
			chan[i] = filter_resonator(f,bw,srate);
		}
	}

	/* create output item header */
	sfsheader(&spitem,SP_TYPE,0,2,1,1.0/srate,coitem.offset,1,0,0);
	if (whisper)
		sprintf(spitem.history,"%s(%d.%02d;srate=%d,whispered)",
			PROGNAME,
			coitem.datatype,coitem.subtype,
			srate);
	else if (extinp)
		sprintf(spitem.history,"%s(%d.%02d,%d.%02d;srate=%d)",
			PROGNAME,
			coitem.datatype,coitem.subtype,
			exitem.datatype,exitem.subtype,
			srate);
	else if (domix)
		sprintf(spitem.history,"%s(%d.%02d,%d.%02d,%d.%02d;srate=%d)",
			PROGNAME,
			coitem.datatype,coitem.subtype,
			fxitem.datatype,fxitem.subtype,
			tritem.datatype,tritem.subtype,
			srate);
	else
		sprintf(spitem.history,"%s(%d.%02d,%d.%02d;srate=%d)",
			PROGNAME,
			coitem.datatype,coitem.subtype,
			fxitem.datatype,fxitem.subtype,
			srate);

	/* open output channel */
	if ((ofid=sfschannel(filename,&spitem)) < 0)
		error("cannot open output channel",NULL);

	/* do processing */
	process(fid,ofid);

	/* update data file */
	if (!sfsupdate(filename))
		error("update error on %s",filename);

	/* clear progress */
	if (ttytest()) {
		printf("\r                       \r");
		fflush(stdout);
	}

	/* that's all folks */
	exit(0);
}
