/* vocsyn -- 19-channel vocoder synthesizer at 10kHz */

/* M.A.Huckvale - January 1988 */

/* version 1.1 - April 1988
	- add "whisper" mode
*/
/* version 1.2 - October 1995
	- add input from speech signal
*/
/* version 1.3 - July 2003
	- allow voicing mixture input from track (vdegree)
*/

/* program identification: name and version strings used in the -I banner,
   the usage message and the history string of the output item */
#define PROGNAME "vocsyn"
#define PROGVERS "1.3"
/* global copy of the program name; not referenced in this file
   (presumably picked up by the SFS library -- confirm) */
char	*progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH VOCSYN SFS1 UCL SFS
.SH NAME
vocsyn - 19-channel vocoder synthesizer
.SH SYNOPSIS
.B vocsyn
(-I) (-i item) (-w|-m|-x spitem) file
.SH DESCRIPTION
.I vocsyn
is an implementation of the synthesis section of the JSRU 19-channel
vocoder.  Input is a fundamental frequency contour and a set of
channel energies generated by "vocode" or "voc10".
.I vocsyn
produces a signal sampled at 10kHz.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and version number.
.TP 11
.BI -i item
Select input item.
.TP 11
.B -w
"Whisper" data, when no FX is present.
.TP 11
.BI -x spitem
Use external excitation from given speech item (must be 10kHz).
.TP 11
.B -m
Use voice mix track to control degree of periodic excitation.
Designed for use with output of vdegree(SFS1) program.
.SH INPUT ITEMS
.IP FX.xx 11
Fundamental frequency contour, used for voice-switch and Fx.
.IP CO.xx 11
19-channel energies generated by "voc19".
.IP SP 11
(External excitation mode) Input signal to filter.
.IP TR 11
(Optional) Voicing mixture track (see vdegree).
.SH OUTPUT ITEMS
.IP SP 11
Speech waveform at 10kHz.
.SH VERSION/AUTHOR
1.3 - Mark Huckvale
.SH SEE ALSO
voc19(1)
*/
/*--------------------------------------------------------------------------*/

/* standard definitions */
#include "SFSCONFG.h"
#include <stdio.h>		/* standard io library */
#include <stdlib.h>
#include <string.h>		/* strlen, strcpy */
#include <unistd.h>
#include <math.h>
#include "sfs.h"		/* database structures */

/* manifest constants */
#define NCHAN 19		/* number of vocoder channels expected in each CO frame */
#define SPDUR		0.0001	/* output sample duration (0.0001 => 10kHz sampling) */
#define MAXSYNBUF	400	/* size of the per-frame synthesis buffers in samples (400 x SPDUR = 40ms) */

/* global data -- item headers and sample buffers shared by main(), process() and getex() */
struct item_header	fxitem;		/* header of the input FX item */
short			*fx;		/* FX values, loaded by main() unless -w or -x is given */
struct item_header	coitem;		/* header of the input CO (channel energy) item */
struct item_header	spitem;		/* header of the output SP item */
int			whisper=0;	/* non-zero when -w given: noise excitation only */
int			extinp=0;	/* non-zero when -x given: excitation taken from exsp[] */
struct item_header	exitem;		/* header of the external excitation SP item */
short			*exsp;		/* external excitation samples, loaded by main() when -x given */
struct item_header 	tritem;		/* header of the voicing mixture TR item */
float			*mix;		/* voicing mixture values, loaded by main() when domix is set */
int				domix=0;	/* non-zero when -m given or a TR item is selected with -i */

/* filter coefficients */
#include "vocsynda.h"

/* re-synthesis data -- one frame of working storage, at most MAXSYNBUF samples */
float	ex[MAXSYNBUF];		/* floating excitation, filled by getex() */
float	fsp[MAXSYNBUF];		/* floating speech, accumulated by chfilt() */
short	sp[MAXSYNBUF];		/* integer speech, written out by process() */
/* three-element filter state for each of the 19 channels; passed to chfilt(),
   which reads it on entry and stores the updated state on exit so that it
   carries over from frame to frame */
float	mem1[3],mem2[3],mem3[3],mem4[3],mem5[3];
float	mem6[3],mem7[3],mem8[3],mem9[3],mem10[3];
float	mem11[3],mem12[3],mem13[3],mem14[3],mem15[3];
float	mem16[3],mem17[3],mem18[3],mem19[3];

/*
 * getex -- get one frame of excitation into ex[0..len-1]
 *
 *	start	time of the first sample of the frame, on the same time
 *		scale as the offset/frameduration fields of the input items
 *	len	number of excitation samples to generate (caller ensures
 *		that it does not exceed MAXSYNBUF)
 *
 * The excitation is chosen as follows:
 *	- external input (extinp): samples copied from exsp[], scaled by 1/800
 *	- whisper, or FX value below 40: white noise
 *	- voicing mixture (domix): noise scaled by (1-mix) plus pulses scaled by mix
 *	- otherwise: pulse train at the period given by the FX value
 *
 * The position of the next pulse is remembered between calls so that the
 * pulse train continues across frame boundaries; it is reset by an
 * unvoiced frame.
 *
 * Returns 1 if the frame is to be treated as voiced, 0 otherwise.
 */
int getex(start,len)
double	start;
int	len;
{
	int		i,idx,avail;
	int		fxval=0,period;
	static int	fxoffset = 0;	/* index of next pulse relative to frame start */
	float	vmix;

	/* zero excitation */
	for (i=0;i<len;i++) ex[i] = 0.0;

	/* check for external input */
	if (extinp) {
		idx = (int)((start-exitem.offset)/exitem.frameduration);
		if (idx < 0) idx = 0;
		/* copy as many samples as the speech item can supply; a frame
		   that runs off the end keeps zeros in its remaining part
		   instead of being silenced completely */
		avail = exitem.numframes - idx;
		if (avail > len) avail = len;
		for (i=0;i<avail;i++)
			ex[i] = exsp[idx+i]/800.0;
		return(0);
	}

	/* get fx value for frame (an empty FX item leaves fxval at 0 = unvoiced) */
	if (!whisper && (fxitem.numframes > 0)) {
		idx = (int)((start-fxitem.offset)/fxitem.frameduration);
		if (idx < 0) idx = 0;
		if (idx >= fxitem.numframes) idx = fxitem.numframes - 1;
		fxval = fx[idx];
	}

	/* check voiced or unvoiced */
	if (whisper || (fxval < 40)) {
		/* unvoiced -- put in white noise */
		for (i=0;i<len;i++) {
			ex[i] = ((rand() % 2000) - 1000)/40.0;
		}
		fxoffset = 0;
		return(0);
	}

	/* pitch period in samples; FX values above 1/SPDUR would truncate to
	   zero and stop the pulse loops below from advancing, so never let
	   the period drop below one sample */
	period = (int)(1.0/ (fxval * SPDUR));
	if (period < 1) period = 1;

	if (domix) {
		/* mixed -- noise and pulses weighted by the voicing mixture;
		   an empty track is treated as fully voiced */
		vmix = 1.0;
		if (tritem.numframes > 0) {
			idx = (int)((start-tritem.offset)/tritem.frameduration);
			if (idx < 0) idx = 0;
			if (idx >= tritem.numframes) idx = tritem.numframes - 1;
			vmix = mix[idx];
		}
		for (i=0;i<len;i++) {
			ex[i] = (1-vmix)*((rand() % 2000) - 1000)/40.0;
		}
		for (i=fxoffset;i<len;i+=period) {
			ex[i] += vmix*100.0;
		}
		fxoffset = i - len;
		return((vmix > 0.5)?1:0);
	}

	/* voiced -- put in pulses */
	for (i=fxoffset;i<len;i+=period) {
		ex[i] = 100.0;
	}
	fxoffset = i - len;
	return(1);
}

/*
 * chfilt -- single channel filter
 *
 * Passes the first len samples of the excitation buffer ex[] through one
 * second-order recursive filter and ADDS the result into fsp[].
 *
 *	len	number of samples to process
 *	nc	three numerator coefficients
 *	dc	three denominator coefficients; dc[0] acts as the input gain
 *	mem	three-element filter state, read on entry and updated on exit
 *	gain	channel level, applied to the input as 10^(gain/20)
 */
void chfilt(len,nc,dc,mem,gain)
int		len;
float		*nc;
float		*dc;
float		*mem;
float		gain;
{
	register int	k = 0;
	/* local copies of coefficients and state for quick access */
	float		b0 = nc[0], b1 = nc[1], b2 = nc[2];
	float		a0 = dc[0], a1 = dc[1], a2 = dc[2];
	float		w0 = mem[0], w1 = mem[1], w2 = mem[2];

	/* fold the channel level into the input coefficient */
	a0 *= pow(10.0,gain/20.0);

	while (k < len) {
		/* recursive part: new internal state from input and past states */
		w0 = a0 * ex[k] - a1 * w1 - a2 * w2;
		/* non-recursive part: accumulate this channel's output */
		fsp[k] += b0 * w0 + b1 * w1 + b2 * w2;
		/* age the state by one sample */
		w2 = w1;
		w1 = w0;
		k++;
	}

	/* hand the state back for the next frame */
	mem[0] = w0;
	mem[1] = w1;
	mem[2] = w2;
}

/*
 * process -- re-synthesis routine
 *
 * Reads the CO item frame by frame from fid and, for each frame, builds
 * the excitation with getex(), sums the outputs of the 19 channel filters
 * and writes the resulting samples to the output channel ofid.
 *
 *	fid	descriptor of the opened input file, positioned on the CO item
 *	ofid	descriptor of the output SP channel
 *
 * The length of each synthesised frame runs from the end of the previous
 * CO frame to the end of the current one, so gaps between frames are
 * filled.  Any failure is reported through error().
 */
void process(fid,ofid)
int		fid;
int		ofid;
{
	int		i,j;
	struct co_rec	*co;
	int		len,vflag;
	int		lastposn=0;
	float		v;

	/* get buffer for coefficients */
	if ((co = (struct co_rec *)sfsbuffer(&coitem,1))==NULL)
		error("could not get memory buffer for CO",NULL);

	/* process speech frame by frame */
	for (i=0;sfsread(fid,i,1,co)==1;i++) {

		/* get length of waveform */
		len = (int)(0.5 + (co->posn+co->size-lastposn) * coitem.frameduration/SPDUR);
		/* the buffers hold MAXSYNBUF samples, so a frame of exactly
		   that length still fits */
		if (len > MAXSYNBUF)
			error("frame too long in CO item",NULL);

		/* get basic excitation */
		vflag = getex(co->posn*coitem.frameduration+coitem.offset,len);

		/* filter waveform with 19 filters */
		for (j=0;j<len;j++) fsp[j]=0.0;
		chfilt(len,ncoeff1,dcoeff1,mem1,co->data[0]);
		chfilt(len,ncoeff2,dcoeff2,mem2,co->data[1]);
		chfilt(len,ncoeff3,dcoeff3,mem3,co->data[2]);
		chfilt(len,ncoeff4,dcoeff4,mem4,co->data[3]);
		chfilt(len,ncoeff5,dcoeff5,mem5,co->data[4]);
		chfilt(len,ncoeff6,dcoeff6,mem6,co->data[5]);
		chfilt(len,ncoeff7,dcoeff7,mem7,co->data[6]);
		chfilt(len,ncoeff8,dcoeff8,mem8,co->data[7]);
		chfilt(len,ncoeff9,dcoeff9,mem9,co->data[8]);
		chfilt(len,ncoeff10,dcoeff10,mem10,co->data[9]);
		chfilt(len,ncoeff11,dcoeff11,mem11,co->data[10]);
		chfilt(len,ncoeff12,dcoeff12,mem12,co->data[11]);
		chfilt(len,ncoeff13,dcoeff13,mem13,co->data[12]);
		chfilt(len,ncoeff14,dcoeff14,mem14,co->data[13]);
		chfilt(len,ncoeff15,dcoeff15,mem15,co->data[14]);
		chfilt(len,ncoeff16,dcoeff16,mem16,co->data[15]);
		chfilt(len,ncoeff17,dcoeff17,mem17,co->data[16]);
		chfilt(len,ncoeff18,dcoeff18,mem18,co->data[17]);
		/* channel 19 uses different coefficients for voiced and unvoiced frames */
		if (vflag)
			chfilt(len,ncoeff19v,dcoeff19v,mem19,co->data[18]);
		else
			chfilt(len,ncoeff19uv,dcoeff19uv,mem19,co->data[18]);

		/* write out speech waveform, saturating at the 16-bit limits:
		   converting an out-of-range float to short is undefined */
		for (j=0;j<len;j++) {
			v = fsp[j];
			if (v >= 32767.0)
				sp[j] = 32767;
			else if (v <= -32768.0)
				sp[j] = -32768;
			else if (v == v)
				sp[j] = (short)v;
			else
				sp[j] = 0;	/* not-a-number */
		}
		if (sfswrite(ofid,len,sp) != len)
			error("write error on temporary file",NULL);

		/* update old position */
		lastposn = co->posn + co->size;

		/* report progress */
		if (ttytest()) {
			printf("\rFrame %d/%d",i+1,coitem.numframes);
			fflush(stdout);
		}
	}

	return;
}

/*
 * main program
 *
 * Decodes the command line, opens the file, loads whichever of the FX,
 * SP (external excitation) and TR (voicing mixture) items the selected
 * mode requires, checks that the CO item holds NCHAN channels, creates
 * the output SP item and runs the re-synthesis.
 *
 * Options:
 *	-I		identify program and version, then exit
 *	-i item		select input FX, CO or TR item (TR also enables mixing)
 *	-w		whisper: noise excitation only, no FX needed
 *	-x spitem	take excitation from the given SP item
 *	-m		use voicing mixture track
 *
 * All failures are reported through error(); normal termination is
 * through exit(0).
 */
int main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int		it;		/* item type selection */
	char		*ty;		/* item match selection */
	char		*fxtype="0";	/* default FX = last */
	char		*cotype="0";	/* default CO = last */
	char		*sptype="0";	/* default SP = last */
	char		*trtype="0";	/* default TR = last */
	/* input file variables */
	char		filename[SFSMAXFILENAME]; /* sfs file name */
	char		*fname;		/* expanded file name */
	int		fid;		/* input file descriptor */
	/* output file variables */
	int		ofid;		/* output file descriptor */

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:wx:m")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: 19-channel vocoder synthesizer V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == FX_TYPE)
					fxtype = ty;
				else if (it == CO_TYPE)
					cotype = ty;
				else if (it == TR_TYPE) {
					trtype = ty;
					domix=1;
				}
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'w' :	/* whisper */
			/* whisper and external excitation are mutually
			   exclusive: the last one given wins */
			whisper++;
			extinp=0;
			break;
		case 'x' :	/* external excitation */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == SP_TYPE)
					sptype = ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			extinp++;
			whisper=0;
			break;
		case 'm':	/* use voice mix track */
			domix=1;
			break;
		case '?' :	/* unknown */
		default :
			errflg++;
			break;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-w|-m|-x spitem) file",PROGNAME);

	/* get filename, refusing names that would overflow the buffer */
	if (optind < argc) {
		fname = sfsfile(argv[optind]);
		if (strlen(fname) >= sizeof(filename))
			error("filename too long: %s",argv[optind]);
		strcpy(filename,fname);
	}
	else
		error("no database file specified",NULL);

	/* open data file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0) {
		if (fid==-1)
			error("cannot find file: %s",filename);
		else
			error("access error on %s",filename);
	}

	/* locate input fx and load it */
	if (!whisper && !extinp) {
		if (!sfsitem(fid,FX_TYPE,fxtype,&fxitem))
			error("cannot find FX data in %s",filename);
		if ((fx = (short *)sfsbuffer(&fxitem,fxitem.numframes))==NULL)
			error("cannot create memory buffer for FX",NULL);
		if (sfsread(fid,0,fxitem.numframes,fx) != fxitem.numframes)
			error("read error on input FX",NULL);
	}

	/* locate input sp and load it */
	if (extinp) {
		if (!sfsitem(fid,SP_TYPE,sptype,&exitem))
			error("cannot find SP data in %s",filename);
		/* excitation must already be at the synthesis rate */
		if ((int)(0.5+0.01/exitem.frameduration)!=100)
			error("input SP is not sampled at 10000Hz",NULL);
		if ((exsp = (short *)sfsbuffer(&exitem,exitem.numframes))==NULL)
			error("cannot create memory buffer for SP",NULL);
		if (sfsread(fid,0,exitem.numframes,exsp) != exitem.numframes)
			error("read error on input SP",NULL);
	}

	/* locate input mix track and load it */
	if (domix) {
		if (!sfsitem(fid,TR_TYPE,trtype,&tritem))
			error("cannot find TR data in %s",filename);
		if ((mix = (float *)sfsbuffer(&tritem,tritem.numframes))==NULL)
			error("cannot create memory buffer for TR",NULL);
		if (sfsread(fid,0,tritem.numframes,mix) != tritem.numframes)
			error("read error on input TR",NULL);
	}

	/* locate input coefficients (left selected for process() to read) */
	if (!sfsitem(fid,CO_TYPE,cotype,&coitem))
		error("cannot find input CO item in %s",filename);
	if ((coitem.datasize*coitem.framesize-sfsstruct[CO_TYPE])/4 != NCHAN)
		error("input CO type is not 19-channel vocoder",NULL);

	/* create output item header */
	sfsheader(&spitem,SP_TYPE,0,2,1,
			SPDUR,coitem.offset,0,1,0);
	/* history records the items that contributed to the output */
	if (whisper)
		sprintf(spitem.history,"%s(%d.%02d;whispered)",
			PROGNAME,
			coitem.datatype,coitem.subtype);
	else if (extinp)
		sprintf(spitem.history,"%s(%d.%02d,%d.%02d)",
			PROGNAME,
			coitem.datatype,coitem.subtype,
			exitem.datatype,exitem.subtype);
	else if (domix)
		sprintf(spitem.history,"%s(%d.%02d,%d.%02d,%d.%02d)",
			PROGNAME,
			coitem.datatype,coitem.subtype,
			fxitem.datatype,fxitem.subtype,
			tritem.datatype,tritem.subtype);
	else
		sprintf(spitem.history,"%s(%d.%02d,%d.%02d)",
			PROGNAME,
			coitem.datatype,coitem.subtype,
			fxitem.datatype,fxitem.subtype);

	/* open output channel */
	if ((ofid=sfschannel(filename,&spitem)) < 0)
		error("cannot open output channel",NULL);

	/* do processing */
	process(fid,ofid);

	/* update data file */
	if (!sfsupdate(filename))
		error("update error on %s",filename);

	/* clear progress */
	if (ttytest()) {
		printf("\r                       \r");
		fflush(stdout);
	}

	/* that's all folks */
	exit(0);
}


