/* mbrsynth -- MBROLA diphone synthesis from .PHO file format */

/* M.A.Huckvale - University College London */

/* version 1.0 - March 2001 */

/* version 1.1 - October 2002
	- add transcription from command line with default durations & pitch
*/

/* NOTE(review): IAG is forced off here; nothing in the visible part of this
   file tests it, so its purpose cannot be determined from this source */
#undef IAG

/* program name and version: used in the -I banner, the usage message and
   the history strings written to output items */
#define PROGNAME "mbrsynth"
#define PROGVERS "1.1"
/* global program name; presumably read by the SFS library (e.g. when
   reporting errors) -- TODO confirm against sfs.h */
char *progname=PROGNAME;

/*-------------------------------------------------------------------------*/
/**MAN
.TH MBRSYNTH 1 SFS UCL
.SH NAME
mbrsynth - MBROLA diphone synthesis from MBROLA format control file
.SH SYNOPSIS
.B mbrsynth
(-I) (-i item) (-m file.pho|-t transcription) (-d diphonedb) (-s) (-a) (out.sfs)
.SH DESCRIPTION
.I mbrsynth
is a program to perform diphone synthesis using the MBROLA diphone
system.  MBROLA software from http://tcts.fpms.ac.be/synthesis needs
to be installed for this program to work.  You also need at least one
diphone database from the same source.
.PP
The program takes as input a phonetic transcription and a fundamental frequency
contour and produces an audio signal.  The input can come from
an input AN item and FX item, from a .PHO file as used by the MBROLA
diphone synthesis system, or from a transcription specified on the
command line.  In the last case, default duration and pitch values
are used.  Output is direct to the audio output, or to an SFS file.
.PP
Here is an example for you to try:
.nf
_ 50
t 95 0 150
e 133 0 150 99 110
s 150
t 135 0 110
_ 50
.fi
.SH OPTIONS
.TP 11
.B -I
Identify program name and version number.
.TP 11
.BI -i item
Select input AN and FX items.
.TP 11
.BI -m mbrola.pho
Take input from MBROLA format .PHO file.
.TP 11
.BI -d diphonedb
Select the name of the diphone database to use for synthesis.
.TP 11
.BI -t transcription
Specify the phonetic transcription to produce.  Put spaces between symbols
and enclose in quotes.
.TP 11
.B -s
Save synthesis to SFS file specified (must exist).
.TP 11
.B -a
Save transcription as annotations to SFS file specified (must exist).
For transcription (-t) input only.
.SH VERSION/AUTHOR
.IP 1.1
Mark Huckvale
.SH SEE ALSO
phosynth, repros
.SH BUGS
*/
/*--------------------------------------------------------------------------*/

/* include files */
#include "SFSCONFG.h"
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <string.h>
#include <malloc.h>
#include <ctype.h>
#include "sfs.h"
#include "mbrplay.h"

/* label sequence: one entry per annotation loaded by loadsfs().  Times are
   annotation positions multiplied by the AN item's frame duration
   (presumably seconds -- confirm against the SFS item header definition) */
struct label_rec {
	char	*label;			/* phonetic label (heap copy made with strsave) */
	double	stime;			/* start time */
	double	etime;			/* end time: start of next label, or posn+size for the last one */
};

/* global data */
double			totdur=0;	/* total duration: end of last annotation, set by loadsfs() */
struct label_rec	*itab;		/* input table, allocated by loadsfs() */
int			icount;		/* # entries in itab */
short			*fx;		/* fx contour, resampled by loadsfs() to one value per 0.01 */
int			fxcount;	/* # samples in fx */
int			dosfs=0;	/* input from SFS file (set by -i) */
int			dombrola=1;	/* input from mbrola file (default; cleared by -i and -t) */
int			dosynth=1;	/* output to audio (cleared by -s; never read in this file) */
int			dosave=0;	/* output to SFS file (set by -s; cleared if no file is named) */
int			dotrans=0;	/* input from command line transcription (set by -t) */
int			doannot=0;	/* save transcription as annotations (set by -a; cleared if no file is named) */
int			ofid;		/* output file id of the speech channel opened in main() */
struct item_header	spitem;		/* header of the speech item written with -s */
struct item_header	anitem;		/* header of the annotation item read by loadsfs() or written by saveAN() */
struct item_header	fxitem;		/* header of the fx item read by loadsfs() */

char		dbname[SFSMAXFILENAME]="en1";	/* diphone database name, overridden by -d */
char		mfilename[SFSMAXFILENAME]; /* control file name */
char		sfilename[SFSMAXFILENAME]; /* SFS file name */
char		tfilename[SFSMAXFILENAME]; /* temporary file name */
/* maximum number of transcription tokens processed by createPHO()/saveAN() */
#define MAXSEGMENT	200
char		transcription[MAXSEGMENT*4]; /* 3 chars+space */
/* PHO text built by createPHO(), one "<symbol> <dur> 0 120" line per segment */
char		phobuffer[MAXSEGMENT*14];	/* 3 chars, space, 3 chars, space, 5 chars, newline */

/* symbol mapping and default durations.
 *
 * Used by createPHO() and saveAN() for transcriptions given with -t:
 *   in  - token as typed by the user; compared with strcmp(), so the match
 *         is exact and case sensitive
 *   out - symbol written to the PHO text and to the annotation label
 *   dur - default duration; written as the duration field of the PHO line
 *         and as the annotation size (the AN item uses a 0.001 frame
 *         duration, i.e. milliseconds)
 *
 * The table is searched linearly and the first match wins.  Tokens that are
 * not listed are dropped by the callers.  Several short vowel spellings
 * ("3", "A", "O", "i", "u") map to the long-vowel output symbol with roughly
 * half the duration of the explicit long form.
 * NOTE(review): the symbol set looks like SAMPA for British English to suit
 * the default "en1" database -- confirm before using other databases.
 */
struct map_rec {
	char	*in;		/* input symbol */
	char	*out;		/* output (database) symbol */
	int		dur;	/* default duration */
} symmap[]={
	{ "_",	"_",	100 },
	{ "\"",	"_",	0 },		/* zero-length silence */
	{ "/",	"_",	100 },
	{ "&",	"&",	140 },
	{ ",",	"_",	100 },
	{ ".",	"_",	200 },
	{ "3",	"3:",	120 },
	{ "3:",	"3:",	240 },
	{ "5",	"5",	 80 },		/* alias of l~ */
	{ "@",	"@",	 90 },
	{ "@U",	"@U",	230 },
	{ "A",	"A:",	110 },
	{ "A:",	"A:",	220 },
	{ "D",	"D",	100 },
	{ "I",	"I",	100 },
	{ "I@",	"I@",	210 },
	{ "N",	"N",	130 },
	{ "O",	"O:",	105 },
	{ "O:",	"O:",	210 },
	{ "OI",	"OI",	240 },
	{ "Q",	"Q",	140 },
	{ "S",	"S",	180 },
	{ "T",	"T",	160 },
	{ "U",	"U",	100 },
	{ "V",	"V",	155 },
	{ "Z",	"Z",	 70 },
	{ "aI",	"aI",	220 },
	{ "aU",	"aU",	240 },
	{ "b",	"b",	115 },
	{ "d",	"d",	 75 },
	{ "dZ",	"dZ",	170 },
	{ "e",	"e",	125 },
	{ "e@",	"e@",	270 },
	{ "eI",	"eI",	230 },
	{ "f",	"f",	130 },
	{ "g",	"g",	 90 },
	{ "h",	"h",	160 },
	{ "i",	"i:",	 70 },
	{ "i:",	"i:",	140 },
	{ "j",	"j",	110 },
	{ "k",	"k",	140 },
	{ "l",	"l",	 80 },
	{ "l~",	"l~",	 80 },
	{ "m",	"m",	110 },
	{ "n",	"n",	130 },
	{ "p",	"p",	130 },
	{ "r",	"r",	 80 },
	{ "s",	"s",	125 },
	{ "t",	"t",	130 },
	{ "tS",	"tS",	210 },
	{ "u",	"u:",	 80 },
	{ "u:",	"u:",	155 },
	{ "v",	"v",	 85 },
	{ "w",	"w",	 80 },
	{ "z",	"z",	140 },
	{ "{",	"{",	140 },
};
/* number of entries in symmap */
#define NUMSYM (sizeof(symmap)/sizeof(struct map_rec))

/* duplicate a string into freshly allocated heap memory.
   The caller owns the returned copy.  Running out of memory is reported
   through error(). */
char *strsave(char *str)
{
	size_t	nbytes;
	char	*copy;

	nbytes = strlen(str)+1;		/* room for the terminating NUL */
	copy = malloc(nbytes);
	if (copy==NULL)
		error("out of memory");
	memcpy(copy,str,nbytes);
	return(copy);
}

/* load AN and FX item from SFS file */
void loadsfs(char *fname,char *antype,char *fxtype)
{
	struct an_rec		*an;
	short			*ifx;
	int			i,j;

	/* get SFS items */
	getitem(fname,AN_TYPE,antype,&anitem,&an);
	getitem(fname,FX_TYPE,fxtype,&fxitem,&ifx);

	/* allocate buffer for output Fx */
	fxcount = (int)(1+(fxitem.numframes*fxitem.frameduration/0.01));
	if ((fx=(short *)calloc(fxcount,sizeof(short)))==NULL)
		error("out of memory");

	/* copy fx */
	for (i=0;i<fxcount;i++) {
		j = (int)(i*0.01/fxitem.frameduration);
		if ((0<=j)&&(j<fxitem.numframes))
			fx[i] = ifx[j];
	}

	/* allocate buffer for labels */
	icount = anitem.numframes;
	totdur = (an[icount-1].posn+an[icount-1].size)*anitem.frameduration;
	if ((itab=(struct label_rec *)calloc(icount,sizeof(struct label_rec)))==NULL)
		error("out of memory");

	/* copy annotations across */
	for (i=0;i<icount;i++) {
		itab[i].stime = an[i].posn*anitem.frameduration;
		itab[i].label = strsave(an[i].label);
		if (i < icount-1)
			itab[i].etime = an[i+1].posn*anitem.frameduration;
		else
			itab[i].etime = (an[i].posn+an[i].size)*anitem.frameduration;
	}
}

/* create PHO file from transcription
 *
 *   trans - space separated symbols; the string itself is not modified
 *
 * Each token found in symmap contributes one line "<out> <dur> 0 120\n" to
 * the global phobuffer (default duration from the table, one pitch point of
 * 120 at 0% of the segment).  Tokens that are not in symmap are skipped.
 * At most MAXSEGMENT tokens are examined, and output stops early if
 * phobuffer would overflow.
 *
 * Returns phobuffer, which is overwritten by the next call.
 */
char *createPHO(char *trans)
{
	char	*copy;
	char	*tok;
	size_t	used=0;		/* bytes of phobuffer filled so far */
	size_t	room;
	size_t	i;
	int	cnt=0;
	int	n;

	phobuffer[0]='\0';

	/* strtok writes into its argument, so tokenise a private copy */
	copy = strsave(trans);
	for (tok=strtok(copy," "); tok && *tok && (cnt < MAXSEGMENT); tok=strtok(NULL," "), cnt++) {
		for (i=0;i<NUMSYM;i++)
			if (strcmp(tok,symmap[i].in)==0)
				break;
		if (i==NUMSYM)
			continue;	/* unknown symbol: skipped */

		/* append at the tracked end of the text, never past the buffer */
		room = sizeof(phobuffer)-used;
		n = snprintf(phobuffer+used,room,"%s %d 0 120\n",symmap[i].out,symmap[i].dur);
		if ((n < 0) || ((size_t)n >= room)) {
			phobuffer[used]='\0';	/* drop the partial line */
			break;
		}
		used += (size_t)n;
	}
	free(copy);

	/* NOTE(review): looks like a leftover diagnostic; kept because it is
	   part of the program's current stdout output */
	printf("phobuffer=\n%s\n",phobuffer);
	return phobuffer;
}

/* save annotations from transcription
 *
 *   trans - space separated symbols, as given to createPHO()
 *   fname - existing SFS file that receives the new AN item
 *
 * Writes one annotation per token found in symmap, using the same output
 * symbols and default durations as createPHO().  The item has a frame
 * duration of 0.001, so positions and sizes are in milliseconds.  Tokens
 * that are not in symmap are skipped and take up no time.
 *
 * The item history records the transcription, cut to 90 characters plus
 * "..." only when it is longer than that.
 *
 * The output channel is left open; the caller is expected to finish the
 * file (main does this with sfsupdate).
 */
void saveAN(char *trans,char *fname)
{
	char	*copy;
	char	*tok;
	int		i,cnt=0;
	int		t=0;		/* running start position */
	int		idx;
	char	buf[128];
	struct an_rec *an;
	int		fid;		/* annotation channel (distinct from the global ofid) */

	sfsheader(&anitem,AN_TYPE,-1,1,-1,0.001,0.0,0,0,1);
	if (strlen(trans) > 90)
		sprintf(buf,"%.90s...",trans);
	else
		strcpy(buf,trans);
	sprintf(anitem.history,"%s(type=transcription,trans=%s)",
		PROGNAME,buf);
	if ((fid=sfschannel(fname,&anitem))<0)
		error("could not open output channel to '%s'",fname);
	if ((an = sfsbuffer(&anitem,1))==NULL)
		error("out of memory");

	/* strtok writes into its argument, so tokenise a private copy */
	copy = strsave(trans);
	tok = strtok(copy," ");
	while (tok && *tok && (cnt < MAXSEGMENT)) {
		idx=-1;
		for (i=0;i<(int)NUMSYM;i++)
			if (strcmp(tok,symmap[i].in)==0) {
				idx=i;
				break;
			}
		if (idx >= 0) {
			an->posn = t;
			an->size = symmap[idx].dur;
			t += symmap[idx].dur;
			strcpy(an->label,symmap[idx].out);
			sfswrite(fid,1,an);
		}
		cnt++;
		tok = strtok(NULL," ");
	}
	free(copy);
}

/* main program */
void main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int		i,j,code;
	/* input items */
	int		it;
	char		*ty;
	char		*antype="0";
	char		*fxtype="0";
	FILE		*ip;
	short		sbuf[512];
	int		len;
	double		srate;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:m:d:st:a")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Synthesis by rule of .PHO file V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* input items */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == AN_TYPE) {
					antype = ty;
					dosfs=1;
					dombrola=0;
				}
				else if (it == FX_TYPE) {
					fxtype = ty;
					dosfs=1;
					dombrola=0;
				}
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'm' :	/* input MBROLA file */
			strcpy(mfilename,optarg);
			dosfs=0;
			dombrola=1;
			break;
		case 's' :	/* save to SFS */
			dosynth=0;
			dosave=1;
			break;
		case 'd' :	/* diphone database name */
			strcpy(dbname,optarg);
			break;
		case 't':	/* transcription */
			strcpy(transcription,optarg);
			dombrola=0;
			dotrans=1;
			break;
		case 'a':	/* save annotation */
			doannot=1;
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-m file.pho) (-d diphonedb) (-s) (-a) (file.sfs)",PROGNAME);

	/* get SFS filename */
	if (optind < argc)
		strcpy(sfilename,sfsfile(argv[optind]));
	else {
		dosave=0;
		doannot=0;
	}

	/* load control file */
	if (dosfs)
		loadsfs(sfilename,antype,fxtype);

	/* select diphone database */
	if ((code=MBR_SetDatabase(dbname))!=0)
		error("could not initialise diphone database '%s', code %d",dbname,code);
	srate = MBR_GetDefaultFreq();

	/* open fid to SFS file */
	if (dosave) {
		sfsheader(&spitem,SP_TYPE,0,2,1,1.0/srate,0.0,1,0,0);
		if (dombrola)
			sprintf(spitem.history,"%s(file=%s)",PROGNAME,mfilename);
		else
			sprintf(spitem.history,"%s(%d.%02d,%d.%02d)",
				PROGNAME,
				anitem.datatype,anitem.subtype,
				fxitem.datatype,fxitem.subtype);
		if ((ofid=sfschannel(sfilename,&spitem)) < 0)
			error("unable to create temporary file",NULL);
	}

	/* call MBROLA */
	if (dombrola) {
		if (dosave) {
			strcpy(tfilename,"mbXXXXXX");
			mktemp(tfilename);
			if ((code=MBR_Play(mfilename,MBR_BYFILE|MBR_WAIT|MBROUT_RAW,tfilename,(DWORD)0))!=0)
				error("failed to call MBROLA mbrplay, code %d",code);
			/* copy raw data to SFS file */
			if ((ip = fopen(tfilename,"rb"))==NULL)
				error("failed to open '%s'",tfilename);
			while ((len=fread(sbuf,2,512,ip))>0)
				sfswrite(ofid,len,sbuf);
			fclose(ip);
			remove(tfilename);
		}
		else {
			if ((code=MBR_Play(mfilename,MBR_BYFILE|MBR_WAIT|MBROUT_SOUNDBOARD,NULL,(DWORD)0))!=0)
				error("failed to call MBROLA mbrplay, code %d",code);
		}
	}
	else if (dotrans) {
		if (dosave) {
			strcpy(tfilename,"mbXXXXXX");
			mktemp(tfilename);
			if ((code=MBR_Play(createPHO(transcription),MBR_WAIT|MBROUT_RAW,tfilename,(DWORD)0))!=0)
				error("failed to call MBROLA mbrplay, code %d",code);
			/* copy raw data to SFS file */
			if ((ip = fopen(tfilename,"rb"))==NULL)
				error("failed to open '%s'",tfilename);
			while ((len=fread(sbuf,2,512,ip))>0)
				sfswrite(ofid,len,sbuf);
			fclose(ip);
			remove(tfilename);
		}
		else {
			if ((code=MBR_Play(createPHO(transcription),MBR_WAIT|MBROUT_SOUNDBOARD,NULL,(DWORD)0))!=0)
				error("failed to call MBROLA mbrplay, code %d",code);
		}
		if (doannot) saveAN(transcription,sfilename);
	}

	/* that's all folks ! */
	if (dosave||doannot) sfsupdate(sfilename);

	exit(0);
}
