/* repitch -- change speaking rate and/or pitch */

/* M.A.Huckvale - University College London */

/* version 0.1 - March 1997 */

/* program identity: name and version string printed by the -I option and in the usage text */
#define PROGNAME "repitch"
#define PROGVERS "0.1"
/* global copy of the program name (presumably picked up by the SFS library for its messages -- TODO confirm) */
char *progname=PROGNAME;

/*-------------------------------------------------------------------------*/
/**MAN
.TH REPITCH 1 SFS UCL
.SH NAME
repitch - change speaking rate and/or pitch of speech
.SH SYNOPSIS
.B repitch
(-I) (-i item) (-t) (-r rate-change) (-f pitch-change) (-v) file
.SH DESCRIPTION
.I repitch
is a program to modify the pitch and duration of an utterance.
It uses the PSOLA algorithm and requires a set of pitch-epoch
annotations.  These can be generated from a Laryngograph signal
using Lx->Tx conversion, followed by Tx->An conversion.
.SH OPTIONS
.TP 11
.B -I
Identify program name and version number.
.TP 11
.BI -i item
Select input item number.
.TP 11
.BI -r rate-change
Relative speed up factor. 2.0 = twice as fast, 0.5=half as fast.  Default 1.
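.TP 11
.BI -f pitch-change
Relative pitch change factor.  2.0 = double pitch, 0.5 = halve pitch.  Default 1.
.TP 11
.B -t
Use TX pitch epochs rather than AN annotations.
.TP 11
.B -v
Verbose: report the output and input time span of each window processed.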
.SH INPUT ITEMS
.IP SP 11
Speech item
.IP TX 11
Pitch epochs (alternative to AN).
.IP AN 11
Pitch epoch annotations (alternative to TX).
.SH OUTPUT ITEMS
.IP SP 11
Prosody changed speech
.SH HISTORY
.IP rate=
proportional rate-change.
.IP freq=
proportional pitch change
.SH VERSION/AUTHOR
.IP 1.0
Mark Huckvale
.SH SEE ALSO
respeed
.SH BUGS
*/
/*--------------------------------------------------------------------------*/

/* include files */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <string.h>
#include <malloc.h>
#include "sfs.h"
/* generic helpers: larger of two values and absolute value */
/* (function-like macros: each argument may be evaluated more than once) */
#define MAX(x,y) (((x)>(y))?(x):(y))
#define ABS(x) (((x)>0)?(x):-(x))

/* global defines */
/* a pair of adjacent periods spanning more than 2/MINFX seconds is treated as unvoiced */
#define MINFX		50.0		/* below this is unvoiced */
/* unvoiced stretches are covered by windows advanced in steps of MAXSTEP seconds */
#define MAXSTEP		0.016		/* max window step = 16ms */

/* window record */
/* one analysis window of the input signal: lsize+rsize samples are read */
/* starting at ssamp, so the window centre lies at sample ssamp+lsize */
struct window_rec {
	int	ssamp;		/* start sample in signal */
	int	lsize;		/* number of samples in left half */
	int	rsize;		/* number of samples in right half */
	int	fx;		/* voicing flag: 1 = voiced (pitch-synchronous), 0 = unvoiced */
};

/* global data */
struct item_header	spitem;		/* header of input speech item */
struct item_header	txitem;		/* header of input TX item (when TX epochs are used) */
struct item_header	anitem;		/* header of AN item (read from file, or built from TX) */
struct an_rec		*an;		/* pitch-epoch annotations */
struct window_rec	*wtab;		/* table of analysis windows derived from the annotations */
int			wcount;		/* number of entries in wtab */
struct item_header	opitem;		/* header of output speech item */
short			*isp,*osp;	/* input/output signal */
float			*risp,*rosp;	/* real input/output signal */
					/* risp = current windowed input frame, rosp = overlap-add accumulator */
float			*wisp,*wosp;	/* real input/output window */
					/* wisp = weights of current window, wosp = accumulated weights used to normalise rosp */
int			bufsize;	/* size of internal buffers */
					/* in samples; set to about one second of signal in main() */

double			newrate=1.0;	/* speed-up rate */
double			newpitch=1.0;	/* pitch change */
int			verbose=0;	/* dump debugging info */

/* load TX as if it were annotations */
/*
 * Reads every frame of the TX item open on 'fid' and converts the sequence
 * of period lengths into annotation records:
 *	an[i].size = length of period i
 *	an[i].posn = sum of the lengths of periods 0..i-1 (an[0].posn = 0)
 *
 * fid		file descriptor positioned on the TX item
 * txitem	header of the TX item; txitem->numframes gives the period count
 * an		output table with room for txitem->numframes records
 *
 * Exactly txitem->numframes records are written; nothing is written for an
 * empty item.  Terminates through error() if the temporary buffer cannot be
 * obtained or the item cannot be read completely.
 *
 * NOTE(review): periods are read as 'long'; confirm this matches the TX
 * frame type of the SFS library being linked.
 */
void loadTX(int fid,struct item_header *txitem,struct an_rec *an)
{
	long	*tx;
	int	nframes = txitem->numframes;
	int	i;

	/* nothing to convert - and an[] may have no room even for an[0] */
	if (nframes <= 0) return;

	if ((tx = (long *)sfsbuffer(txitem,nframes))==NULL)
		error("could not get memory");
	if (sfsread(fid,0,nframes,tx)!=nframes)
		error("read error on input TX item");

	an[0].posn = 0;
	for (i=0;i<nframes;i++) {
		an[i].size = tx[i];
		/* the last period has no successor: writing an[nframes] */
		/* would run off the end of the caller's table */
		if (i+1 < nframes)
			an[i+1].posn = an[i].posn + tx[i];
	}
	free(tx);
}

/* find a window to match a time */
/*
 * Selects the window in wtab[] whose centre (ssamp+lsize) lies nearest to
 * input sample 'samp' and copies it to *wr.  When several windows are
 * equally near, the earliest one in the table is chosen.
 *
 * samp		input sample position to match
 * wr		receives a copy of the selected window record
 *
 * Returns 1 when a window was copied to *wr.  Returns 0, leaving *wr
 * untouched, when the table is empty or 'samp' lies beyond the end of the
 * last window.
 */
int findwindow(int samp,struct window_rec *wr)
{
	struct window_rec *last;
	int	i;
	int	best=0;
	int	bestdif;
	int	dif;

	/* an empty table has no last window to test against */
	if (wcount <= 0) return(0);

	last = &wtab[wcount-1];
	if (samp > last->ssamp+last->lsize+last->rsize) return(0);

	/* seed the search with the first window so that the nearest */
	/* window is always found, however far away it is */
	bestdif = wtab[0].ssamp + wtab[0].lsize - samp;
	if (bestdif < 0) bestdif = -bestdif;
	for (i=1;i<wcount;i++) {
		dif = wtab[i].ssamp + wtab[i].lsize - samp;
		if (dif < 0) dif = -dif;
		if (dif < bestdif) {
			bestdif = dif;
			best = i;
		}
	}
	*wr = wtab[best];
	return(1);
}

/* half window signal */
void halfwindow(short *buf,float *obuf,float *wbuf,int len)
{
	double 	w;
	int	i;

	w = M_PI/(len+1);
	for (i=1;i<=len;i++,buf++,wbuf++,obuf++) {
		*wbuf = (float)(0.5 + 0.5*cos(i*w));
		*obuf = (float)*buf * *wbuf;
	}
}

/* window signal */
void fullwindow(short *buf,float *obuf,float *wbuf,int size1,int size2)
{
	double 	w;
	int	i;

	w = 2.0*M_PI/(2*size1+1);
	for (i=0;i<size1;i++,buf++,wbuf++,obuf++) {
		*wbuf = (float)(0.5 - 0.5*cos((i+1)*w));
		*obuf = (float)*buf * *wbuf;
	}
	w = 2.0*M_PI/(2*size2+1);
	for (i=0;i<size2;i++,buf++,wbuf++,obuf++) {
		*wbuf = (float)(0.5 - 0.5*cos((i+1+size2)*w));
		*obuf = (float)*buf * *wbuf;
	}
}

/* overlap-add two signals */
/*
 * Adds s1len samples of the windowed frame s1buf, and the matching window
 * weights w1buf, into the accumulators s2buf and w2buf starting at index
 * s2off.
 *
 * Returns the accumulator index just past the last sample added
 * (s2off+s1len).
 */
int	overlapadd(float *s1buf,float *w1buf,int s1len,float *s2buf,float *w2buf,int s2off)
{
	float	*sigacc = s2buf + s2off;
	float	*winacc = w2buf + s2off;
	int	k;

	for (k=0;k<s1len;k++) {
		sigacc[k] += s1buf[k];
		winacc[k] += w1buf[k];
	}
	return(s2off+s1len);
}

/* re-synthesize */
/*
 * Converts slen accumulated samples back to 16-bit output by dividing each
 * accumulated signal value by its accumulated window weight.  A sample
 * whose weight is exactly zero (no window covered it) is output as 32767.
 *
 * obuf		receives the output samples
 * sbuf		accumulated windowed signal
 * wbuf		accumulated window weights
 * slen		number of samples to convert
 */
void outsynth(short *obuf,float *sbuf,float *wbuf,int slen)
{
	int	k;

	for (k=0;k<slen;k++) {
		if (wbuf[k]!=0.0)
			obuf[k] = (short)(sbuf[k] / wbuf[k]);
		else
			obuf[k] = 32767;
	}
}

/* main program */
void main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int		it;		/* item selection */
	char		*ty;		/* item sub type */
	char		*sptype="0";
	char		*txtype="0";
	char		*antype="0";
	int		epoch=AN_TYPE;
	/* file variables */
	char		filename[SFSMAXFILENAME]; /* SFS data file name */
	int		fid;		/* input file descriptor */
	int		ofid;
	double		curtime;
	int		cursamp,newsamp,lastsamp;
	int		framelen,offset;
	struct window_rec wrec;
	double		t0,t1,t2;
	int		i;
	
	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:r:f:vt")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Change pitch and duration V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == SP_TYPE)
					sptype=ty;
				else if (it == TX_TYPE) {
					txtype=ty;
					epoch=TX_TYPE;
				}
				else if (it == AN_TYPE) {
					antype=ty;
					epoch=AN_TYPE;
				}
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'r' :	/* output rate */
			newrate = atof(optarg);
			break;
		case 'f' :	/* output pitch */
			newpitch = atof(optarg);
			break;
		case 'v' :	/* verbose */
			verbose++;
			break;
		case 't' :	/* use Tx markers */
			epoch = TX_TYPE;
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-t) (-r rate-change) (-f pitch-change) (-v) file",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no database file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"w",NULL))<0)
		error("access error on '%s'",filename);

	/* locate items */
	if (!sfsitem(fid,SP_TYPE,sptype,&spitem))
		error("unable to find input item in '%s'",filename);
	if (epoch==TX_TYPE) {
		if (!sfsitem(fid,TX_TYPE,txtype,&txitem))
			error("unable to find input TX item in '%s'",filename);
	}
	else if (!sfsitem(fid,AN_TYPE,antype,&anitem)) {
		if (!sfsitem(fid,TX_TYPE,txtype,&txitem))
			error("unable to find input TX or AN item in '%s'",filename);
		else
			epoch=TX_TYPE;
	}

	/* load annotations */
	if (epoch==TX_TYPE) {
		sfsheader(&anitem,AN_TYPE,1,0,-1,txitem.frameduration,txitem.offset,0,0,1);
		an = (struct an_rec *)sfsbuffer(&anitem,txitem.numframes);
		loadTX(fid,&txitem,an);
		anitem.numframes = txitem.numframes;
	}
	else {
		an = (struct an_rec *)sfsbuffer(&anitem,anitem.numframes);
		sfsread(fid,0,anitem.numframes,an);
	}

	/* count number of windows we shall need */
	wcount=0;
	t0 = 0;
	t1 = an[0].posn*anitem.frameduration;
	for (i=1;i<anitem.numframes;i++) {
		t2 = an[i].posn*anitem.frameduration;
		wcount++;
		while ((t2-t0) > (2.0/MINFX)) {
			wcount++;
			t0 += MAXSTEP;
		}
		t0 = t1;
		t1 = t2;
	}
#ifdef IAG
	printf("wcount=%d\n",wcount);
#endif	
	/* make window records from annotations */
	wtab = (struct window_rec *)calloc(wcount+100,sizeof(struct window_rec));
	wcount=0;
	t0 = 0;
	t1 = an[0].posn*anitem.frameduration;
	for (i=1;i<anitem.numframes;i++) {
		t2 = an[i].posn*anitem.frameduration;

		wtab[wcount].ssamp = (int)(t0/spitem.frameduration);
		if ((t2-t0) <= (2.0/MINFX)) {
			wtab[wcount].lsize = (int)((t1-t0)/spitem.frameduration);
			wtab[wcount].rsize = (int)((t2-t1)/spitem.frameduration);
			wtab[wcount].fx = 1;
			wcount++;
		}
		else if ((t2-t0) < (2.0*MAXSTEP)) {
			wtab[wcount].lsize = (int)((t2-t0)/(2*spitem.frameduration));
			wtab[wcount].rsize = (int)((t2-t0)/(2*spitem.frameduration));
			wtab[wcount].fx = 0;
			wcount++;
		}
		else {
			wtab[wcount].lsize = (int)(MAXSTEP/spitem.frameduration);
			wtab[wcount].rsize = (int)(MAXSTEP/spitem.frameduration);
			wtab[wcount].fx = 0;
			wcount++;
			t0 += MAXSTEP;
			while ((t2-t0) >= (2.0*MAXSTEP)) {
				wtab[wcount].ssamp = (int)(t0/spitem.frameduration);
				wtab[wcount].lsize = (int)(MAXSTEP/spitem.frameduration);
				wtab[wcount].rsize = (int)(MAXSTEP/spitem.frameduration);
				wtab[wcount].fx = 0;
				wcount++;
				t0 += MAXSTEP;
			}
			wtab[wcount].ssamp = (int)(t0/spitem.frameduration);
			wtab[wcount].lsize = (int)((t2-t0)/(2*spitem.frameduration));
			wtab[wcount].rsize = (int)((t2-t0)/(2*spitem.frameduration));
			wtab[wcount].fx = 0;
			wcount++;
		}
		t0 = t1;
		t1 = t2;
	}
#ifdef IAG
	for (i=0;i<wcount;i++)
		printf("%4d. p=%5d s=%5d/%5d f=%d\n",
			i,wtab[i].ssamp,wtab[i].lsize,wtab[i].rsize,wtab[i].fx);
#endif
	/* find speech again */	
	if (!sfsitem(fid,SP_TYPE,sptype,&spitem))
		error("unable to find input item in '%s'",filename);

	/* buffer size about 1 seconds worth */
	bufsize = (int)(1.0/spitem.frameduration);
	
	/* get buffers */
	if ((isp=(short *)sfsbuffer(&spitem,bufsize))==NULL)
		error("could not get memory buffer");
	if ((osp=(short *)sfsbuffer(&spitem,bufsize))==NULL)
		error("could not get memory buffer");
	if ((risp=(float *)calloc(bufsize,sizeof(float)))==NULL)
		error("could not get memory buffer");
	if ((rosp=(float *)calloc(bufsize,sizeof(float)))==NULL)
		error("could not get memory buffer");
	if ((wisp=(float *)calloc(bufsize,sizeof(float)))==NULL)
		error("could not get memory buffer");
	if ((wosp=(float *)calloc(bufsize,sizeof(float)))==NULL)
		error("could not get memory buffer");

	/* make output header */
	sfsheader(&opitem,SP_TYPE,0,2,1,spitem.frameduration,spitem.offset,1,0,0);
	sprintf(opitem.history,"%s(%d.%02d,%d.%02d;rate=%g,freq=%g)",
			PROGNAME,
			spitem.datatype,spitem.subtype,
			(epoch==TX_TYPE)?txitem.datatype:anitem.datatype,
			(epoch==TX_TYPE)?txitem.subtype:anitem.subtype,
			newrate,newpitch);

	/* get output channel */
	if ((ofid=sfschannel(filename,&opitem))<0)
		error("could not open output channel to '%s'",filename);

	/* pre-charge buffer with half a window */
	framelen = sfsread(fid,wtab[0].ssamp,wtab[0].lsize,isp);
	halfwindow(isp,rosp,wosp,framelen);
	lastsamp=0;		/* centre of last window */
	cursamp=framelen;	/* end of last window */
	curtime=framelen*spitem.frameduration;	/* time at end of last window */

	/* processing loop */
	while (findwindow((int)(curtime*newrate/spitem.frameduration),&wrec)) {

		/* load signal */
		framelen = wrec.lsize+wrec.rsize;
		if (sfsread(fid,wrec.ssamp,framelen,isp)!=framelen) break;

		/* window signal */
		fullwindow(isp,risp,wisp,wrec.lsize,wrec.rsize);

		/* calculate overlap */
		if (wrec.fx==0)
			offset = (int)(wrec.lsize/newrate);
		else
			offset = (int)(wrec.lsize/newpitch);

		if (offset < (wrec.lsize/16)) offset = wrec.lsize/16;
		if (offset > ((31*wrec.lsize)/16)) offset = (31*wrec.lsize)/16;
		if ((lastsamp+offset-wrec.lsize) > cursamp)
			offset = cursamp-lastsamp+wrec.lsize;

		/* add in */
		if ((lastsamp+offset-wrec.lsize+framelen) >= bufsize) {
			fprintf(stderr,"? had to adjust framelen from %d => %d samples\n",
				framelen,bufsize - (lastsamp+offset-wrec.lsize));
			framelen = bufsize - (lastsamp+offset-wrec.lsize);
		}
		newsamp = overlapadd(risp,wisp,framelen,rosp,wosp,lastsamp+offset-wrec.lsize);

		if (verbose)
			printf("Out %7.4f-%.4f\tIn %7.4f-%.4f\tPeriod %6.4fs.\n",
				curtime-offset*spitem.frameduration,
				curtime+(newsamp-cursamp)*spitem.frameduration,
				wrec.ssamp*spitem.frameduration,
				(wrec.ssamp+framelen)*spitem.frameduration,
				offset*spitem.frameduration);

		curtime += (newsamp-cursamp)*spitem.frameduration;
		cursamp = newsamp;
		lastsamp += offset;

		/* purge output from time to time */
		if (cursamp >= (3*bufsize/4)) {
			outsynth(osp,rosp,wosp,bufsize/2);
			/* write out first half */
			sfswrite(ofid,bufsize/2,osp);
			memcpy((char *)osp,(char *)(osp+bufsize/2),bufsize);
			memcpy((char *)rosp,(char *)(rosp+bufsize/2),bufsize*2);
			memcpy((char *)wosp,(char *)(wosp+bufsize/2),bufsize*2);
			memset((char *)(osp+bufsize/2),0,bufsize);
			memset((char *)(rosp+bufsize/2),0,bufsize*2);
			memset((char *)(wosp+bufsize/2),0,bufsize*2);
			cursamp -= bufsize/2;
			lastsamp -= bufsize/2;
		}
	}

	/* write last buffer */
	outsynth(osp,rosp,wosp,cursamp);
	sfswrite(ofid,cursamp,osp);

	/* that's all folks */
	if (!sfsupdate(filename))
		error("update error on '%s'",filename);
	exit(0);
}	
