/* enhance -- perform enhancement of clean speech signals */

/* M.A.Huckvale - University College London */

/* version 1.0 - January 1998 */
/* version 1.1 - September 2001 John Rye - fixed zero buffer bug*/
/* version 1.2 - January 2002
	- add support for fixed scaling of regions
*/
/* version 2.0 - January 2002
	- restructure and add spectral subtraction
*/

/* program name: used in the identify, usage and warning messages and in the output item history */
#define PROGNAME "enhance"
/* program version: reported by the -I (identify) switch */
#define PROGVERS "2.0"
/* program name as a global string -- presumably read by the SFS library when reporting errors; verify */
char *progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH ENHANCE SFS1 UCL
.SH NAME
enhance -- enhance clean speech signal
.SH SYNOPSIS
.B enhance
(-i item) (-c mu-law|-f fixdB|-s spectralsubdegree) (-A label|-X label) file
.SH DESCRIPTION
.I enhance
is a program to perform a range of enhancement operations on
speech signals.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and exit.
.TP 11
.BI -i item
Select input item.
.TP 11
.BI -c mulaw
Perform amplitude compression on the signal.  This reduces the
dynamic range of the signal, making quieter components more
salient.  A mu-law compression function is used:
output_energy = log(1+mu*input_energy)/log(1+mu), where the energies are
expressed in decibels and normalised to the top 40dB of the signal's range.
A mu value of 10 gives
modest compression.
.TP 11
.BI -s spectsubdegree
Perform spectral subtraction on the signal.  This reduces the amount
of background noise in the signal by subtracting a fraction of the
quietest frames from all frames (in the frequency domain).  The degree
is a percentage factor on the energy found in the quietest frames.
A degree of 100 gives modest background noise removal.
.TP 11
.BI -f fixdB
Select fixed amplitude scaling by the specified number of decibels.
.TP 11
.BI -A label
Only process the regions annotated with this label.  You can specify
multiple regions.
.TP 11
.BI -X label
Process all regions except those annotated with this label.  You can
specify multiple regions.
.SH INPUT ITEMS
.IP 1.xx 11
Any speech item.
.SH VERSION/AUTHOR
.nf
1.0 - Mark Huckvale.
1.1 - John Rye.
2.0 - Mark Huckvale.
.fi
*/
/*--------------------------------------------------------------------------*/


/* global declarations */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include "sfs.h"			/* header structures */
#include "libic.h"

/* manifest constants */
#define WINTIME	0.030		/* target analysis window length in seconds (30ms) */
#define DBRANGE	40.0		/* dynamic range in dB below the maximum window energy */

/* global data */
struct item_header spitem;	/* input item header data */
struct item_header opitem;	/* output item header */
struct item_header anitem;	/* annotation item */
struct an_rec *an;		/* annotation records, loaded only when labels are given */

/* annotation lists */
#define MAXLABEL	32		/* capacity of the label table */
char	*label[MAXLABEL];	/* labels named by -A or -X switches */
int	nlabel=0;		/* number of entries used in label[] */
int	exclude=0;		/* 1 = labels came from -X (regions to leave alone), 0 = from -A */

/* buffering */
short	*buff;			/* current input window */
short	*hold;			/* overlap-add accumulator for the output */
double	*window;		/* raised cosine analysis window */
int	buffsize;		/* analysis window size in samples */
double	maxenergy;		/* largest window energy (dB) found in the first pass */

/* operations selected */
int		docompression=0;	/* compression mode */
double	mu=10.0;			/* mu value for compression */
int		dospectsub=1;		/* spectral subtraction (the default operation) */
double	subdegree=100;		/* subtraction strength (percent) */
int 	dofixscale=0;		/* fixed scaling */
double	fixscale=0;			/* scaling factor (dB) */

/*
 * save string
 *
 * Returns a freshly allocated copy of the NUL-terminated string 'str'.
 * The copy belongs to the caller.  If memory cannot be obtained the
 * program reports the failure and exits rather than copying through a
 * NULL pointer.
 */
char *strsave(char *str)
{
	char *ptr=malloc(strlen(str)+1);

	if (ptr==NULL) {
		fprintf(stderr,"strsave: out of memory\n");
		exit(1);
	}
	strcpy(ptr,str);
	return(ptr);
}

/*
 * check if annotation on list
 *
 * Returns 1 when 'str' exactly matches one of the nlabel entries stored
 * in label[], and 0 otherwise.
 */
int onlist(char *str)
{
	char	**entry = label;
	char	**last = label + nlabel;

	while (entry < last) {
		if (!strcmp(*entry,str))
			return(1);
		entry++;
	}
	return(0);
}

/*
 * check window against annotations: 0=enhance, 1=don't enhance
 *
 * 'start' and 'stop' are the times of the first and last sample of the
 * analysis window.  Consecutive annotations that are all on the label
 * list (or all off it) are merged into a single block running from the
 * position of the first to the end of the last.
 *
 * With -A labels (exclude==0) the window is enhanced if any part of it
 * overlaps a listed block.  With -X labels (exclude!=0) the window is
 * left alone only if it lies completely inside a listed block.
 */
int checkwin(double start,double stop)
{
	double	t1,t2;
	int	i,j,match;

	/* if no annotations always enhance */
	if ((nlabel==0)&&(exclude==0)) return(0);

	/* check annotated regions, one merged block at a time */
	i = 0;
	while (i < anitem.numframes) {
		/* check if this annotation mentioned on command line */
		match = onlist(an[i].label);
		/* run forward to last annotation of equivalent block */
		j = i;
		while ((j+1 < anitem.numframes) && (onlist(an[j+1].label)==match))
			j++;
		if (match) {
			/* find start and stop time of block */
			t1 = anitem.offset + an[i].posn*anitem.frameduration;
			t2 = anitem.offset + (an[j].posn + an[j].size)*anitem.frameduration;
			if (exclude) {
				/* exclude this window only if completely in region */
				if ((t1 <= start) && (stop <= t2))
					return(1);
			}
			else {
				/* include this window if any part in region; an
				   interval overlap test also catches a region
				   that lies wholly inside the window */
				if ((start <= t2) && (t1 <= stop))
					return(0);
			}
		}
		/* continue after this block: its sub-blocks cannot match if it did not */
		i = j+1;
	}
	return(!exclude);
}


/* main program */
void main(argc,argv)
int argc;
char *argv[];
{
	/* local variables */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument */
	int		errflg=0;	/* option error flag */
	int		c;		/* option char */
	int		it;
	char		*ty;
	char		*sptype="0";
	char		*antype="0";
	char		filename[SFSMAXFILENAME];
					/* database file name */
	int		fid,ofid;
	int		i,j;
	double		energy;
	double		minenergy, minsumsq;
	double		newen;
	double		sumsq,factor;
	double		val;
	double		omega;
	int		overload=0;

	REAL		*fsp,*wsp,*mag2,*minmag2,*lstmag2;
	COMPLEX		*spect;
	double		smooth;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:c:a:A:X:f:s:")) != EOF )
		switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Enhance speech signal V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* item spec */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == SP_TYPE)
					sptype = ty;
				else if (it == AN_TYPE)
					antype = ty;
				else
					error("unsuitable item specification %s",optarg);
			}
			else
				error("illegal item specification %s",optarg);
			break;
		case 'c' :	/* amplitude compression mode */
		case 'a' :	/* amplitude compression mode */
			mu = atof(optarg);
			docompression=1;
			dospectsub=0;;
			dofixscale=0;
			break;
		case 'f' :	/* fix scaling (in decibels) */
			fixscale = atof(optarg);
			docompression=0;
			dospectsub=0;
			dofixscale=1;
			break;
		case 's' :	/* spectral subtraction */
			subdegree = atof(optarg);
			docompression=0;
			dospectsub=1;
			dofixscale=0;
			break;
		case 'A' :	/* add annotated region */
			if (nlabel && exclude)
				error("cannot mix -A and -X switches");
			exclude=0;
			label[nlabel++] = strsave(optarg);
			break;
		case 'X' :	/* exclude annotated region */
			if (nlabel && !exclude)
				error("cannot mix -A and -X switches");
			exclude=1;
			label[nlabel++] = strsave(optarg);
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-c mu-value|-f fixdB|-s spectsubdegree) (-A addlabel|-X exclabel) file\n",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no data file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0) {
		if (fid==-1)
			error("unable to find file '%s'",filename);
		else
			error("access error on '%s'",filename);
	}

	if (nlabel>0) {
		/* locate input item */
		if (!sfsitem(fid,AN_TYPE,antype,&anitem))
			error("unable to find input item in '%s'",filename);
		an = (struct an_rec *)sfsbuffer(&anitem,anitem.numframes);
		sfsread(fid,0,anitem.numframes,an);
	}

	/* locate input item */
	if (!sfsitem(fid,SP_TYPE,sptype,&spitem))
		error("unable to find input item in '%s'",filename);

	/* calculate analysis window size in samples */
	buffsize=256;
	while (buffsize < (int)(WINTIME/spitem.frameduration))
		buffsize *= 2;
	if (buffsize > 1024) buffsize=1024;

	/* choose a smoothing factor for noise */
	smooth = exp(-(buffsize/2)*spitem.frameduration/0.1);

	/* get buffers */
	buff = (short *)sfsbuffer(&spitem,buffsize);
	hold = (short *)sfsbuffer(&spitem,buffsize);
	memset(hold,0,buffsize*sizeof(short));
	window = (double *)calloc(buffsize,sizeof(double));
	fsp = calloc(buffsize,sizeof(REAL));
	wsp = calloc(buffsize,sizeof(REAL));
	mag2 = calloc(buffsize,sizeof(REAL));
	lstmag2 = calloc(buffsize,sizeof(REAL));
	minmag2 = calloc(buffsize,sizeof(REAL));
	spect = calloc(buffsize,sizeof(COMPLEX));

	/* create output item header */
	sfsheader(&opitem,spitem.datatype,spitem.floating,
			spitem.datasize,spitem.framesize,
			spitem.frameduration,spitem.offset,
			spitem.windowsize,spitem.overlap,spitem.lxsync);
	if (docompression) {
		if (nlabel==0)
			sprintf(opitem.history,"%s(%d.%02d;compression,mu=%g)",
				PROGNAME,
				spitem.datatype,spitem.subtype,mu);
		else {
			sprintf(opitem.history,"%s(%d.%02d,%d.%02d;compression,mu=%g%s%s",
				PROGNAME,
				spitem.datatype,spitem.subtype,
				anitem.datatype,anitem.subtype,mu,
				(exclude)?",exclude":"",
				(nlabel>0)?",labels=":"");
			for (i=0;i<nlabel;i++) {
				if (i) strcat(opitem.history,",");
				strcat(opitem.history,label[i]);
			}
			strcat(opitem.history,")");
		}
	}
	else if (dospectsub) {
		if (nlabel==0)
			sprintf(opitem.history,"%s(%d.%02d;spectral-sub,degree=%g)",
				PROGNAME,
				spitem.datatype,spitem.subtype,subdegree);
		else {
			sprintf(opitem.history,"%s(%d.%02d,%d.%02d;spectral-sub,degree=%g%s%s",
				PROGNAME,
				spitem.datatype,spitem.subtype,
				anitem.datatype,anitem.subtype,subdegree,
				(exclude)?",exclude":"",
				(nlabel>0)?",labels=":"");
			for (i=0;i<nlabel;i++) {
				if (i) strcat(opitem.history,",");
				strcat(opitem.history,label[i]);
			}
			strcat(opitem.history,")");
		}
	}
	else if (dofixscale) {
		if (nlabel==0)
			sprintf(opitem.history,"%s(%d.%02d;scale,factor=%g)",
				PROGNAME,
				spitem.datatype,spitem.subtype,fixscale);
		else {
			sprintf(opitem.history,"%s(%d.%02d,%d.%02d;scale,factor=%g%s%s",
				PROGNAME,
				spitem.datatype,spitem.subtype,
				anitem.datatype,anitem.subtype,fixscale,
				(exclude)?",exclude":"",
				(nlabel>0)?",labels=":"");
			for (i=0;i<nlabel;i++) {
				if (i) strcat(opitem.history,",");
				strcat(opitem.history,label[i]);
			}
			strcat(opitem.history,")");
		}
	}

	/* open output channel */
	if ((ofid=sfschannel(filename,&opitem)) < 0)
		error("unable to open output file",NULL);

	/* calculate raised cosine window */
	omega = 8.0*atan(1.0)/(buffsize-1);
	for (i=0;i<buffsize;i++) {
		window[i] = 0.5 - 0.5*cos(i*omega);
	}

	/* find window with maximum energy */
	maxenergy = 0;
	for (i=0;sfsread(fid,i,buffsize,buff)==buffsize;i+=buffsize/2) {

		/* calculate energy in window */
		sumsq = 0;
		for (j=0;j<buffsize;j++) {
			val = buff[j] * window[j];
			sumsq += val * val;
		}
		energy = 10*log10(sumsq/buffsize);
		if (energy > maxenergy) maxenergy = energy;


		/* record information for spectral subtraction */
		if (dospectsub) {
			for (j=0;j<buffsize;j++)
				fsp[j] = buff[j] * window[j];
			rfft(fsp,spect,buffsize);
			for (j=0;j<buffsize;j++)
				mag2[j] = spect[j].r*spect[j].r + spect[j].i*spect[j].i;
			if (i==0) {
				for (j=0;j<buffsize;j++)
					lstmag2[j] = mag2[j];
			}
			else {
				for (j=0;j<buffsize;j++)
					lstmag2[j] = smooth*lstmag2[j] + (1-smooth)*mag2[j];
			}
			if (i==0) {
				for (j=0;j<buffsize;j++)
					minmag2[j] = lstmag2[j];
			}
			else {
				for (j=0;j<buffsize;j++)
					if (lstmag2[j] < minmag2[j])
						minmag2[j] = lstmag2[j];
			}
		}

	}

	if (dospectsub) {
		/* get noise magnitude */
		for (j=0;j<buffsize;j++) minmag2[j] = sqrt(minmag2[j]);

		/* choose a smoothing factor for speech */
		smooth = exp(-(buffsize/2)*spitem.frameduration/0.04);
	}

	printf("Maximum energy in window = %.2fdB\n", maxenergy);
	minenergy = maxenergy - DBRANGE;
	minsumsq = buffsize * pow(10.0, minenergy/10);

	/* process file */
	for (i=0;sfsread(fid,i,buffsize,buff)==buffsize;i+=buffsize/2) {

		/* calculate energy in window */
		sumsq = 0;
		for (j=0;j<buffsize;j++) {
			val = buff[j] * window[j];
			sumsq += val * val;
		}

		/* avoid log error if zero valued buffer JMR */
		if (sumsq<minsumsq)
			energy = minenergy - 1;
		else
			energy = 10*log10(sumsq/buffsize);

		/* calculate new energy value */
		if (energy < minenergy)
			newen = energy;
		else
			newen = minenergy + DBRANGE*log(1+mu*(energy-minenergy)/DBRANGE) / log(1+mu);

		if (docompression) {
			/* calculate multiplying factor */
			if (checkwin(spitem.offset+i*spitem.frameduration,
				     spitem.offset+(i+buffsize-1)*spitem.frameduration))
				factor = 1.0;
			else
				factor = pow(10.0,(newen-energy)/20);

			/* scale output and add to hold buffer */
			for (j=0;j<buffsize;j++) {
				val = hold[j] + buff[j] * window[j] * factor;
				if (val < -32768) {
					overload=1;
					hold[j] = -32768;
				}
				else if (val > 32767) {
					overload=1;
					hold[j] = 32767;
				}
				else
					hold[j] = (short)val;
			}
		}
		else if (dofixscale) {
			/* calculate multiplying factor */
			if (checkwin(spitem.offset+i*spitem.frameduration,
				     spitem.offset+(i+buffsize-1)*spitem.frameduration))
				factor = 1.0;
			else
				factor = pow(10.0,fixscale/20);

			/* scale output and add to hold buffer */
			for (j=0;j<buffsize;j++) {
				val = hold[j] + buff[j] * window[j] * factor;
				if (val < -32768) {
					overload=1;
					hold[j] = -32768;
				}
				else if (val > 32767) {
					overload=1;
					hold[j] = 32767;
				}
				else
					hold[j] = (short)val;
			}
		}
		else if (dospectsub) {
			for (j=0;j<buffsize;j++)
				fsp[j] = buff[j] * window[j];
			rfft(fsp,spect,buffsize);
			for (j=0;j<buffsize;j++)
				mag2[j] = sqrt(spect[j].r*spect[j].r + spect[j].i*spect[j].i);
			if (i==0) {
				for (j=0;j<buffsize;j++)
					lstmag2[j] = mag2[j];
			}
			else {
				for (j=0;j<buffsize;j++)
					lstmag2[j] = smooth*lstmag2[j] + (1-smooth)*mag2[j];
			}
			for (j=0;j<buffsize;j++) {
				factor = (lstmag2[j]-subdegree*minmag2[j]/100)/lstmag2[j];
			if (factor < 0.02) factor=0.02;
				spect[j].r *= factor;
				spect[j].i *= factor;
			}
			irfft(fsp,spect,buffsize);
			if (checkwin(spitem.offset+i*spitem.frameduration,
				     spitem.offset+(i+buffsize-1)*spitem.frameduration)==0) {
				for (j=0;j<buffsize;j++)
					hold[j] = (short)(hold[j] + fsp[j]);
			}
			else {
				for (j=0;j<buffsize;j++)
					hold[j] = (short)(hold[j] + buff[j] * window[j]);
			}
		}

		/* write out result of processing */
		if (sfswrite(ofid,buffsize/2,hold) != buffsize/2)
			error("write error on output file",NULL);

		/* shift hold buffer and reset */
		for (j=0;j<buffsize/2;j++) {
			hold[j] = hold[buffsize/2 + j];
			hold[buffsize/2 + j] = 0;
		}
	}

	/* write out result of processing last buffer */
	if (sfswrite(ofid,buffsize/2,hold) != buffsize/2)
		error("write error on output file",NULL);
	if (overload)
		fprintf(stderr,"%s: WARNING - overload on output\n",PROGNAME);

	/* update file */
	if (!sfsupdate(filename))
		error("update error on %s",filename);

	free(fsp);
	free(wsp);
	free(mag2);
	free(minmag2);
	free(spect);

	/* ... that's all folks */
	exit(0);
}

