/* anspect -- calculate spectra from speech signals at selected times */

/* M.A.Huckvale - University College London */

/* version 1.0 - February 1993 */

/* program name and version, printed by the -I option and in the usage message */
#define PROGNAME "anspect"
#define PROGVERS "1.0"
/* NOTE(review): not referenced in this file; presumably read by the SFS library - confirm */
char *progname=PROGNAME;

/*--------------------------------------------------------------------------*/
/**MAN
.TH ANSPECT SFS1 UCL SFS
.SH NAME
anspect -- calculate and display spectra from speech at selected times
.SH SYNOPSIS
.B anspect
(-I) (-i item) (-w window) (-c) (-P) (-a anlabel|-t time|-T timefile) (-n) (-m) (-p) (-f maxfreq) file
.SH DESCRIPTION
.I anspect
is a program to display a cross-section spectrum of a speech signal of
a given analysis window size at a given time in the signal.
.I anspect
can also plot a number of spectra from a file containing a list
of times, normalise spectra and calculate the mean spectrum.
.PP
.I Options
and their meanings are:
.TP 11
.B -I
Identify program and exit.
.TP 11
.BI -i item
Select input item.
.TP 11
.BI -w window
Specify the analysis window size in seconds.  Default: 0.020s.
.TP 11
.B -c
Perform cepstral smoothing on the resulting spectrum.
.TP 11
.B -P
Do not pre-emphasise the signal prior to analysis.
.TP 11
.BI -a anlabel
Specify an annotation label that specifies a time at which to
calculate the spectrum.
.TP 11
.BI -t time
Specify the time at which to calculate the spectrum.
.TP 11
.BI -T timefile
Specify a file containing times (one per line) at which to calculate
a number of spectra.  If filename is given as '-', then the times are
read from the standard input.
.TP 11
.B -n
Normalise the spectrum by subtracting the mean dB value from each frequency.
.TP 11
.B -m
Calculate the mean spectrum when more than one spectrum is calculated.
.TP 11
.B -p
Send the resulting graph directly to the printer.
.TP 11
.BI -f maxfreq
Set maximum frequency for displayed graph.
.SH INPUT ITEMS
.IP SP.xx 11
Any speech item.
.IP AN.xx 11
(Optional) Any annotation item.
.SH VERSION/AUTHOR
1.0 - Mark Huckvale.
.SH SEE ALSO
Espect.
*/
/*--------------------------------------------------------------------------*/

/* standard definitions */ 
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <math.h>
#include <string.h>
#include "sfs.h"  
#include "dig.h"
#include "dgraph.h"
#include "fft.h"

/* manifest constants */
#define DEFAULT_ANAL_WIDTH	0.020	/* default window size (seconds) */
#define CEPCUT  		0.002	/* cepstral cut-off (seconds); 1/0.002 = 500 Hz */
#define GRAPH_RANGE_AMP		60.0	/* height of amplitude axis (dB) */
#define GRAPH_RANGE_FREQ	8000.0	/* default top of frequency axis (Hz) */
/* derived sizes are parenthesised so they expand safely inside larger expressions */
#define GRAPH_Y_SIZE		(GRAPH_RANGE_AMP+25.0)	/* 60dB + 25 for axes */
#define GRAPH_X_SIZE		(GRAPH_RANGE_FREQ+2000.0)	/* 8000Hz + 2000 for axes */
#define GRAPH_Y_ORIGIN		12.5
#define GRAPH_X_ORIGIN		1000.0

/* NOTE: both macros evaluate their arguments twice - do not pass expressions with side effects */
#define MIN(x,y) (((x)<(y))?(x):(y))
#define MAX(x,y) (((x)>(y))?(x):(y))

/* global data - option settings and buffers shared by main(), plotgraph() and fftsub() */ 
struct item_header	spitem;	 	/* speech item header */ 
short 		*sp;		/* speech buffer (one analysis window) */
struct item_header	anitem;		/* annotation item header */
struct an_rec		*an;		/* annotation table */
double		  	wisize = DEFAULT_ANAL_WIDTH;    /* analysis window size (seconds) */ 
double			preemp = 0.95;  /* pre-emphasis factor */ 
int		     	docepst = 0;	/* cepstral smoothing (-c) */ 
int		     	dopremp = 1;	/* pre-emphasis (cleared by -P) */  
int			donorm = 0;	/* normalise spectra (-n) */
int			domean = 0;	/* calculate mean (-m) */
char			timefilename[SFSMAXFILENAME];	/* time file list (-T); empty if not given */
int			dofile = 0;	/* do file of times; cleared when the file is exhausted */
double			witime = -1.0;	/* window time (seconds); negative means not set */
char			anlabel[256];	/* annotation label (-a); empty if not given */
int			doprint = 0;	/* send picture to printer (-p) */
float			*coeff,*mcoeff;	/* analysis coefficients: current spectrum, and running sum for mean */
double			grafreq=GRAPH_RANGE_FREQ;
					/* top of frequency graph (-f) */
double			maxfreq;	/* top of calculated spectrum: half the sampling rate */
char			filename[SFSMAXFILENAME];       /* dbase file name */ 

/* forward function prototypes */
/* full prototypes for ANSI compilers, old-style declarations otherwise */
#ifdef __STDC__
void plotgraph(float *coeff,int	numc);	/* draw one spectrum of numc values */
static int fftsub(short *, int, float *);	/* samples, count, output; returns number of coefficients */
#else
void plotgraph();
static int fftsub();
#endif

/* main program */ 
void main(argc,argv)
int	argc;
char	*argv[]; 
{ 
	/* option decoding */ 
	extern int	optind;	 /* option index */ 
	extern char	*optarg;	/* option argument ptr */ 
	int		errflg = 0;     /* option error flag */ 
	int		c;	      /* option switch */ 
	int32		it;	     /* item selections */ 
	char		*ty; 
	char		*sptype="0";    /* default sub-type = last */ 
	char 		*antype="0";    /* default sub-type = last */ 
	/* file variables */ 
	int		fid; 
	int		i;
	int32		start;
	int32		numf;
	FILE		*fp=NULL;
	char		iline[128];
	/* processing variables */
	int		numc;
	float		emean;
	int		mcount=0;

	/* decode switches */ 
	while ((c = getopt(argc,argv,"Ii:w:cPa:t:T:nmpf:")) != EOF ) switch (c) { 
	case 'I' :      /* Identify */ 
		fprintf(stderr, "%s: Display spectral cross-sections V%s\n",PROGNAME, PROGVERS); 
		exit(0); 
		break; 
  	case 'i' :      /* specific item */ 
	    	if (itspec(optarg,&it,&ty) == 0) { 
			if (it == SP_TYPE)
				sptype = ty; 
		 	else if (it == AN_TYPE)
				antype = ty;
		     	else
				error("unsuitable item specifier %s",optarg); 
	   	}
		else
			error("illegal item specifier %s",optarg); 
		break; 
	case 'w' :      /* window size */ 
		wisize = atof(optarg); 
		break; 
	case 'c' :      /* cepstral smoothing required */ 
		docepst++; 
		break; 
       	case 'P' :      /* do not pre-emphasize */ 
		dopremp = 0;
		break;
	case 't' :	/* window time */
		witime = atof(optarg);
		break;
	case 'a' :	/* annotation time */
		strcpy(anlabel,optarg);
		break;
	case 'T' :	/* file of times */
		strcpy(timefilename,optarg);
		dofile++;
		break;
	case 'n' :	/* do normalise */
		donorm++;
		break;
	case 'm' :	/* do mean */
		domean++;
		break;
	case 'p' :	/* send to printer */
		doprint++;
		break;
	case 'f' :	/* set graph frequency */
		grafreq = atof(optarg);
		break;
	case '?' :      /* unknown */ 
		errflg++; 
	} 
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-w windowtime) (-c) (-P) (-a anlabel|-t time|-T timefile) (-n) (-m) (-p) (-f maxfreq) file", PROGNAME);

	/* get filename */ 
	if (optind < argc)
		strcpy(filename, sfsfile(argv[optind])); 
	else
		error("no database file specified", NULL);  

	/* check file ok */ 
	if ((fid = sfsopen(filename, "r", NULL)) < 0)
		error("access error on %s", filename);

	/* load annotations if required */ 
	if (anlabel[0]) {
		getitem(filename, AN_TYPE, antype, &anitem, (void **)&an);
		witime = -1.0;
		for (i=0;i<anitem.numframes;i++)
			if (strcmp(anlabel,an[i].label)==0) {
				witime = anitem.offset + an[i].posn*anitem.frameduration;
				break;
			}
		if (witime < 0)
			error("could not find annotation '%s' in input file",anlabel);
		free(an);
	}

	/* get time from file */
	if (dofile) {
		if (strcmp(timefilename,"-")==0)
			fp = stdin;
		else if ((fp=fopen(timefilename,"r"))==NULL)
			error("could not open '%s'",timefilename);
		if (!fgets(iline,128,fp))
			error("empty file: '%s'\n",timefilename);
		witime = atof(iline);
	}

	/* check everything OK */
	if ((wisize < 0) || (witime < 0))
		error("bad analysis window");

	/* locate speech data */ 
	if (!sfsitem(fid, SP_TYPE, sptype, &spitem))
		error("unable to find input speech item in %s", filename);  

	/* get analysis buffers */
	numf = (int)(wisize / spitem.frameduration);
	if ((sp=(short *)sfsbuffer(&spitem,numf))==NULL)
		error("could not get speech buffer");
	if ((coeff=(float *)calloc(numf+2,sizeof(float)))==NULL)
		error("could not get analysis buffer");
	if ((mcoeff=(float *)calloc(numf+2,sizeof(float)))==NULL)
		error("could not get analysis buffer");
	maxfreq = 0.5/spitem.frameduration;

	/* open graphics */
	digstart((char)((doprint)?DIG_DEFAULT_PRINTER:DIG_DEFAULT_TERM),NULL,1);
	digscale(1.2,1.2,0);
	digorigin(0.1,0.1);

	/* calculation loop */
	do {

		/* get the bit of speech to analyse */
		start = (int)((witime-spitem.offset)/spitem.frameduration);
		if (start > spitem.numframes)
			error("time off end of speech signal");
		start = MIN(spitem.numframes-numf,start);
		start = MAX(0,start);
		numf = sfsread(fid, start, numf, sp);

		/* perform FFT analysis on this window */
		numc = fftsub(sp, numf, coeff);  

		/* normalise if required */
		if (donorm) {
			emean = 0.0;
			for (i=0;i<numc;i++)
				emean += coeff[i];
			emean /= numc;
			for (i=0;i<numc;i++)
				coeff[i] -= emean;
		}
		
		/* plot graph */
		if (domean) {
			for (i=0;i<numc;i++)
				mcoeff[i] += coeff[i];
			mcount++;
		}
		else
			plotgraph(coeff,numc);

		/* get next window */
		if (dofile) {
			if (!fgets(iline,128,fp)) {
				if (fp != stdin) fclose(fp);
				dofile = 0;
			}
			else
				witime = atof(iline);
		}
		
	} while (dofile);

	if (domean) {
		for (i=0;i<numc;i++)
			mcoeff[i] /= (float)mcount;
		plotgraph(mcoeff,numc);
	}

	digquit(15);

	sfsclose(fid);
	exit(0); 
}  

/* plot a graph */
/*
 * Draw one spectrum on the shared axes and update the title line.
 *   coeff - spectrum values to plot (dB, as labelled on the amplitude axis)
 *   numc  - number of values in coeff
 * The axes are drawn on the first call only: the frequency axis runs from
 * 0 to grafreq, and the amplitude axis covers GRAPH_RANGE_AMP dB below the
 * first spectrum's maximum rounded up to a multiple of 10.
 */
void plotgraph(coeff,numc)
float *coeff;
int	numc;
{
	float	x[2],y[2];
	int	i;
	float	maxamp;
	static int first=1;
	/*
	 * The title holds the file name plus either the annotation label or
	 * the time file name, so size it from those buffers; the extra 128
	 * bytes cover the fixed text, item numbers and mode words.
	 */
	char	title[sizeof(filename)+sizeof(timefilename)+sizeof(anlabel)+128];
	char	messg[32];

	/* first time - set amplitude axes */
	if (first) {
		/* find maximum amplitude and round up */
		maxamp = coeff[0];
		for (i=1;i<numc;i++) maxamp = MAX(maxamp,coeff[i]);
		maxamp = (float)(((int)maxamp+10)/10)*10;

		/* do axes */
		x[0] = 0.0;
		x[1] = grafreq;
		y[0] = maxamp-GRAPH_RANGE_AMP;
		y[1] = maxamp;

		dgraph((short *)x,(short *)y,2,DGfloat+DGlowax,DGfloat+DGlowax,DGaxonly,23242423,
			"","Frequency (Hz)","Amplitude (dB)");
		first=0;
	}

	/* plot graph: x gives only the first and last frequency of the spectrum */
	x[0]=0.0;
	x[1]=maxfreq;
	dgraph((short *)x,(short *)coeff,numc,DGfloat+DGsame+DGlast,
			DGfloat+DGsame,DGline,23,NULL,NULL,NULL);

	/* and title it */
	sprintf(title,"anspect: file=%s item=%d.%02d ",filename,spitem.datatype,spitem.subtype);
	if (anlabel[0]) {
		strcat(title,"label=");
		strcat(title,anlabel);
		strcat(title," ");
	}
	else if (timefilename[0]) {
		strcat(title,"timefile=");
		strcat(title,timefilename);
		strcat(title," ");
	}
	else {
		sprintf(messg,"time=%g ",witime);
		strcat(title,messg);
	}
	if (docepst) strcat(title,"smoothed ");
	if (donorm) strcat(title,"normalised ");
	if (domean) strcat(title,"mean");
	/* NOTE(review): digprompt() may have its own length limit - confirm */
	digprompt(title);
	digflush();

}

/* static fft arrays - allocated and released on every call to fftsub() */
static float *fftbuf;		/* windowed signal, then its complex spectrum */
static float *cepstbuf;		/* work area for cepstral smoothing (-c only) */

/* perform fft on speech waveform */
/*
 * Compute a log power spectrum of one window of speech.
 *   sp    - numf input samples
 *   numf  - number of samples; more than 8192 are truncated to 8192
 *   coeff - receives the spectrum in dB, lowest frequency first; the
 *           caller must supply room for the returned number of values
 * Returns the number of coefficients written, which is half the FFT size
 * (FFT size is the power of two >= numf, between 256 and 8192).
 * The window is zero-padded symmetrically to the FFT size, optionally
 * pre-emphasised (dopremp), Hamming weighted, and optionally smoothed by
 * cepstral liftering (docepst).
 */
static int fftsub(sp, numf, coeff)
short 	*sp;
int	numf;
float	*coeff;
{
	int	n,k;		/* loop counter and buffer index */
	int	order;		/* log2 of FFT size */
	int	fftlen;		/* FFT size in points */
	int	half;		/* number of output coefficients */
	int	pad;		/* zero samples placed before the data */
	int	ncut;		/* first cepstral bin to be zeroed */
	float	omega;		/* window angle step */
	float	power;		/* squared magnitude of one bin */

	/* find power of two required - minimum is 256 points */
	for (order = 8, fftlen = 256; ((numf-1) >> order) > 0; order++)
		fftlen *= 2;
	if (order > 13) {
		fprintf(stderr, "analysis window truncated to 8192 points\n");
		fftlen = 8192;
		numf = 8192;
	}
	half = fftlen/2;

	/* allocate memory for fftbuf and cepstbuf */
	fftbuf = (float *) calloc(fftlen+2, sizeof(float));
	if (fftbuf == (float *)NULL)
		error("Not enough memory for FFT buffer.");
	if (docepst) {
		cepstbuf = (float *) calloc(fftlen+2, sizeof(float));
		if (cepstbuf == (float *)NULL)
			error("Not enough memory for FFT buffer.");
	}

	/* centre the speech data in the floating point buffer */
	pad = (fftlen-numf)/2;
	if (pad < 0) pad = 0;
	for (k = 0; k < pad; k++)
		fftbuf[k] = 0.0;
	for (; k < (numf+pad); k++)
		fftbuf[k] = sp[k-pad];
	for (; k < fftlen; k++)
		fftbuf[k] = 0.0;

	/*
	 * pre-emphasize and window, working from the last sample backwards so
	 * that each sample's predecessor is still unmodified when it is used
	 */
	omega = 8.0 * atan(1.0) / (numf - 1);
	k = numf + pad - 1;
	if (dopremp) {
		for (n = 0; n < numf-1; n++, k--)
			fftbuf[k] = (fftbuf[k] - preemp * fftbuf[k-1]) * (0.54 - (0.46 * cos(n * omega)));
	}
	else {
		for (n = 0; n < numf-1; n++, k--)
			fftbuf[k] *= (0.54 - (0.46 * cos(n * omega)));
	}
	/* remaining sample gets the window end value 0.54-0.46 and no pre-emphasis */
	fftbuf[k] *= 0.08;

	/* perform FFT */
	CDFFT(fftbuf, fftlen);

	if (docepst) {
		/* log power spectrum, highest bin first ("backwards spectrum") */
		for (k = 0; k < half; k++) {
			power = 0.0001;
			power += fftbuf[fftlen+1-2*k] * fftbuf[fftlen+1-2*k];
			power += fftbuf[fftlen-2*k] * fftbuf[fftlen-2*k];
			cepstbuf[k] = 10.0*log10(power);
		}
		/* mirror it into the upper half ("forwards spectrum") */
		for (k = 0; k < half; k++)
			cepstbuf[half+k] = cepstbuf[half-1-k];

		/* perform real-to-complex fft: +ve half of complex DFT */
		CDFFT(cepstbuf, fftlen);

		/* zero complex points from the cut-off up to cepstbuf[fftlen+1] */
		ncut = (int)(CEPCUT/spitem.frameduration);
		for (n = ncut; n < half; n++) {
			cepstbuf[2*n+2] = 0.0;
			cepstbuf[2*n+3] = 0.0;
		}

		/* do inverse fft */
		CDIFFT(cepstbuf, fftlen);

		/* copy smoothed energies into coefficient structure, lowest bin first */
		for (k = 0; k < half; k++)
			coeff[k] = cepstbuf[half-1-k];
	}
	else {
		/* log power of each complex bin, starting after the first pair */
		for (k = 0; k < half; k++) {
			power = 0.0001;
			power += fftbuf[2*k+2] * fftbuf[2*k+2];		/* real value */
			power += fftbuf[2*k+3] * fftbuf[2*k+3];		/* imaginary value */
			coeff[k] = 10.0*log10(power);
		}
	}

	/* free FFT buffer memory */
	free(fftbuf);
	if (docepst) free(cepstbuf);

	return(half);
}
 
 
 

