/* anlist -- list an annotation item to standard output in form required by anload */

/* M.A.Huckvale - November 1988 */

/* version 1.0 */
/* version 1.1 - August 1989
	- add sample number outputs
*/
/* version 1.2 - March 1992
	- add -h HTK format output
*/
/* version 1.3 - July 1992
	- add -m anmap annotation mapping on output
*/
/* version 1.4 - September 1992
	- add -c flag to collapse adjacent identical output annotations
*/
/* version 1.5 - February 2002
	- add output file option
	- add ESPS format
*/
/* version 1.6 - June 2004
	- add -O option to write to filename modelled on SFS filename
*/
/* version 1.7 - April 2013
	- add Praat Textgrid output option
	- add CSV output option
*/

/*-------------------------------------------------------------------*/
/**MAN
.TH ANLIST SFS1 UCL
.SH NAME
anlist -- list an annotation item to a text description
.SH SYNOPSIS
.B anlist
(-I) (-i item) (-s rate|-S|-h|-e|-C|-P) (-m anmapfile) (-c) (-O|-o outfile) file
.SH DESCRIPTION
.I anlist
dumps the contents of an annotation item onto the standard output
in a form suitable for editing and reloading with
.I anload(SFS1).
The standard format of the output is one annotation per line, with two
entries: firstly the time in seconds, secondly the text label.
.PP
.B Options
.TP 11
.B -I
Identify the program version.
.TP 11
.BI -i item
Select input item.
.TP 11
.BI -s rate
Output sample numbers pre-supposing given sampling rate.
.TP 11
.B -S
Output annotations in sample number format. Sample rate is taken from
annotation item unless -s switch is used.
.TP 11
.B -h
Output annotations in HTK format, with start and stop times
expressed in 100ns units.
.TP 11
.B -e
Output annotations in ESPS format, with end times only labelled in
seconds.
.TP 11
.B -C
Output annotations in CSV format, with start times, durations and labels separated by commas.
.TP 11
.B -P
Output annotations in Praat Textgrid format, with a single tier of intervals and labels.
.TP 11
.BI -m anmapfile
Map any annotations that match lines in the supplied file.  Format as for
.I anmap(SFS1)
program, namely two fields per line, input and output.
.TP 11
.B -c
Collapse together adjacent annotations sharing the same output label.
.TP 11
.BI -o outfile
Output to specified file.  Default is standard output.
.TP 11
.B -O
Output to a file with a name modelled on the SFS file name.
New name is basename.lab.
.SH INPUT ITEMS
.IP AN 11
Any annotation item
.SH VERSION/AUTHOR
1.7 - Mark Huckvale
*/
/*---------------------------------------------------------------*/

/* program name and version */
#define	PROGNAME "anlist"
#define PROGVERS "1.7"
char	*progname = PROGNAME;

/* global declarations */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <string.h>
#include "sfs.h"		/* database filing system structures */
char	*strsave();

/* global data */
char			filename[SFSMAXFILENAME];	/* dbase file name */
struct item_header 	anitem;			/* annotation item header  */
struct an_rec		*an;			/* annotation record */
char			*antype="0";		/* annotation item sub-type selector, replaced by -i */
double			srate= -1;		/* sampling rate for sample-number output; stays <=0 until set by -s, else derived from the item frame duration */
int			sampformat=0;		/* output in samples format */
int			htkformat=0;		/* output in HTK format */
int			espsformat=0;		/* output in ESPS format */
int			csvformat=0;		/* output in CSV format */
int			praatformat=0;		/* output in Praat textgrid format */
int			domap=0;		/* do annotation mapping */
int			dojoin=0;		/* join adjacent duplicates */
char		ofilename[256];	/* output file name; empty string means standard output */
int			genoname=0;		/* auto generate output name */

/* annotation mapping */
/* anin is a sorted table of input labels; anout holds the replacement */
/* label for the entry at the same index; nummap is the entry count */
char	**anin,**anout;
int	nummap;
char	lastan[256];	/* NOTE(review): not referenced anywhere in the visible code */

/* map file line */
/* scratch buffer that getmapentry() reads each map file line into */
char	mline[256];

/* return a freshly malloc'ed copy of the string s; */
/* reports "cannot save string" through error() if no memory is available */
char	*strsave(s)
char	*s;
{
	int	nbytes;
	char	*copy;

	/* room for the characters plus the terminating NUL */
	nbytes = strlen(s)+1;
	copy = malloc(nbytes);
	if (copy==NULL)
		error("cannot save string",NULL);
	strcpy(copy,s);
	return(copy);
}

/* binary search for string s in the table t, which holds num entries */
/* in ascending strcmp() order; returns the index of the matching entry, */
/* or -1 if s is not in the table */
int strfind(s,t,num)
char	*s;
char	*t[];
int	num;
{
	int	lo = 0;
	int	hi = num-1;

	/* [lo,hi] is the part of the table that can still contain s */
	while (lo <= hi) {
		int	mid = (lo+hi)/2;
		int	cmp = strcmp(s,t[mid]);

		if (cmp == 0)
			return(mid);
		if (cmp > 0)
			lo = mid+1;
		else
			hi = mid-1;
	}
	return(-1);
}

/* add string s to the sorted table t unless it is already there; */
/* *num is the current entry count and is incremented on insertion. */
/* The caller must have room in t for one more entry. */
void strtable(s,t,num)
char	*s;
char	*t[];
int	*num;
{
	int	slot;

	/* already present - leave the table alone */
	if (strfind(s,t,*num) >= 0)
		return;

	/* shuffle larger entries up one place to open a gap for s */
	for (slot = *num; (slot > 0) && (strcmp(s,t[slot-1]) < 0); slot--)
		t[slot] = t[slot-1];

	/* store a private copy in the gap */
	t[slot] = strsave(s);
	*num += 1;
}

/* read the next line of the map file ip and split it on blanks, tabs */
/* and newline: the first field is copied to ilab, the second to olab. */
/* A missing field is returned as an empty string. */
/* Returns 1 if a line was read, 0 at end of file. */
int	getmapentry(ip,ilab,olab)
FILE	*ip;
char	*ilab;
char	*olab;
{
	char	*field;

	/* nothing more to read */
	if (fgets(mline,256,ip)==NULL)
		return(0);

	/* first field is the input label */
	field = strtok(mline," \t\n");
	if ((field==NULL) || (*field=='\0'))
		*ilab = '\0';
	else
		strcpy(ilab,field);

	/* second field is the output label */
	field = strtok(NULL," \t\n");
	if ((field==NULL) || (*field=='\0'))
		*olab = '\0';
	else
		strcpy(olab,field);

	return(1);
}

/* load the annotation map file called mapname into the global tables: */
/* anin receives the sorted set of input labels, anout the output label */
/* for the entry at the same index, and nummap the number of entries. */
/* When an input label occurs more than once the last line wins. */
/* Failures are reported through error(). */
void processmap(char *mapname)
{
	char	ilab[256],olab[256];		/* temporary storage */
	FILE	*ip;
	int	idx;
	int	nline;

	/* open map file */
	if ((ip=fopen(mapname,"r"))==NULL)
		error("cannot open map file '%s'",mapname);

	/* count lines in map: an upper bound on the table size */
	nline=0;
	while (getmapentry(ip,ilab,olab)) nline++;

	/* get space for annotation table - always ask for at least one */
	/* entry, since calloc(0,..) is allowed to return NULL and an empty */
	/* map file must not be reported as a memory failure */
	if ((anin=(char **)calloc(nline ? nline : 1,sizeof(char *)))==NULL)
		error("cannot get memory for annotation table",NULL);
	if ((anout=(char **)calloc(nline ? nline : 1,sizeof(char *)))==NULL)
		error("cannot get memory for annotation table",NULL);

	/* get list of input annotations from map */
	nummap=0;
	rewind(ip);
	while (getmapentry(ip,ilab,olab))
		strtable(ilab,anin,&nummap);

	/* save output annotations */
	rewind(ip);
	while (getmapentry(ip,ilab,olab)) {
		idx=strfind(ilab,anin,nummap);
		if (idx >= 0) {
			/* drop any output saved for an earlier duplicate line; */
			/* entries start as NULL from calloc so free() is safe */
			free(anout[idx]);
			anout[idx]=strsave(olab);
		}
	}

	/* finished with the map file */
	fclose(ip);

	fprintf(stderr,"%d annotation mappings loaded from '%s'\n",nummap,mapname);
}

/* write string s to stream op enclosed in double quotes, doubling any */
/* embedded double quote so the field stays well formed in CSV and */
/* Praat TextGrid output */
static void putquoted(FILE *op,const char *s)
{
	putc('"',op);
	for (;*s;s++) {
		if (*s=='"') putc('"',op);
		putc(*s,op);
	}
	putc('"',op);
}

/* main program */
/* Decodes the command line, reads the selected annotation item from */
/* the SFS file, optionally maps labels (-m) and merges adjacent */
/* identical labels (-c), then lists every annotation with a non-empty */
/* label in the chosen format to standard output or to the output file. */
int main(int argc,char *argv[])
{
	/* option decoding */
	extern int	optind;			/* option index */
	extern char	*optarg;		/* option argument ptr */
	int		errflg = 0;		/* option error flag */
	int32		it;			/* item type */
	char		*ty="0";		/* sub-type = last */
	int		c;			/* option switch */

	/* processing variables */
	int		i,j;
	int		fid;			/* input file descriptor */
	double		tim=0.0,etim=0.0;	/* start and end time in seconds */
	long long	hstart,hend;		/* HTK times in 100ns units */
	int		idx;
	FILE	*op;
	char	*p;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:s:Shem:co:OCP")) != EOF )
		switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: List AN item V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* input specification */
			if (itspec(optarg,&it,&ty)==0) {
				if (it==AN_TYPE)
					antype=ty;
				else
					error("unsuitable item specification: '%s'",optarg);
			}
			else
				error("illegal item specification: '%s'",optarg);
			break;
		case 's' :	/* sample mode with sampling rate */
			srate = atof(optarg);
			sampformat++;
			if (srate <= 0)
				error("bad sampling rate '%s'",optarg);
			break;
		case 'S' :	/* sample format */
			sampformat=1;
			htkformat=0;
			espsformat=0;
			csvformat=0;
			praatformat=0;
			break;
		case 'h' :	/* HTK format output */
			sampformat=0;
			htkformat=1;
			espsformat=0;
			csvformat=0;
			praatformat=0;
			break;
		case 'e' :	/* ESPS format output */
			sampformat=0;
			htkformat=0;
			espsformat=1;
			csvformat=0;
			praatformat=0;
			break;
		case 'C' :	/* CSV format output */
			sampformat=0;
			htkformat=0;
			espsformat=0;
			csvformat=1;
			praatformat=0;
			break;
		case 'P' :	/* Praat format output */
			sampformat=0;
			htkformat=0;
			espsformat=0;
			csvformat=0;
			praatformat=1;
			break;
		case 'm' :	/* map annotations */
			processmap(optarg);
			domap++;
			break;
		case 'c' :	/* join adjacent duplicates */
			dojoin++;
			break;
		case 'o':	/* named output file */
			/* refuse names that do not fit the fixed size buffer */
			if (strlen(optarg) >= sizeof(ofilename))
				error("output file name too long: '%s'",optarg);
			strcpy(ofilename,optarg);
			break;
		case 'O':	/* output file named after input file */
			genoname=1;
			break;
		case '?' :	/* unknown */
			errflg++;
			break;
	}

	/* check command line */
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-s rate|-h|-e|-S|-C|-P) (-m anmap) (-c) (-O|-o outfile) file",PROGNAME);

	/* get data filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no data file specified",NULL);

	/* open data file */
	if ((fid=sfsopen(filename,"r",NULL)) < 0) {
		if (fid==-1)
			error("cannot find file %s",filename);
		else
			error("access error on %s",filename);
	}

	/* locate input item */
	if (!sfsitem(fid,AN_TYPE,antype,&anitem))
		error("unable to find input annotations in '%s'",filename);
	if (srate<=0) srate = 1.0/anitem.frameduration;

	/* get output record buffer */
	if ((an=(struct an_rec *)sfsbuffer(&anitem,anitem.numframes))==NULL)
		error("could not get memory buffer",NULL);

	/* read in all annotations */
	if (sfsread(fid,0,anitem.numframes,an)!=anitem.numframes)
		error("read error on input",NULL);

	/* perform mapping */
	if (domap) for (i=0;i<anitem.numframes;i++) {
		if ((idx=strfind(an[i].label,anin,nummap)) >= 0)
			an[i].label = anout[idx];
	}

	/* collapse adjacencies - skipped for an empty item, which would */
	/* otherwise end up with a frame count of one */
	if (dojoin && (anitem.numframes > 0)) {
		for (i=1,j=0;i<anitem.numframes;i++) {
			if (strcmp(an[j].label,an[i].label)==0)
				an[j].size += an[i].size;
			else {
				j++;
				an[j].posn = an[i].posn;
				an[j].size = an[i].size;
				an[j].label = an[i].label;
			}
		}
		anitem.numframes = j+1;
	}

	/* open output file */
	if (genoname) {
		/* the longest result is the input name plus ".lab" */
		if (strlen(filename)+4 >= sizeof(ofilename))
			error("output file name too long for '%s'",filename);
		strcpy(ofilename,filename);
		p=strrchr(ofilename,'.');
		/* a dot followed by a path separator belongs to a */
		/* directory name and is not a file extension */
		if (p && (strchr(p,'/') || strchr(p,'\\')))
			p=NULL;
		if (p)
			strcpy(p,".lab");
		else
			strcat(ofilename,".lab");
		printf("Writing %s\n",ofilename);
	}
	if (ofilename[0]!='\0') {
		if ((op=fopen(ofilename,"w"))==NULL)
			error("could not open '%s'",ofilename);
	}
	else
		op=stdout;

	/* ESPS format has a leading comment (and annotation sometimes) */
	if (espsformat) {
		fprintf(op,"#\n");
		if (anitem.numframes > 0) {
			tim = anitem.offset + an[0].posn*anitem.frameduration;
			if (tim > 0)
				/* put in dummy annotation */
				fprintf(op,"%10.5f 121 #\n",tim);
		}
	}

	/* Praat format has header */
	if (praatformat) {
		/* find time span and count of the labelled annotations; */
		/* both times stay at zero if there are none */
		tim = 0.0;
		etim = 0.0;
		for (i=0,j=0;i<anitem.numframes;i++) {
			if (an[i].label[0]!='\0') {
				if (j==0) tim = anitem.offset + an[i].posn*anitem.frameduration;
				etim = anitem.offset + (an[i].posn+an[i].size)*anitem.frameduration;
				j++;
			}
		}
		fprintf(op,"File type = \"ooTextFile\"\n");
		fprintf(op,"Object class = \"TextGrid\"\n");
		fprintf(op,"\n");
		fprintf(op,"xmin = %.5f\n",tim);
		fprintf(op,"xmax = %.5f\n",etim);
		fprintf(op,"tiers? <exists>\n");
		fprintf(op,"size = 1\n");
		fprintf(op,"item []:\n");
		fprintf(op,"    item [1]:\n");
		fprintf(op,"\tclass = \"IntervalTier\"\n");
		fprintf(op,"\tname = \"%s\"\n",params(anitem.history,"type","annotation"));
		fprintf(op,"\txmin = %.5f\n",tim);
		fprintf(op,"\txmax = %.5f\n",etim);
		fprintf(op,"\tintervals: size = %d\n",j);
	}

	/* CSV format has header */
	if (csvformat) fprintf(op,"START,DURATION,LABEL\n");

	/* print annotations - j counts the ones actually written */
	for (i=0,j=0;i<anitem.numframes;i++) {
		tim = anitem.offset + an[i].posn*anitem.frameduration;
		if (an[i].label[0]) {
			if (htkformat) {
				etim = tim + an[i].size*anitem.frameduration;
				/* times in 100ns units pass the 32-bit int range */
				/* after about 214 seconds, so use a wider type */
				hstart = (long long)(0.5+tim*10000000L);
				hend = (long long)(0.5+etim*10000000L);
				/* one output line per space separated word */
				p=strtok(an[i].label," ");
				while (p && *p) {
					/* words starting with '-', '/' or a digit */
					/* are written with an 'L' prefix */
					if ((*p=='-') || (*p=='/') || ((*p>='0') && (*p<='9')))
						fprintf(op,"%lld %lld L%s\n",hstart,hend,p);
					else
						fprintf(op,"%lld %lld %s\n",hstart,hend,p);
					p=strtok(NULL," ");
				}
			}
			else if (espsformat) {
				etim = tim + an[i].size*anitem.frameduration;
				fprintf(op,"%10.5f 121 %s\n",
						etim,an[i].label);
			}
			else if (csvformat) {
				fprintf(op,"%.5f,%.5f,",tim,an[i].size*anitem.frameduration);
				putquoted(op,an[i].label);
				putc('\n',op);
			}
			else if (praatformat) {
				etim = tim + an[i].size*anitem.frameduration;
				fprintf(op,"\tintervals [%d]:\n",j+1);
				fprintf(op,"\t    xmin = %.5f\n",tim);
				fprintf(op,"\t    xmax = %.5f\n",etim);
				fprintf(op,"\t    text = ");
				putquoted(op,an[i].label);
				putc('\n',op);
			}
			else if (sampformat)
				fprintf(op,"%8d %s\n",(int)(0.5+tim*srate),an[i].label);
			else
				fprintf(op,"%10.5f %s\n",tim,an[i].label);
			j++;
		}
	}

	/* closing flushes buffered output, so a failure here means the */
	/* file is incomplete */
	if (op!=stdout) {
		if (fclose(op)!=0)
			error("write error on '%s'",ofilename);
	}

	/* that's all folks */
	sfsclose(fid);
	exit(0);

}

