/* anxml -- list an annotation item to standard output in XML */

/* M.A.Huckvale - November 1997 */

/* version 1.0 */
	
/*-------------------------------------------------------------------*/
/**MAN
.TH ANXML SFS1 UCL
.SH NAME
anxml -- list an annotation item to XML format
.SH SYNOPSIS
.B anxml
(-I) (-i item) (-t tag) (-p pausedur) (-P pausesym) (-u utterancetag) file
.SH DESCRIPTION
.I anxml
dumps the contents of an annotation item onto the standard output
in XML format.
Each annotation is enclosed in a tag of the given type with attributes
'START' and 'END'.  The default tag type is 'WORD'.  An identifier
of form <tag><index> is given to each tag.
.PP
.B -I
Identify the program version.
.TP 11
.BI -i item
Select input item.
.TP 11
.BI -t tag
Name of tag to use.  Default: 'WORD'.
.TP 11
.BI -p pausedur
Select pause detection mode, in which the unit sequence is divided into
utterances at pauses.  The minimum duration of a pause in seconds
is given as an argument.  Default: Off.
.TP 11
.BI -P pausesym
Use one or more times to indicate the annotation labels to be
taken to indicate pauses.  Default: '##'.
.TP 11
.BI -u utterancetag
In pause detection mode, indicates the tag to be used to separate
utterances.  Default: 'UTT'.
.SH INPUT ITEMS
.IP AN 11
Any annotation item
.SH VERSION/AUTHOR
1.0 - Mark Huckvale
.SH BUGS
The last annotation has the same start and end time.
*/
/*---------------------------------------------------------------*/

/* program name and version: PROGNAME appears in the -I identification
   banner and in the usage message, PROGVERS in the -I banner only */
#define	PROGNAME "anxml"
#define PROGVERS "1.0"
/* NOTE(review): progname is not read anywhere in the visible code;
   presumably the SFS library picks it up (e.g. for error reports) -
   confirm before changing its name or linkage */
char	*progname = PROGNAME;

/* global declarations */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <malloc.h>
#include <string.h>
#include <math.h>
#include "sfs.h"		/* database filing system structures */

/* global data: shared between main() and ispause() */
char			filename[SFSMAXFILENAME];	/* file name copied from the command line */
struct item_header 	anitem;			/* header of the annotation item filled in by getitem() */
struct an_rec		*an;			/* annotation records filled in by getitem() */
char			*antype="0";		/* annotation sub-type selector; replaced by -i */
char			tname[256]="WORD";	/* tag name written for each unit; replaced by -t */
char			uname[256]="UTT";	/* tag name written for each utterance; replaced by -u */
int			dopause=0;		/* set to 1 when -p is given (pause detection mode) */
double			pausedur=0.0;		/* minimum pause duration, taken from the -p argument */
#define MAXPAUSESYM 100				/* capacity of pausesym[] */
char			*pausesym[MAXPAUSESYM];	/* labels treated as pauses, one per -P option */
int			npausesym=0;		/* number of entries in use in pausesym[] */

/* return a freshly allocated copy of the NUL-terminated string 'str'.
   The caller owns the returned buffer.  If the memory cannot be
   obtained a message is written to stderr and the program exits with
   status 1, so the result is never NULL. */
char *strsave(char *str)
{
	size_t	len = strlen(str)+1;	/* length including the terminator */
	char	*ptr = malloc(len);

	if (ptr==NULL) {
		fprintf(stderr,"strsave: out of memory\n");
		exit(1);
	}
	memcpy(ptr,str,len);
	return(ptr);
}

/* decide whether an annotation counts as a pause.
   Returns 1 when the label equals one of the registered pause symbols
   and the annotation's duration (size * frame duration) is not less
   than the pause threshold; returns 0 otherwise. */
int ispause(struct an_rec *an)
{
	int	k;
	int	matched = 0;
	double	span;

	/* search the pause symbol table for this label */
	for (k = 0; k < npausesym && !matched; k++)
		matched = (strcmp(an->label,pausesym[k]) == 0);
	if (!matched)
		return(0);

	/* a matching label only counts when it lasts long enough */
	span = an->size * anitem.frameduration;
	return((span < pausedur) ? 0 : 1);
}

/* main program */
void main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;			/* option index */
	extern char	*optarg;		/* option argument ptr */
	int		errflg = 0;		/* option error flag */
	int32		it;			/* item type */
	char		*ty="0";		/* sub-type = last */
	int		c;			/* option switch */

	/* processing variables */
	int		i,j;
	double		stim,etim;
	int		uttno,segno;
	int		inutt;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:t:p:P:u:")) != EOF )
		switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: List AN item in XML V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* input specification */
			if (itspec(optarg,&it,&ty)==0) {
				if (it==AN_TYPE)
					antype=ty;
				else
					error("unsuitable item specification: '%s'",optarg);
			}
			else
				error("illegal item specification: '%s'",optarg);
			break;
		case 't' :	/* select unit tag name */
			strcpy(tname,optarg);
			break;
		case 'u' :	/* select utterance tag name */
			strcpy(uname,optarg);
			break;
		case 'p' :	/* pause mode and threshold */
			dopause=1;
			pausedur = atof(optarg);
			break;
		case 'P' :	/* pause symbol */
			pausesym[npausesym++] = strsave(optarg);
			break;			
		case '?' :	/* unknown */
			errflg++;
	}

	/* check command line */
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-t tag) (-p pausethresh) (-P pausesym) (-u utterancetag) file",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,argv[optind]);
	else
		error("no filename specified");

	/* load annotation item into memory */
	getitem(filename,AN_TYPE,antype,&anitem,(void **)&an);
	
	/* print document type declaration */
	printf("<?XML version=\"1.0\"?>\n");
	printf("<!DOCTYPE AN [\n");
	if (dopause) {
		printf("<!ELEMENT AN (%s)* >\n",uname);
		printf("<!ELEMENT %s (%s)* >\n",uname,tname);
	}
	else {
		printf("<!ELEMENT AN (%s)* >\n",tname);
	}
	printf("<!ELEMENT %s (#PCDATA)>\n",tname);
	if (dopause) {
		printf("<!ATTLIST %s\n",uname);
		printf("\tID\tID\t#required\n");
		printf("\tSTART\tCDATA\t#required\n");
		printf("\tEND\tCDATA\t#required >\n");
	}
	printf("<!ATTLIST %s\n",tname);
	printf("\tID\tID\t#required\n");
	printf("\tSTART\tCDATA\t#required\n");
	printf("\tEND\tCDATA\t#required >\n");
	printf("]>\n");
	
	/* print annotations */
	printf("<AN>\n");
	inutt=0;
	uttno=0;
	segno=0;
	for (i=0;i<anitem.numframes;i++) {
		stim = anitem.offset + an[i].posn*anitem.frameduration;
		etim = stim + an[i].size*anitem.frameduration;
		if (dopause && ispause(&an[i])) {
			if (inutt) {
				printf("</%s>\n",uname);
				inutt=0;
			}
			/* find next pause */
			stim = etim;
			for (j=i+1;j<anitem.numframes;j++) {
				etim = anitem.offset + an[j].posn*anitem.frameduration;
				if (ispause(&an[j])) break;
			}
			if (etim > stim) {
				printf("<%s ID=\"%s%d\" START=\"%.4f\" END=\"%.4f\">\n",
					uname,uname,++uttno,stim,etim);
				inutt=1;
			}
		}
		else
			printf("<%s ID=\"%s%d\" START=\"%.4f\" END=\"%.4f\">%s</%s>\n",
				tname,tname,++segno,stim,etim,an[i].label,tname);
	}
	if (inutt) printf("</%s>\n",uname);
	printf("</AN>\n");

	/* that's all folks */
	exit(0);

}

