/* wordchop -- chop words out of an annotated file */

/* M.A.Huckvale - University College London */

/* version 1.0 - June 1994 */

/* program name and version; both are printed by the -I option in main */
#define PROGNAME "wordchop"
#define PROGVERS "1.2"
/* NOTE(review): not referenced again in this file - presumably read by the
   SFS library when it reports errors; confirm before removing */
char *progname=PROGNAME;

/*-------------------------------------------------------------------------*/
/**MAN
.TH WORDCHOP SFS1 UCL
.SH NAME
wordchop -- chop word annotated regions of a data file
.SH SYNOPSIS
.B wordchop
(-I) (-n wnum) (-s wnum) (-e wnum) (-i item) (-l) (-o) (-d outputdir) file
.SH DESCRIPTION
.I wordchop
is a program to chop out annotated regions of a data file.  It assumes that individual
words are annotated at the start and end.  If an annotation is labelled simply '/', then
it is skipped.  A new SFS file is created for each annotated word, with header information
borrowed from the input SFS file.  The appropriate
part of every SP or LX item in the input file is copied or linked into the new file.
.SH OPTIONS
.TP 11
.B -I
Identify program name and version number.
.TP 11
.BI -i item
Select input item number.
.TP 11
.BI -n wnum
Chop word number wnum only.  Default: chop all words.
.TP 11
.BI -s wnum
Chop starting at word wnum.  Default: word 1.
.TP 11
.BI -e wnum
Finish chopping at word wnum.  Default: last word.
.TP 11
.B -l
Link items rather than copy.
.TP 11
.B -o
Overwrite existing files.
.TP 11
.BI -d outputdir
Specify the output directory name to use to prefix the
filenames generated from the annotations.
.SH INPUT ITEMS
.IP SP
Any speech items
.IP LX
Any Lx items.
.IP AN
Word annotations.
.SH VERSION/AUTHOR
.IP 1.2
Mark Huckvale
.SH SEE ALSO
divide
*/
/*--------------------------------------------------------------------------*/

#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <malloc.h>
#include "sfs.h"
#include "sfsdata.h"

/* maximum # items to transfer (size of the item[] and lhead[] tables) */
#define MAXXFER		16

/* maximum # words (size of the xtab[] table) */
#define MAXWORDS	10000

/* transfer buffer size: makecopy divides this by datasize*framesize to
   decide how many frames to move per read/write */
#define BUFSIZE		16384
/* smaller of two values; note that each argument may be evaluated twice */
#define MIN(x,y) (((x)<(y))?(x):(y))

/* global data */
struct main_header	head;		/* main header: filled when the input file is opened, passed on when output files are created */
struct item_header	item[MAXXFER];	/* headers of the SP and LX items to transfer */
struct link_header	lhead[MAXXFER];	/* link header of item[i]; only loaded when item[i].datapresent==2 */
int			numxfer;	/* number of entries used in item[] */
short			*sp;		/* not referenced anywhere else in this file */
struct item_header	anitem;		/* header of the annotation item */
struct an_rec		*antab;		/* annotation table, anitem.numframes entries */
/* one entry per word to be chopped out, built by createcopydata */
struct xtab_rec {
	double	stime;	/* start time for copy */
	double	sdur;	/* segment duration */
	char	*label;	/* annotation (points into antab, not a copy) */
	char	*fname;	/* output file name (heap allocated) */
} xtab[MAXWORDS];
int	xnum;		/* number of entries used in xtab[] */
char		filename[SFSMAXFILENAME]; /* SFS data file name */

/* operational modes */
int	wnum = -1;	/* -n: single word to chop; -1 = chop all words */
int	snum = 1;	/* -s: number of first word to chop (words count from 1) */
int	fnum = 1000000;	/* -e: number of last word to chop; default exceeds MAXWORDS, so no limit */
int	dolink=0;	/* link don't copy */
int	dozap=0;	/* overwrite existing files */
char	basename[256];	/* -d: output directory prefix ending in '/'; empty = none */

/* duplicate a string */
/*
 * Returns a freshly malloc'ed copy of the NUL-terminated string str.
 * The caller owns the copy.  The program is stopped with a message on
 * stderr if memory cannot be obtained, so the result is never NULL.
 */
char	*strsave(char *str)
{
	char	*p;

	/* the original copied through the pointer without checking it */
	if ((p = malloc(strlen(str)+1))==NULL) {
		fprintf(stderr,"strsave: out of memory\n");
		exit(1);
	}
	strcpy(p,str);
	return(p);
}

/* create the copying data */
/*
 * Build xtab[] from the annotation table.  Every annotation except the
 * last whose label is not "/" gives one entry, running from its own
 * position to the position of the following annotation.  The entry is
 * given the output file name <basename><label>.sfs; if an earlier entry
 * already has that name then <basename><label>1.sfs,
 * <basename><label>2.sfs, ... are tried until an unused name is found.
 * Sets xnum to the number of entries made (at most MAXWORDS).
 */
void createcopydata(void)
{
	int	i,j,k;
	size_t	fsize;
	char	*fname;

	xnum=0;
	for (i=0;(i<anitem.numframes-1)&&(xnum < MAXWORDS);i++) {
		if (strcmp(antab[i].label,"/")==0)
			continue;

		xtab[xnum].stime = antab[i].posn*anitem.frameduration+anitem.offset;
		xtab[xnum].sdur = antab[i+1].posn*anitem.frameduration+anitem.offset - xtab[xnum].stime;

		/* size the name from its parts rather than trusting a fixed
		   buffer: directory + label + decimal counter + ".sfs" + NUL */
		fsize = strlen(basename)+strlen(antab[i].label)+32;
		if ((fname = malloc(fsize))==NULL) {
			fprintf(stderr,"createcopydata: out of memory\n");
			exit(1);
		}
		snprintf(fname,fsize,"%s%s.sfs",basename,antab[i].label);

		/* on a clash take the next numbered name and rescan from the
		   start, so the new candidate is checked against every
		   earlier entry; the directory prefix is kept */
		for (j=0,k=1;j<xnum;) {
			if (strcmp(xtab[j].fname,fname)==0) {
				snprintf(fname,fsize,"%s%s%d.sfs",basename,antab[i].label,k++);
				j=0;
			}
			else
				j++;
		}

		xtab[xnum].label = antab[i].label;	/* alias */
		xtab[xnum].fname = fname;
		xnum++;
	}
}

/* create linked items */
void makelink(fid,xr)
int	fid;
struct xtab_rec *xr;
{
	int	ofid;
	struct item_header lnitem;
	struct link_header lnlink;
	int	ssamp,esamp;
	int	i;
	char	ans[80];

	/* check for existing file */
	if (access(xr->fname,0)==0) {
		if (dozap) {
			if (unlink(xr->fname)!=0)
				error("failed to overwrite '%s'",xr->fname);
		}
		else {
			printf(" file exists, replace ? (y/n) : ");
			gets(ans);
			if (ans[0]=='y') {
				if (unlink(xr->fname)!=0)
					error("failed to overwrite '%s'",xr->fname);
			}
			else {
				printf("'%s' skipped.\n",xr->fname);
				return;
			}
		}
	}

	/* create new file */
	if ((ofid = sfsopen(xr->fname,"c",&head)) < 0)
		error("could not create '%s'",xr->fname);
	sfsclose(ofid);

	/* for each item to xfer */
	for (i=0;i<numxfer;i++) {

		/* take copy of item header */
		sfsheader(&lnitem,item[i].datatype,item[i].floating,
				item[i].datasize,item[i].framesize,
				item[i].frameduration,item[i].offset,
				item[i].windowsize,item[i].overlap,item[i].lxsync);

		ssamp = (int)((xr->stime-item[i].offset)/item[i].frameduration);
		esamp = (int)(((xr->stime+xr->sdur)-item[i].offset)/item[i].frameduration);

		/* do item history */
		sprintf(lnitem.history,"%s(file=%s,item=%d.%02d,start=%d,end=%d,history=%s)",
			PROGNAME,
			filename,
			item[i].datatype,
			item[i].subtype,
			ssamp,esamp,
			item[i].history);
		strcpy(lnitem.params,item[i].params);

		/* check input item not linked itself ! */
		if (item[i].datapresent==2) {
			/* take copy of link header */
			lnlink = lhead[i];
			lnlink.offset += (ssamp*item[i].datasize*item[i].framesize)*(lnlink.multiplex+1);
		}
		else {
			/* build link header */
			memset(&lnlink,0,sizeof(struct link_header));
			strcpy(lnlink.filename,filename);
			lnlink.filetype = SFS_TYPE;
			lnlink.datatype = item[i].datatype;
			lnlink.subtype = item[i].subtype;
			lnlink.offset = (int)(((xr->stime-item[i].offset)/item[i].frameduration)*item[i].datasize*item[i].framesize);
			lnlink.linkdate = item[i].processdate;
			lnlink.machine = item[i].machine;
		}

		/* write link to file */
		if (!sfswritelink(&lnitem,esamp-ssamp,&lnlink,xr->fname))
			error("write failed on temporary file",NULL);

	}
	if (!sfsupdate(xr->fname))
		error("update error on '%s'",xr->fname);
}

/* create copied items */
void makecopy(fid,xr)
int	fid;
struct xtab_rec *xr;
{
	int	ofid;
	struct item_header dummy;
	struct item_header lnitem;
	int	ssamp,esamp;
	int	i;
	char	ans[80];
	int	cnt,nframe;
	char	*buff;
	char	histbuff[1024];

	/* check for existing file */
	if (access(xr->fname,0)==0) {
		if (dozap) {
			if (unlink(xr->fname)!=0)
				error("failed to overwrite '%s'",xr->fname);
		}
		else {
			printf(" file exists, replace ? (y/n) : ");
			gets(ans);
			if (ans[0]=='y') {
				if (unlink(xr->fname)!=0)
					error("failed to overwrite '%s'",xr->fname);
			}
			else {
				printf("'%s' skipped.\n",xr->fname);
				return;
			}
		}
	}

	/* create new file */
	if ((ofid = sfsopen(xr->fname,"c",&head)) < 0)
		error("could not create '%s'",xr->fname);
	sfsclose(ofid);

	/* for each item to xfer */
	for (i=0;i<numxfer;i++) {

		/* take copy of item header */
		sfsheader(&lnitem,item[i].datatype,item[i].floating,
				item[i].datasize,item[i].framesize,
				item[i].frameduration,item[i].offset,
				item[i].windowsize,item[i].overlap,item[i].lxsync);

		ssamp = (int)((xr->stime-item[i].offset)/item[i].frameduration);
		esamp = (int)(((xr->stime+xr->sdur)-item[i].offset)/item[i].frameduration);

		/* do item history */
		sprintf(histbuff,"%s(file=%s,item=%d.%02d,start=%d,end=%d,history=%s)",
			PROGNAME,
			filename,
			item[i].datatype,
			item[i].subtype,
			ssamp,esamp,
			item[i].history);
		strncpy(lnitem.history,histbuff,sizeof(lnitem.history)-1);
		strcpy(lnitem.params,item[i].params);

		/* locate input item again */
		sprintf(ans,"%02d",item[i].subtype);
		if (!sfsitem(fid,item[i].datatype,ans,&dummy))
			error("failed to relocate '%s'",item[i].history);

		/* create output channel */
		if ((ofid = sfschannel(xr->fname,&lnitem)) < 0)
			error("failed to create output channel to '%s'",xr->fname);

		/* create buffer for data transfer */
		if (item[i].framesize > 0)
			nframe = BUFSIZE / (item[i].datasize*item[i].framesize) + 1;
		else
			nframe = 10;
		if ((buff = sfsbuffer(&item[i],nframe)) == NULL)
			error("could not get memory buffer",NULL);

		/* copy data */
		while (ssamp < esamp) {
			if ((cnt=sfsread(fid,ssamp,MIN(nframe,(esamp-ssamp)),buff))<=0) break;
			if (sfswrite(ofid,cnt,buff)!=cnt)
				error("write error on '%s'",xr->fname);
			ssamp += cnt;
		}

		free(buff);
	}
	if (!sfsupdate(xr->fname))
		error("update error on '%s'",xr->fname);
}

/* main program */
void main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int		it;		/* item selection */
	char		*ty;		/* item sub type */
	char		*antype="0";
	/* file variables */
	int		fid;		/* input file descriptor */
	int		i;

	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:n:s:e:lod:")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Chop words from file V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == AN_TYPE)
					antype = ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'n' :	/* replay one word */
			wnum = atoi(optarg);
			break;
		case 's' :	/* start word # */
			snum = atoi(optarg);
			break;
		case 'e' :	/* end word # */
			fnum = atoi(optarg);
			break;
		case 'l' :	/* do link */
			dolink=1;
			break;
		case 'o' :	/* overwrite */
			dozap=1;
			break;
		case 'd' :	/* output directory */
			strcpy(basename,optarg);
			if (basename[strlen(basename)-1]!='/') strcat(basename,"/");
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-n wnum) (-s wnum) (-e wnum) (-l) (-o) (-d outputdir) file",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no database file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"r",&head)) < 0)
		error("access error on '%s'",filename);

	/* find annotations */
	if (!sfsitem(fid,AN_TYPE,antype,&anitem))
		error("could not find annotation item in '%s'",filename);

	/* load annotations */
	if ((antab = (struct an_rec *)sfsbuffer(&anitem,anitem.numframes))==NULL)
		error("could not get buffer");
	if (sfsread(fid,0,anitem.numframes,antab)!=anitem.numframes)
		error("read error on '%s'",filename);

	/* find all the speech and lx items */
	sfsnextitem(fid,NULL);
	numxfer=0;
	while (sfsnextitem(fid,&item[numxfer])) {
		if ((item[numxfer].datatype==SP_TYPE) ||
		    (item[numxfer].datatype==LX_TYPE)) {
			if (item[numxfer].datapresent==2) {
				/* item is linked - get link header */

				lseek(fid,sfsdata[fid]->datastart,0);
				read(fid,&lhead[numxfer],sizeof(struct link_header));
				sfsdata[fid]->currpos=lseek(fid,0L,1);
			}
			numxfer++;
		 }
	}

	/* create copying data */
	createcopydata();

	/* do the chopping */
	if (wnum > 0) snum = fnum = wnum;
	for (i=0;i<xnum;i++) {
		if (((i+1) >= snum) && ((i+1) <= fnum)) {
			printf("%d. %s -> %s",i+1,xtab[i].label,xtab[i].fname);
			fflush(stdout);
			if (dolink)
				makelink(fid,&xtab[i]);
			else
				makecopy(fid,&xtab[i]);
			printf("\n");
		}
	}

	/* that's all folks ! */
	sfsclose(fid);
	exit(0);
}

