/* sampa2ipa -- map SAMPA phonetic symbols to IPA unicode symbols */

/* M.A.Huckvale - University College London */

/* version 1.0 - July 2010 */

/* program name and version strings, used in the -I banner, the usage
   message and the history of the output item */
#define PROGNAME "sampa2ipa"
#define PROGVERS "1.0"
/* global copy of the program name; not referenced in this file --
   presumably read by the SFS library (TODO confirm) */
char *progname=PROGNAME;

/*-------------------------------------------------------------------*/
/**MAN
.TH SAMPA2IPA 1 UCL SFS
.SH NAME
sampa2ipa -- map SAMPA phonetic symbols to IPA Unicode characters
.SH SYNOPSIS
.B sampa2ipa
(-I) (-i item) (-t type) file
.SH DESCRIPTION
.I sampa2ipa
maps the characters used in annotation labels from SAMPA symbols
to IPA symbols. The symbols affected are: A { 6 Q E @ 3 I O 2 9 & U } V Y
B C D G L J N R S T H Z ? : " %. All other characters are copied unchanged.
The Unicode characters are encoded in UTF-8 format.
.PP
.I Options:
.TP 11
.B -I
Identify the program name and version.
.TP 11
.BI -i item
Select input item number.
.TP 11
.BI -t type
Specify type labels for output annotation item. Default: IPA.
.SH VERSION/AUTHOR
1.0 - Mark Huckvale
*/
/*---------------------------------------------------------------*/

/* include files */
#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <malloc.h>
#include <math.h>
#include "sfs.h"

/* global data */
char	filename[SFSMAXFILENAME];	/* data file name, set from the command line via sfsfile() */
struct item_header	anitem;		/* input annotation item */
struct item_header	opitem;		/* output annotation item */
struct an_rec		ian;		/* annotation record read from the input item */
struct an_rec		oan;		/* annotation record written to the output item */
char	labtype[256]="IPA";		/* output label type (-t option); recorded in the output item history */

/* SAMPA to IPA mapping table
 *
 * Each entry pairs one SAMPA character with the Unicode code point of
 * the corresponding IPA symbol.  mapan() searches the table from the
 * top and uses the first entry whose 'in' field matches.
 */
struct map_rec {
	unsigned char	in;		/* SAMPA character */
	unsigned short	out;		/* Unicode code point of IPA symbol */
};

struct map_rec mtab[] = {
	/* vowels */
	{ 'A', 0x0251 },		/* alpha */
	{ '{', 0x00E6 },		/* ae ligature */
	{ '6', 0x0250 },		/* turned a */
	{ 'Q', 0x0252 },		/* turned alpha */
	{ 'E', 0x025B },		/* open e (epsilon) */
	{ '@', 0x0259 },		/* schwa */
	{ '3', 0x025C },		/* reversed open e */
	{ 'I', 0x026A },		/* small capital I */
	{ 'O', 0x0254 },		/* open o */
	{ '2', 0x00F8 },		/* o with stroke */
	{ '9', 0x0153 },		/* oe ligature */
	{ '&', 0x0276 },		/* small capital OE */
	{ 'U', 0x028A },		/* upsilon */
	{ '}', 0x0289 },		/* u bar */
	{ 'V', 0x028C },		/* turned v */
	{ 'Y', 0x028F },		/* small capital Y */

	/* consonants */
	{ 'B', 0x03B2 },		/* beta */
	{ 'C', 0x00E7 },		/* c with cedilla */
	{ 'D', 0x00F0 },		/* eth */
	{ 'G', 0x0263 },		/* gamma */
	{ 'L', 0x028E },		/* turned y */
	{ 'J', 0x0272 },		/* n with left hook */
	{ 'N', 0x014B },		/* eng */
	{ 'R', 0x0281 },		/* small capital inverted R */
	{ 'S', 0x0283 },		/* esh */
	{ 'T', 0x03B8 },		/* theta */
	{ 'H', 0x0265 },		/* turned h */
	{ 'Z', 0x0292 },		/* ezh */
	{ '?', 0x0294 },		/* glottal stop */

	/* length and stress marks */
	{ ':', 0x02D0 },		/* triangular colon (length) */
	{ '"', 0x02C8 },		/* vertical line (primary stress) */
	{ '%', 0x02CC },		/* low vertical line (secondary stress) */
};

/* number of entries in mtab[] */
#define MTAB_COUNT	(sizeof(mtab)/sizeof(mtab[0]))

/* mapan -- copy a label, replacing SAMPA symbols by UTF-8 coded IPA
 *
 *	ilab	NUL-terminated input label
 *	olab	buffer that receives the NUL-terminated result
 *
 * Each input byte is looked up in mtab[].  A byte that is found is
 * replaced by the UTF-8 encoding of the table's Unicode value; any
 * other byte is copied through unchanged.  No length check is made on
 * olab: the caller must leave room for the expansion (the widest form
 * coded below is 4 output bytes for one input byte).
 */
void mapan(unsigned char *ilab,unsigned char *olab)
{
	unsigned char	*src;
	unsigned char	*dst = olab;
	unsigned char	ch;
	unsigned int	cp;
	size_t		k;

	for (src = ilab; (ch = *src) != '\0'; src++) {

		/* linear search: stop at the first entry for this byte */
		k = 0;
		while (k < MTAB_COUNT && mtab[k].in != ch)
			k++;

		/* not a mapped symbol: pass it straight through */
		if (k == MTAB_COUNT) {
			*dst++ = ch;
			continue;
		}

		/* emit the code point in the shortest UTF-8 form */
		cp = mtab[k].out;
		if (cp < 0x80) {
			/* 0xxxxxxx */
			*dst++ = (unsigned char)cp;
		}
		else if (cp < 0x800) {
			/* 110xxxxx 10xxxxxx */
			*dst++ = (unsigned char)(0xC0 | (cp >> 6));
			*dst++ = (unsigned char)(0x80 | (cp & 0x3F));
		}
		else if (cp < 0x10000) {
			/* 1110xxxx 10xxxxxx 10xxxxxx */
			*dst++ = (unsigned char)(0xE0 | (cp >> 12));
			*dst++ = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
			*dst++ = (unsigned char)(0x80 | (cp & 0x3F));
		}
		else {
			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			*dst++ = (unsigned char)(0xF0 | (cp >> 18));
			*dst++ = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
			*dst++ = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
			*dst++ = (unsigned char)(0x80 | (cp & 0x3F));
		}
	}
	*dst = '\0';
}

/* main program
 *
 * Decodes the command line, locates the selected annotation (AN) item
 * in the named SFS file, and writes a new AN item in which every label
 * has been passed through mapan().  Labels that map to an empty string
 * are not written.  Returns 0 on success; failures are reported
 * through error().
 */
int main(int argc,char *argv[])
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */

	/* processing variables */
	int		fid,ofid;	/* input file id, output channel id */
	int32		it;		/* item type decoded from -i */
	char		*ty;		/* item match string decoded from -i */
	char		*antype="0";	/* AN item selector handed to sfsitem() */
	int		i;		/* annotation index */
	int		hlen;		/* length of formatted history */

	/* label buffer sizes: mapan() can emit up to 4 bytes for each
	   input byte, so the output buffer is sized for the worst case */
	enum {
		INLABSIZE = 1024,
		OUTLABSIZE = 4*INLABSIZE
	};

	/* decode switches (getopt returns -1 when the options are exhausted) */
	while ( (c = getopt(argc,argv,"Ii:t:")) != -1 )
		switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Convert SAMPA annotation labels to IPA V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* item number */
			if (itspec(optarg,&it,&ty)==0) {
				if (it==AN_TYPE)
					antype=ty;
				else
					error("bad item specification",NULL);
			}
			else
				error("illegal item specification",NULL);
			break;
		case 't' :	/* output type */
			/* labtype is a fixed-size array: only copy what fits */
			if (strlen(optarg) < sizeof(labtype))
				strcpy(labtype,optarg);
			else
				error("output type '%s' is too long",optarg);
			break;
		case '?' :	/* unknown */
		default :
			errflg++;
			break;
		}

	/* check command line */
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item) (-t newtype) file",PROGNAME);

	/* get data filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no data file specified",NULL);

	/* open data file */
	/* NOTE(review): opened in "w" mode although it is only read here;
	   presumably this checks the file can be updated -- confirm */
	if ((fid=sfsopen(filename,"w",NULL)) < 0)
		error("access error on '%s'",filename);

	/* find input annotation item */
	if (!sfsitem(fid,AN_TYPE,antype,&anitem))
		error("cannot find input AN item in '%s'",filename);

	/* get annotation buffers */
	ian.label=(char *)malloc(INLABSIZE);
	oan.label=(char *)malloc(OUTLABSIZE);
	if ((ian.label==NULL) || (oan.label==NULL))
		error("could not get memory for annotation labels",NULL);

	/* create output item */
	sfsheader(&opitem,AN_TYPE,-1,1,-1,anitem.frameduration,anitem.offset,0,0,1);

	/* record the processing history, refusing to overrun the field */
	/* NOTE(review): sizeof assumes opitem.history is a char array member -- confirm in sfs.h */
	hlen = snprintf(opitem.history,sizeof(opitem.history),
			"%s(%d.%02d;type=%s)",
			PROGNAME,
			anitem.datatype,anitem.subtype,
			labtype);
	if ((hlen < 0) || ((size_t)hlen >= sizeof(opitem.history)))
		error("output type '%s' is too long for item history",labtype);

	/* open output channel */
	if ((ofid=sfschannel(filename,&opitem)) < 0)
		error("cannot open output channel to '%s'",filename);

	/* process annotations in sequence */
	for (i=0;sfsread(fid,i,1,&ian);i++) {
		oan.posn = ian.posn;
		oan.size = ian.size;
		/* map annotation */
		mapan((unsigned char *)ian.label,(unsigned char *)oan.label);
		/* write non-NULL labels, checking that the record was accepted */
		if (oan.label[0]) {
			if (sfswrite(ofid,1,&oan) != 1)
				error("write error on output to '%s'",filename);
		}
	}

	/* that's all folks */
	sfsclose(fid);
	if (!sfsupdate(filename))
		error("update error on '%s'",filename);
	return 0;
}


