/* ancomp -- compare two sets of annotations for alignment accuracy */

/* M.A.Huckvale - University College London */

/* version 1.0 - August 1999 */

/* version 1.1 - April 2000
	- add option to save raw matches
	- add option to save histogram
*/

#define PROGNAME "ancomp"
#define PROGVERS "1.1"
char *progname=PROGNAME;

/*-------------------------------------------------------------------------*/
/**MAN
.TH ANCOMP SFS1 UCL
.SH NAME
ancomp -- compare alignment of two sets of annotations
.SH SYNOPSIS
.B ancomp
(-I) (-i item) (-r anitem) (-t anitem) (-m matchfile) (-h histfile) file
.SH DESCRIPTION
.I ancomp
is a program to measure the alignment accuracy of a set of
automatically aligned annotations against a reference set.
Statistics of mean alignment error and a breakdown of
accuracy per annotation label are provided.  It is assumed
that the labels themselves are correct.
.SH OPTIONS
.TP 11
.B -I
Identify program name and version number.
.TP 11
.BI -i item
Select test annotation item (synonym for -t).
.TP 11
.BI -r anitem
Select reference annotation item.  Default first.
.TP 11
.BI -t anitem
Select test annotation item.  Default last.
.TP 11
.BI -m matchfile
Store raw information about matches into given file.
.TP 11
.BI -h histfile
Store histogram of distances into given file.
.SH INPUT ITEMS
.IP AN
Reference annotation
.IP AN
Test annotation
.SH VERSION/AUTHOR
.IP 1.1
Mark Huckvale
*/
/*--------------------------------------------------------------------------*/

#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <ctype.h>
#include <math.h>
#include "sfs.h"

/* global data: the two annotation items being compared; filled in by main() */
struct item_header refitem;	/* header of the reference annotation item */
struct an_rec *refan;		/* reference annotation records */
struct item_header tstitem;	/* header of the test annotation item */
struct an_rec *tstan;		/* test annotation records */

/*
 * One node per distinct annotation label, holding the running
 * statistics that adddistance() accumulates for that label.
 * Nodes are created zero-filled by addlabel().
 */
struct antab_rec {
	char			*label;	/* label text (private copy) */
	int			cnt;	/* number of distances added */
	double			sum;	/* sum of absolute distances */
	double			sumsq;	/* sum of squared distances */
	double			min;	/* running minimum of signed distance */
	double			max;	/* running maximum of signed distance */
	struct antab_rec	*next;	/* next node, NULL at end of list */
};

/* head of the label list; new nodes are pushed on the front */
struct antab_rec *root;

double	totsum;		/* sum of absolute distances over all annotations */
double	totsumsq;	/* sum of squared distances over all annotations */
int	totcnt;		/* number of distances accumulated */
int	mshist[41];	/* distance histogram: bin index is 200*distance+20, written out as (bin-20)*0.005 */
char	matchfilename[SFSMAXFILENAME];	/* output file for raw matches (-m) */
int	savematch;	/* non-zero when -m was given */
char	histfilename[SFSMAXFILENAME];	/* output file for histogram (-h) */
int	savehist;	/* non-zero when -h was given */

/*
 * strsave -- duplicate a string into freshly allocated memory.
 *
 * str: NUL-terminated string to copy.
 *
 * Returns a pointer to a malloc'd copy which the caller owns.  If the
 * allocation fails a message is written to stderr and the program
 * exits, so the returned pointer is never NULL.
 */
char *strsave(char *str)
{
	size_t	len = strlen(str)+1;	/* include the terminating NUL */
	char	*ptr = malloc(len);

	if (ptr == NULL) {
		fprintf(stderr,"ancomp: out of memory\n");
		exit(1);
	}
	memcpy(ptr,str,len);
	return(ptr);
}

/*
 * findan -- look up a label in the annotation table.
 *
 * Walks the list headed by `root` and returns the first record whose
 * label compares equal to `str`, or NULL when no record matches.
 */
struct antab_rec *findan(char *str)
{
	struct antab_rec *rec;

	for (rec=root; rec!=NULL; rec=rec->next)
		if (strcmp(str,rec->label)==0)
			break;
	return(rec);
}

/*
 * addlabel -- make sure a label has an entry in the annotation table.
 *
 * str: label text.  If findan() already knows it nothing happens;
 *      otherwise a zero-filled record holding a private copy of the
 *      text is pushed onto the front of the list headed by `root`.
 *
 * If the record cannot be allocated a message is written to stderr
 * and the program exits.
 */
void addlabel(char *str)
{
	struct antab_rec *p;

	if (findan(str) != NULL)
		return;

	/* calloc so that cnt/sum/sumsq/min/max all start at zero */
	p = calloc(1,sizeof *p);
	if (p == NULL) {
		fprintf(stderr,"ancomp: out of memory\n");
		exit(1);
	}
	p->label = strsave(str);
	p->next = root;
	root = p;
}

/*
 * adddistance -- accumulate one alignment error into the statistics.
 *
 * str: label of the annotation the error belongs to.  It must already
 *      be in the table (see addlabel); otherwise error() is called.
 * t:   signed error; main() passes test time minus reference time.
 *
 * Updates the label's record (count, sum of |t|, sum of t*t, signed
 * minimum and maximum) and the overall totals.  When a histogram has
 * been requested (savehist) the matching bin of mshist[] is also
 * incremented; values outside the 41 bins are not counted.
 */
void adddistance(char *str,double t)
{
	struct antab_rec *p = findan(str);
	if (!p) error("could not find annotation");

	/* The first sample seeds min and max.  Records start zero-filled,
	   so comparing against the initial 0 would pin min at <= 0 and
	   max at >= 0 regardless of the data. */
	if (p->cnt == 0) {
		p->min = t;
		p->max = t;
	}
	else {
		if (t < p->min) p->min = t;
		if (t > p->max) p->max = t;
	}
	p->sum += fabs(t);
	p->sumsq += (t*t);
	p->cnt++;

	totsum += fabs(t);
	totsumsq += (t*t);
	totcnt++;

	if (savehist) {
		/* floor() instead of an (int) cast: truncation toward zero
		   would fold values just below the lowest bin into bin 0.
		   The range test is done in double so that a huge distance
		   is never converted to int. */
		double bin = floor(200*t+20);
		if ((bin >= 0)&&(bin <= 40)) mshist[(int)bin]++;
	}
}

/* mean of `cnt` samples whose total is `sum`; 0 when there are none */
static double rep_mean(double sum,int cnt)
{
	return (cnt > 0) ? sum/cnt : 0.0;
}

/* standard deviation from running sum and sum of squares; 0 when there
   are no samples or when rounding leaves the variance negative */
static double rep_sdev(double sum,double sumsq,int cnt)
{
	double	var;

	if (cnt <= 0) return(0.0);
	var = (sumsq - (sum * sum)/cnt)/cnt;
	return (var > 0.0) ? sqrt(var) : 0.0;
}

/*
 * anreport -- print the per-label and overall statistics to stdout.
 *
 * One line is printed for each record in the list headed by `root`
 * (label, count, mean, standard deviation, min, max), followed by a
 * blank line and an OVERALL line built from the global totals.
 * Mean and StdDev are taken over absolute distances, because that is
 * what adddistance() sums; Min and Max are signed.
 *
 * Empty counts print as 0.000 rather than dividing by zero.
 */
void anreport(void)
{
	struct antab_rec *p;

	printf("Label                 Count     Mean   StdDev      Min      Max\n");
	printf("---------------------------------------------------------------\n");
	for (p=root; p!=NULL; p=p->next) {
		printf("%-20s %6d %8.3f %8.3f %8.3f %8.3f\n",
			p->label,p->cnt,
			rep_mean(p->sum,p->cnt),
			rep_sdev(p->sum,p->sumsq,p->cnt),
			p->min,
			p->max);
	}
	printf("\n");
	printf("%-20s %6d %8.3f %8.3f\n",
		"OVERALL",
		totcnt,
		rep_mean(totsum,totcnt),
		rep_sdev(totsum,totsumsq,totcnt));
}

/*
 * anreporthist -- write the distance histogram to an open stream.
 *
 * op: stream to write to.
 *
 * One line is written per bin of mshist[], lowest bin first: the
 * value (bin-20)*0.005 printed with "%8.3f", a space, then the count.
 */
void anreporthist(FILE *op)
{
	int	bin = 0;

	while (bin <= 40) {
		fprintf(op,"%8.3f %d\n",(bin-20)*0.005,mshist[bin]);
		bin++;
	}
}

/* main program */
void main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int		it;		/* item selection */
	char		*ty;		/* item sub type */
	/* file variables */
	char		filename[SFSMAXFILENAME]; /* SFS data file name */
	int		fid;		/* input file descriptor */
	char		*reftype="*";
	char		*tsttype="0";
	int		i;
	double		tref,ttst;
	FILE		*op;
	
	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:r:t:m:h:")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 't' :	/* test item */
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == AN_TYPE)
					tsttype = ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'r' :	/* reference item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == AN_TYPE)
					reftype = ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'm' :	/* match file */
			strcpy(matchfilename,optarg);
			savematch++;
			break;
		case 'h' :	/* histogram file */
			strcpy(histfilename,optarg);
			savehist++;
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-r item) (-t item) file",PROGNAME);

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no database file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"r",NULL))<0)
		error("could not open '%s'",filename);

	/* find and load reference item */
	if (!sfsitem(fid,AN_TYPE,reftype,&refitem))
		error("could not find reference annotations");
	refan = (struct an_rec *)sfsbuffer(&refitem,refitem.numframes);
	sfsread(fid,0,refitem.numframes,refan);

	/* find and load test item */
	if (!sfsitem(fid,AN_TYPE,tsttype,&tstitem))
		error("could not find test annotations");
	tstan = (struct an_rec *)sfsbuffer(&tstitem,tstitem.numframes);
	sfsread(fid,0,tstitem.numframes,tstan);
	sfsclose(fid);

	/* simple checks */
	if (refitem.numframes != tstitem.numframes)
		error("unequal numbers of annotations");

	/* get a list of all the annotation names */
	for (i=0;i<refitem.numframes;i++)
		addlabel(refan[i].label);

	if (savematch) {
		if ((op=fopen(matchfilename,"w"))==NULL)
			error("could not open '%s'",matchfilename);
	}

	/* process annotations */
	for (i=0;i<refitem.numframes;i++) {
		tref = refitem.offset + refan[i].posn*refitem.frameduration;
		ttst = tstitem.offset + tstan[i].posn*tstitem.frameduration;
		adddistance(refan[i].label,ttst - tref);
		if (savematch) {
			fprintf(op,"%s %.4f\n",refan[i].label,ttst - tref);
		}
	}

	/* report annotations */
	anreport();

	/* other reports */
	if (savematch) fclose(op);
	if (savehist) {
		if ((op=fopen(histfilename,"w"))==NULL)
			error("could not open '%s'",matchfilename);
		anreporthist(op);
		fclose(op);
	}

	/* that's all folks! */
	exit(0);
}

	
