/* conmat -- build confusion matrix from raw recognition results */

/* M.A. Huckvale - July 1990 */

/* full version derived from old 'conmat' */

/* version 1.1 - March 1991
	- add optional 'title' to matrix for printouts
*/
/* version 1.2 - February 1992
	- -e switch: only print rows and columns that have values
*/

/* program identification: the name and version reported by the -I switch */
#define PROGNAME "conmat"
/* kept in step with the change log above, whose latest entry is version 1.2 */
#define PROGVERS "1.2"
/* NOTE(review): presumably read by the SFS library (sfs.h) when it reports
   errors - confirm against the library */
char *progname=PROGNAME;

/*-------------------------------------------------------------------------*/
/**MAN
.TH CONMAT UCL1 UCL PRW
.SH NAME
conmat - print confusion matrix from raw matches
.SH SYNOPSIS
.B conmat
(-I) (-l) (-s) (-e) (-P pagelength) (-L linelength) (-t title) (file(s))
.SH DESCRIPTION
.I conmat
is a general purpose program to print a confusion matrix from listings of 
matches in the form "<input-token><TAB><output-token><NL>".  The input
and output tokens can be ASCII strings, but they cannot contain whitespace.
The program can print confusion matrices up to 100 x 100 on multiple sheets 
of paper.  If filenames are given on the command line, their contents are
concatenated. If no files are given, input is read from the standard input.
If a command line file is given as "-", then input is switched to the 
standard input.
Lines containing only a single token are ignored, but may be used to 
order tokens in the confusion matrix (include a list of tokens as the
first file to be processed)
.SH OPTIONS
.TP 11
.B -I
Identify program name and version number.
.TP 11
.B -l
Add line summaries to the right of the matrix, detailing the number of matches
and the percentage correct.
.TP 11
.B -s
Force output into a single printed document, regardless of size.
.TP 11
.B -e
Ignore rows and columns that have zero entries (useful if output labels != input labels).
.TP 11
.BI -P pagelength
Set number of lines per printed page.  Default 50.
.TP 11
.BI -L linelength
Set number of characters per printed line.  Default 80.
.TP 11
.BI -t title
Put a title line on the output including the supplied text.
.SH VERSION/AUTHOR
.IP 1.0
M.A.Huckvale
*/
/*--------------------------------------------------------------------------*/

/* include files */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include "sfs.h"

/* manifest constants */
#define MAXTOKEN	1024		/* capacity of the token table */
#define MAXLINE	8192		/* size of the text line buffers */

/* confusion matrix table: one entry per distinct token, in order of arrival */
struct {
	char	*tokname;			/* token text */
	unsigned short	counts[MAXTOKEN];	/* counts[o]: times this token was matched to token o */
} table[MAXTOKEN];

/* table bookkeeping */
int	tp;				/* number of table entries in use */
int	toklen;				/* longest token seen; also the printed cell width */
int	rowtab[MAXTOKEN];		/* 1 if the token occurred as the input of a match */
int	rowcnt;				/* number of rows to print */
int	coltab[MAXTOKEN];		/* 1 if the token occurred as the output of a match */
int	colcnt;				/* number of columns to print */
int	maxcount;			/* largest single cell count */
int	numconfuse=0;			/* total number of matches read */

/* output options */
int	pagelength=50;			/* lines per page */
int	linelength=80;			/* characters per line */
int	linesummary=0;			/* non-zero: add a summary to each row */
int	singlepage=0;			/* non-zero: print everything as one page */
int	doempty=1;			/* non-zero: print empty rows and columns too */
char	*title=NULL;			/* optional title text, NULL if none */

/* text buffers */
char	iline[MAXLINE];			/* input line buffer */
char	oline[MAXLINE];			/* output line buffer */
char	dateline[80];			/* header line giving the processing date */
char	fileline[MAXLINE];		/* header line listing the data sources */

/* find a token in the table, adding it when it is not there yet */
/*	s	- token text to find
	returns	- index of the token's table entry

   A new token is copied into freshly allocated memory and takes the next
   free entry; toklen grows if the token is longer than any seen before.
   A full table or a failed allocation is reported through error().
   NOTE(review): error() is not defined in this file (presumably sfs.h) and
   is assumed not to return; if it does, 0 is returned. */
int lookup(s)
char	*s;
{
	int	slot;
	int	nchars;

	/* linear search of the entries in use */
	slot = 0;
	while (slot < tp) {
		if (!strcmp(s,table[slot].tokname))
			return(slot);
		slot++;
	}

	/* unseen token: note its length for the column width */
	nchars = strlen(s);
	if (toklen < nchars)
		toklen = nchars;

	/* no room left */
	if (tp==MAXTOKEN) {
		error("too many input tokens");
		return(0);
	}

	/* take a private copy of the text */
	table[tp].tokname = (char *)malloc(nchars+1);
	if (table[tp].tokname==NULL) {
		error("could not get memory");
		return(0);
	}
	strcpy(table[tp].tokname,s);

	/* hand back the new entry and advance the table counter */
	return(tp++);
}

/* accumulate the matches read from one stream into the confusion matrix */
/*	ip	- open stream of text lines, read until fgets() fails

   Each line is split at spaces, tabs and newlines.  The first token is the
   input label and the second the output label; anything further is ignored.
   A line holding a single token only registers that token in the table. */
void process(ip)
FILE	*ip;
{
	char	*intok,*outtok;
	int	row,col;

	while (fgets(iline,MAXLINE,ip)) {
		intok = strtok(iline," \t\n");
		outtok = strtok(NULL," \t\n");

		/* the output token is entered in the table ahead of the input token */
		if (outtok && *outtok)
			lookup(outtok);

		/* blank line */
		if (!intok || !*intok)
			continue;
		row = lookup(intok);

		/* lone token: table entry only, nothing to count */
		if (!outtok || !*outtok)
			continue;
		col = lookup(outtok);

		/* count the match and track the largest cell */
		table[row].counts[col]++;
		if (table[row].counts[col] > maxcount)
			maxcount = table[row].counts[col];

		/* first use of this row or column */
		if (!rowtab[row]) {
			rowtab[row]=1;
			rowcnt++;
		}
		if (!coltab[col]) {
			coltab[col]=1;
			colcnt++;
		}
		numconfuse++;
	}
}

/* true when line r lies in the half-open range [srow,erow) of the enclosing scope */
#define OKROW(r)	(((r) >= srow) && ((r) < erow))

/* print one window of a text line */
/*	str	- NUL-terminated text
	scol	- first character position to print
	ecol	- position at which printing stops (not itself printed)

   Writes the characters of str at positions scol..ecol-1 to standard
   output followed by a newline; text shorter than scol yields just the
   newline. */
void printline(str,scol,ecol)
char	*str;
int	scol,ecol;
{
	int	pos;

	/* step over the text left of the window */
	for (pos=0;(pos<scol) && (*str!='\0');pos++)
		str++;

	/* emit until the window or the text runs out */
	while ((pos<ecol) && (*str!='\0')) {
		putchar(*str++);
		pos++;
	}
	putchar('\n');
}

/* print part of confusion matrix */
void printmatrix(srow,erow,scol,ecol)
int	srow,erow;
int	scol,ecol;
{
	char	tline[128];
	int	i,j;
	int	tot=0,offdiag=0,ltot;

	/* line 0 - Titles */
	if (OKROW(0))
		printline(dateline,scol,ecol);
	if (OKROW(1))
		printline(fileline,scol,ecol);
	if (OKROW(2))
		printf("\n");
	if (OKROW(3)) {
		sprintf(oline,"%*sConfusion Matrix",((rowcnt+1)*(toklen+1)-16)/2," ");
		printline(oline,scol,ecol);
	}
	if (OKROW(4))
		printf("\n");
	if (OKROW(5)) {
		sprintf(oline,"%*s|",toklen," ");
		for (i=0;i<tp;i++) if (doempty || coltab[i]) {
			sprintf(tline,"%*s ",toklen,table[i].tokname);
			strcat(oline,tline);
		}
		printline(oline,scol,ecol);
	}
	if (OKROW(6)) {
		for (i=0;i<toklen;i++) oline[i]='-';
		oline[i++]='+';
		for (;i<(colcnt+1)*(toklen+1)-1;i++) 
			oline[i]='-';
		oline[i]='\0';
		printline(oline,scol,ecol);
	}
	for (i=0;i<tp;i++) if (doempty || rowtab[i]) {
		sprintf(oline,"%*s|",toklen,table[i].tokname);
		ltot=0;
		for (j=0;j<tp;j++) if (doempty || coltab[j]) {
			sprintf(tline,"%*d ",toklen,table[i].counts[j]);
			strcat(oline,tline);
			tot += table[i].counts[j];
			ltot += table[i].counts[j];
			if (i != j) offdiag += table[i].counts[j];
		}
		if (linesummary) {
			if (ltot)
				sprintf(tline," %5d total %3d%%",ltot,(int)(100.0*table[i].counts[i]/ltot));
			else
				sprintf(tline," %5d total %3d%%",ltot,0);
			strcat(oline,tline);
		}
		if (OKROW(7+i)) printline(oline,scol,ecol);
	}

	/* print scores */
	if (OKROW(rowcnt+7))
		printf("\n");
	if (OKROW(rowcnt+8)) {
		sprintf(oline,"Number of matches = %3d",tot);
		printline(oline,scol,ecol);
	}
	if (OKROW(rowcnt+9)) {
		sprintf(oline,"Recognition rate  = %5.1f%%",(100.0*(tot-offdiag)/tot));
		printline(oline,scol,ecol);
	}
}

/* main program */
void main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */

	/* file processing */
	time_t		tim;
	FILE		*ip;
	int		i,j;
	int		totwidth;
	int		numwidth,numheight;

	/* decode switches */
	while ( (c = getopt(argc,argv,"IlseP:L:t:")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Construct confusion matrix V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'l' :	/* add line summary */
			linesummary=1;
			break;
		case 's' :	/* force single page */
			singlepage++;
			break;
		case 'e' :	/* do not print empty rows and columns */
			doempty=0;
			break;
		case 'P' :	/* page length */
			pagelength = atoi(optarg);
			break;
		case 'L' :	/* line length */
			linelength = atoi(optarg);
			if (linelength > MAXLINE)
				error("line length too large");
			break;
		case 't' :	/* title line */
			title = optarg;
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg)
		error("usage: %s (-I) (-s) (-e) (-l) (-P pagelen) (-L linelen) (-t title) (file(s))",PROGNAME);


	/* put title */
	if (title)
		printf("Confusion Matrix    : %s\n",title);

	/* process files */
	tim=time((time_t *)0);
	sprintf(dateline,"Processing date     : %s",ctime(&tim));
	dateline[strlen(dateline)-1]='\0';
	sprintf(fileline,"Confusion data from :");
	if (optind == argc) {
		strcat(fileline," stdin");
		process(stdin);
	}
	else for (;optind<argc;optind++) {
		if (strcmp(argv[optind],"-")==0) {
			strcat(fileline," stdin");
			process(stdin);
		}
		else if ((ip=fopen(argv[optind],"r"))!=NULL) {
			strcat(fileline," ");
			strcat(fileline,argv[optind]);
			process(ip);
			fclose(ip);
		}
		else
			error("could not open '%s'",argv[optind]);
	}

	/* get sizes */
	if (doempty) {
		rowcnt = tp;
		colcnt = tp;
	}

	/* print confusion matrix */
	if (numconfuse) {

		/* check minimum token size */
		if ((toklen==1) && (maxcount > 9)) toklen++;
		if ((toklen==2) && (maxcount > 99)) toklen++;
		if ((toklen==3) && (maxcount > 999)) toklen++;

		/* get total number of pages */
		totwidth = colcnt*toklen + linesummary*17;
		if (singlepage) {
			printmatrix(0,MAXLINE,0,MAXLINE);
		}
		else {
			numwidth = 1 + totwidth/linelength;
			numheight = 1 + (rowcnt+10)/pagelength;
			for (i=0;i<numheight;i++)
				for (j=0;j<numwidth;j++) {
					if ((i > 0) || (j > 0))
						printf("\f");
					printmatrix(i*pagelength,(i+1)*pagelength,
						j*linelength,(j+1)*linelength);
				}
		}
	}
	else
		error("no matches found to process");

	/* that's all folks */
	exit(0);
}


