/* treevq -- build a binary tree based vector quantiser */

/* Mark Huckvale - University College London */

/* version 1.0 - September 1996 */

/* program name and version, printed by the -I switch and the usage message */
#define PROGNAME "treevq"
#define PROGVERS "1.0"
/* global copy of the program name; not referenced elsewhere in this file --
   presumably picked up by the SFS library for diagnostics (TODO confirm) */
char *progname=PROGNAME;

/*-------------------------------------------------------------------------*/
/**MAN
.TH TREEVQ 1 UCL
.SH NAME
treevq - build/use a binary tree codebook for vector quantisation
.SH SYNOPSIS
.B treevq
(-I) (-i item|-a item) (-c cbitem) (-f cbfile) (-n numcode) (-u) file
.SH DESCRIPTION
.I treevq
is a program to either build or use a binary-tree format vector
quantisation codebook.  In building mode (when the -c and -f options are not
used) the input item(s) are divided into 2 clusters, then each cluster may
be split into two further clusters, etc, until
the number of requested codes is met or exceeded.  The cluster centroids used
in the derivation of the final clusters, and the final clusters themselves
are then saved back to the file.  In encoding mode (where -c or -f is
used) a built codebook is used to encode a single new input item, the result
saved as an artificial coefficient item consisting of the codebook
vectors, but with the 'flag' field in each coefficient record set to
the mapped codebook index.  For encoding, the codebook may be stored in
a separate file.  For building, a uniform (balanced binary tree) may be
enforced with the -u switch.
.SH OPTIONS
.TP 11
.B -I
Identify program name and version number.
.TP 11
.BI -i item
Select input coefficient item for building or encoding.
.TP 11
.BI -a item
Select all coefficient items in the file for building.
.TP 11
.BI -c cbitem
Select input codebook coefficient item.  Also selects encoding operation.
Default: building.
.TP 11
.BI -f cbfile
Select input codebook filename.  Also selects encoding operation.  Default: input file.
.TP 11
.BI -n numcode
Specify minimum # codes in codebook.  Default: 64.  For -u switch,
the number of codes will always be a power of two.
.TP 11
.B -u
Specify a uniform binary tree.  This gives slightly worse distortion
for a given codebook size, but gives the best average encoding speed.
.SH INPUT ITEMS
.IP COEFF
Input vectors for building or encoding.
.SH OUTPUT ITEMS
.IP COEFF
(Building)Codebook.
.IP COEFF
(Encoding)Encoded utterance.
.SH HISTORY
.IP input=
Input items for codebook.
.IP size=
Number of codes
.IP uniform
Uniform binary tree.
.SH VERSION/AUTHOR
.IP 1.0
Mark Huckvale
.SH SEE ALSO
<associated programs>
.SH BUGS
*/
/*--------------------------------------------------------------------------*/

#include "SFSCONFG.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
#include "sfs.h"

/* global data */
struct item_header	coitem;		/* header of the input CO item (last one found with -a) */
struct co_rec		*co;		/* single-record work buffer: read/write scratch and centroid result */
struct co_rec		*cobuff;	/* all training vectors; 'posn' of each holds its current cluster number */
int			totvector;	/* capacity of cobuff (total frames counted in the input) */
int			numvector;	/* number of vectors actually loaded into cobuff */
struct item_header	opitem;		/* header of the output item */
struct co_rec		*cb;		/* not referenced in this file (functions use a parameter named cb) */

/* codebook structure */
struct item_header	cbitem;		/* header of the codebook item used when encoding */
/* one node of the binary tree: a left and a right centroid record, each
   with an optional child node; a NULL child marks a terminal code */
struct cb_rec {
	struct co_rec	*lcr;	/* left centroid record */
	struct cb_rec	*lcb;	/* left child node, NULL if left is terminal */
	struct co_rec	*rcr;	/* right centroid record */
	struct cb_rec	*rcb;	/* right child node, NULL if right is terminal */
} *cbroot,*cbmax;		/* tree root; node selected by searchmaxdist() */
int	parent;			/* not referenced in this file */
int	totcode;		/* running count of codes; incremented by each clustercodebook() call */
float	cbmaxdist;		/* largest 'gain' value seen so far by searchmaxdist() */

/* operation */
int	building=1;	/* building a codebook */
int	allitem=0;	/* processing all items */
char	*cbtype="0";	/* codebook item */
char	cbfile[SFSMAXFILENAME];	/* codebook file */
int	uniform=0;	/* uniform tree */
int	numcode=64;	/* codebook size */
int	vecsize;	/* input vector size */

#ifdef IAG
/* debug aid: print "<title>=" followed by all vecsize elements of a record */
void dumpvector(cr,title)
struct co_rec *cr;
char	*title;
{
	int	idx = 0;

	printf("%s=",title);
	while (idx < vecsize) {
		printf("%g ",cr->data[idx]);
		idx++;
	}
	printf("\n");
}

/* debug aid: print the codebook tree, one line per centroid, indented by
   two columns per level; children are listed directly under their centroid */
void dumpcodebook(cb,level)
struct cb_rec *cb;
int level;
{
	int	indent = 2*level;
	char	*kind;

	/* left centroid, then its subtree */
	if (cb->lcb != NULL)
		kind = "branch";
	else
		kind = "terminal";
	printf("%*scode %2d left  %s\n",indent," ",cb->lcr->posn,kind);
	if (cb->lcb != NULL)
		dumpcodebook(cb->lcb,level+1);

	/* right centroid, then its subtree */
	if (cb->rcb != NULL)
		kind = "branch";
	else
		kind = "terminal";
	printf("%*scode %2d right %s\n",indent," ",cb->rcr->posn,kind);
	if (cb->rcb != NULL)
		dumpcodebook(cb->rcb,level+1);
}
#endif
	
/* add vectors into memory table */
void addvectors(fid,num)
int	fid;
int	num;
{
	int	i,j;

	if (numvector+num>totvector)
		error("too many input vectors");
	for (i=0;i<num;i++) {
		if (sfsread(fid,i,1,co)!=1) break;
		for (j=0;j<vecsize;j++)
			cobuff[numvector].data[j] = co->data[j];
		cobuff[numvector].posn=0;	/* parent=0 */
#ifdef IAG
dumpvector(&cobuff[numvector],"input");
#endif
		numvector++;
	}
}

/* write out codebook */
void writecodebook(fid,cur)
int	fid;
struct cb_rec *cur;
{
	int	i;

	/* do left branch */
	for (i=0;i<vecsize;i++)
		co->data[i] = cur->lcr->data[i];
	co->flag = (cur->lcb) ? 1 : 0;
	sfswrite(fid,1,co);
	co->posn++;
	if (cur->lcb) writecodebook(fid,cur->lcb);

	/* do right branch */
	for (i=0;i<vecsize;i++)
		co->data[i] = cur->rcr->data[i];
	co->flag = (cur->rcb) ? 1 : 0;
	sfswrite(fid,1,co);
	co->posn++;
	if (cur->rcb) writecodebook(fid,cur->rcb);

}

/* read codebook */
int readcodepos;
int readcodevq;

void readcodebook(fid,cur)
int	fid;
struct cb_rec *cur;
{
	int	i;

	/* do left branch */
	if (sfsread(fid,readcodepos++,1,co)!=1)
		error("failed to load codebook");
	cur->lcr = (struct co_rec *)sfsbuffer(&coitem,1);
	for (i=0;i<vecsize;i++)
		cur->lcr->data[i] = co->data[i];
	cur->lcr->posn = co->posn;
	if (co->flag) {
		cur->lcb = (struct cb_rec *)calloc(1,sizeof(struct cb_rec));
		readcodebook(fid,cur->lcb);
		cur->lcr->flag = 0;
	}
	else
		cur->lcr->flag = readcodevq++;

	/* do right branch */
	if (sfsread(fid,readcodepos++,1,co)!=1)
		error("failed to load codebook");
	cur->rcr = (struct co_rec *)sfsbuffer(&coitem,1);
	for (i=0;i<vecsize;i++)
		cur->rcr->data[i] = co->data[i];
	cur->rcr->posn = co->posn;
	if (co->flag) {
		cur->rcb = (struct cb_rec *)calloc(1,sizeof(struct cb_rec));
		readcodebook(fid,cur->rcb);
		cur->rcr->flag = 0;
	}
	else
		cur->rcr->flag = readcodevq++;
}

/* compute the mean of every vector in cobuff whose 'posn' equals cnum and
   leave it in co->data; an empty cluster is reported through error() */
void centroid(cnum)
int	cnum;	/* cluster number */
{
	int	v,k;
	int	members=0;

	/* clear the accumulator */
	for (k=0;k<vecsize;k++) co->data[k]=0;

	/* sum the members of the cluster */
	for (v=0;v<numvector;v++) {
		if (cobuff[v].posn!=cnum) continue;
		for (k=0;k<vecsize;k++)
			co->data[k] += cobuff[v].data[k];
		members++;
	}
	if (members==0)
		error("cluster %d has zero entries!",cnum);

	/* turn sums into means */
	for (k=0;k<vecsize;k++) co->data[k] /= members;
#ifdef IAG
dumpvector(co,"centroid");
#endif
}

/* re-label every vector currently in cluster code0, code1 or code2 as
   code1 or code2, whichever of vec1/vec2 is nearer in squared Euclidean
   distance (ties go to code2).  On return each centroid's 'gain' holds its
   distortion: the root of the mean per-element squared error of its members,
   or, for a uniform tree, 1/code so that lower-numbered clusters rank first */
void assignvectors(vec1,vec2,code0,code1,code2)
struct co_rec *vec1;
struct co_rec *vec2;
int	code0,code1,code2;
{
	int	v,k;
	int	label;
	float	d1,d2,err1,err2;
	int	n1=0,n2=0;

	vec1->gain=0;
	vec2->gain=0;
	for (v=0;v<numvector;v++) {
		/* only vectors belonging to the cluster being split take part */
		label = cobuff[v].posn;
		if ((label!=code0) && (label!=code1) && (label!=code2))
			continue;

		/* squared distance to each candidate centroid */
		err1=0;
		err2=0;
		for (k=0;k<vecsize;k++) {
			d1 = cobuff[v].data[k] - vec1->data[k];
			err1 += d1*d1;
			d2 = cobuff[v].data[k] - vec2->data[k];
			err2 += d2*d2;
		}

		if (err1 < err2) {
			cobuff[v].posn = code1;
			vec1->gain += err1/vecsize;
			n1++;
		}
		else {
			cobuff[v].posn = code2;
			vec2->gain += err2/vecsize;
			n2++;
		}
	}

	if (!uniform) {
		if (n1) vec1->gain = sqrt(vec1->gain/n1);
		if (n2) vec2->gain = sqrt(vec2->gain/n2);
	}
	else {
		vec1->gain = 1.0/code1;
		vec2->gain = 1.0/code2;
	}
#ifdef IAG
printf("Counts: code %d=%d code%d=%d\n",code1,n1,code2,n2);
#endif
}

/* cluster the data */
void clustercodebook(cb,code)
struct cb_rec *cb;
int	 code;
{
	int	j;
	int	count;
	int	change;
	float	ranval;
	
	/* create coefficient vectors */
	cb->lcr = (struct co_rec *)sfsbuffer(&coitem,1);
	cb->lcr->posn = 2*code+1;
	cb->rcr = (struct co_rec *)sfsbuffer(&coitem,1);
	cb->rcr->posn = 2*code+2;

	/* calculate centroid */
	centroid(code);

	/* put modified centroid into codebook */
	for (j=0;j<vecsize;j++) {
		ranval = (float)(rand()%2000)/1.0E6 - 1.0E-3;
		cb->lcr->data[j] = co->data[j] + ranval;
		cb->rcr->data[j] = co->data[j] - ranval;
	}
#ifdef IAG
dumpvector(cb->lcr," left");
dumpvector(cb->rcr,"right");
#endif
	/* iterate to convergence */
	count=0;
	do {
		change=0;
		count++;
#ifdef IAG		
printf("Clusters %d,%d,%d Iteration %d\n",code,2*code+1,2*code+2,count);
#endif
		assignvectors(cb->lcr,cb->rcr,code,2*code+1,2*code+2);
#ifdef IAG
printf("distortion, cluster %d=%g cluster %d=%g\n",
	2*code+1,cb->lcr->gain,2*code+2,cb->rcr->gain);
#endif
		/* update codebook */
		centroid(2*code+1);
		for (j=0;j<vecsize;j++) {
			if (cb->lcr->data[j] != co->data[j]) {
				cb->lcr->data[j] = co->data[j];
				change=1;
			}
		}
		centroid(2*code+2);
		for (j=0;j<vecsize;j++) {
			if (cb->rcr->data[j] != co->data[j]) {
				cb->rcr->data[j] = co->data[j];
				change=1;
			}
		}
		
	} while (change && (count < 20));

	/* increased # codes by 1 */
	totcode++;
}

/* search for maximum distortion */
void searchmaxdist(cb)
struct cb_rec *cb;
{
	if (cb->lcr->gain > cbmaxdist) {
		cbmaxdist = cb->lcr->gain;
		cbmax = cb;
	}
	if (cb->lcb) searchmaxdist(cb->lcb);
	if (cb->rcr->gain > cbmaxdist) {
		cbmaxdist = cb->rcr->gain;
		cbmax = cb;
	}
	if (cb->rcb) searchmaxdist(cb->rcb);
}
	
/* descend the tree from 'cb', at each node moving towards whichever of the
   two centroids is nearer to 'cr' in squared Euclidean distance (ties go
   right), and return the terminal centroid record that is reached */
struct co_rec *mapvector(cb,cr)
struct cb_rec *cb;
struct co_rec *cr;
{
	struct cb_rec *node = cb;
	float	dleft,dright,diff;
	int	k;

	for (;;) {
		dleft = 0;
		for (k=0;k<vecsize;k++) {
			diff = node->lcr->data[k] - cr->data[k];
			dleft += diff*diff;
		}
		dright = 0;
		for (k=0;k<vecsize;k++) {
			diff = node->rcr->data[k] - cr->data[k];
			dright += diff*diff;
		}

		if (dleft < dright) {
			if (node->lcb == NULL)
				return(node->lcr);
			node = node->lcb;
		}
		else {
			if (node->rcb == NULL)
				return(node->rcr);
			node = node->rcb;
		}
	}
}
/* main program */
/*
 * Decodes the command line, opens the SFS file and then either
 *  - builds a codebook: loads the training vectors (one item, or every CO
 *    item with -a), repeatedly splits the terminal cluster with the largest
 *    distortion until numcode codes exist, and writes the tree as a new CO
 *    item; or
 *  - encodes an item (-c or -f given): loads a codebook tree and writes, for
 *    each input record, the nearest codebook vector with 'flag' set to the
 *    index of the terminal code.
 * All failures are reported through error().
 */
int main(argc,argv)
int	argc;
char	*argv[];
{
	/* option decoding */
	extern int	optind;		/* option index */
	extern char	*optarg;	/* option argument ptr */
	int		errflg = 0;	/* option error flag */
	int		c;		/* option switch */
	int		it;		/* item selection */
	char		*ty;		/* item sub type */
	char		*cotype="0";
	/* file variables */
	char		filename[SFSMAXFILENAME]; /* SFS data file name */
	int		fid;		/* input file descriptor */
	int		ofid,cfid;
	struct item_header item;
	int		i,j;
	struct co_rec	*crout;
	
	/* decode switches */
	while ( (c = getopt(argc,argv,"Ii:a:c:f:n:u")) != EOF ) switch (c) {
		case 'I' :	/* Identify */
			fprintf(stderr,"%s: Tree-coding vector quantisation V%s\n",PROGNAME,PROGVERS);
			exit(0);
			break;
		case 'i' :	/* specific item */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == CO_TYPE)
					cotype = ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'a' :	/* build from all items */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == CO_TYPE)
					allitem=1;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			break;
		case 'c' :	/* specific codebook */
			if (itspec(optarg,&it,&ty) == 0) {
				if (it == CO_TYPE)
					cbtype=ty;
				else
					error("unsuitable item specifier %s",optarg);
			}
			else
				error("illegal item specifier %s",optarg);
			building=0;
			break;
		case 'f':
			/* refuse an over-long name rather than overrun cbfile[] */
			if (strlen(optarg) >= sizeof(cbfile))
				error("codebook filename too long");
			strcpy(cbfile,optarg);
			building=0;
			break;
		case 'n':
			numcode = atoi(optarg);
			if ((numcode < 2) || (numcode > 1024))
				error("codebook size limited to 2..1024");
			break;
		case 'u':
			uniform=1;
			break;
		case '?' :	/* unknown */
			errflg++;
	}
	if (errflg || (argc<2))
		error("usage: %s (-I) (-i item|-a item) (-c cbitem) (-f cbfile) (-n numcode) (-u) file",PROGNAME);

	/* check options */
	if (allitem && !building)
		error("-a option for building only");

	/* get filename */
	if (optind < argc)
		strcpy(filename,sfsfile(argv[optind]));
	else
		error("no database file specified",NULL);

	/* open file */
	if ((fid=sfsopen(filename,"w",NULL)) < 0)
		error("access error on '%s'",filename);

	/* building or encoding? */
	if (building) {
		/* build a codebook */

		/* count the length of the vectors  */
		if (!allitem) {
			if (!sfsitem(fid,CO_TYPE,cotype,&coitem))
				error("could not find input CO item in '%s'",filename);
			vecsize = SFSRECSIZE(&coitem);
			if ((co=(struct co_rec *)sfsbuffer(&coitem,1))==NULL)
				error("could not get memory for input vectors");
			totvector = coitem.numframes;
			if ((cobuff=(struct co_rec *)sfsbuffer(&coitem,totvector))==NULL)
				error("could not get memory for input vectors");
			numvector = sfsread(fid,0,totvector,cobuff);
			for (i=0;i<numvector;i++) {
#ifdef IAG
				dumpvector(&cobuff[i],"input");
#endif
				cobuff[i].posn=0;
			}
		}
		else {
			/* first pass: check vector sizes agree and count frames */
			while (sfsnextitem(fid,&item)) {
				if (item.datatype==CO_TYPE) {
					memcpy(&coitem,&item,sizeof(item));
					if (vecsize==0) {
						vecsize = SFSRECSIZE(&coitem);
						if ((co=(struct co_rec *)sfsbuffer(&coitem,1))==NULL)
							error("could not get memory for input vectors");
					}
					else if (vecsize!=SFSRECSIZE(&coitem))
						error("mismatch of vector sizes in input CO item");
					totvector += coitem.numframes;
				}
			}

			if (totvector==0)
				error("no input data found in '%s'",filename);
			if ((cobuff=(struct co_rec *)sfsbuffer(&coitem,totvector))==NULL)
				error("could not get memory for input vectors");
	
			/* load all the vectors into memory */
			sfsnextitem(fid,NULL);
			while (sfsnextitem(fid,&item)) {
				if (item.datatype==CO_TYPE) {
					memcpy(&coitem,&item,sizeof(item));
					addvectors(fid,coitem.numframes);
				}
			}
		}

		/* create codebook */
		printf("Creating codebook from %d vectors\n",numvector);
		if ((cbroot=(struct cb_rec *)calloc(1,sizeof(struct cb_rec)))==NULL)
			error("could not get memory for codebook");
		totcode=1;
		printf("Code %d/%d\r",totcode,numcode); fflush(stdout);

		/* cluster root node */
		clustercodebook(cbroot,0);
		printf("Code %d/%d\r",totcode,numcode); fflush(stdout);
		while (totcode < numcode) {
			/* find node with largest distortion */
			cbmax = NULL;
			cbmaxdist = 0;
			searchmaxdist(cbroot);
			if (cbmax == NULL) {
				/* every terminal cluster already has zero distortion,
				   so no split can improve the codebook: stop here
				   instead of dereferencing a NULL node */
				fprintf(stderr,"%s: no cluster left to split, stopping at %d codes\n",
					PROGNAME,totcode);
				break;
			}
			if (cbmax->lcr->gain > cbmax->rcr->gain) {
#ifdef IAG
printf("expanding cluster %d\n",cbmax->lcr->posn);
#endif
				if ((cbmax->lcb=(struct cb_rec *)calloc(1,sizeof(struct cb_rec)))==NULL)
					error("could not get memory for codebook");
				clustercodebook(cbmax->lcb,cbmax->lcr->posn);
				/* a split centroid must never be chosen again */
				cbmax->lcr->gain = 0;
			}
			else {
#ifdef IAG
printf("expanding cluster %d\n",cbmax->rcr->posn);
#endif
				if ((cbmax->rcb=(struct cb_rec *)calloc(1,sizeof(struct cb_rec)))==NULL)
					error("could not get memory for codebook");
				clustercodebook(cbmax->rcb,cbmax->rcr->posn);
				/* a split centroid must never be chosen again */
				cbmax->rcr->gain = 0;
			}
			printf("Code %d/%d\r",totcode,numcode); fflush(stdout);
		}

		/* write out vectors as codebook */
		/* totcode equals numcode unless splitting stopped early above,
		   so the header records the number of codes actually built */
		sfsheader(&opitem,CO_TYPE,-1,4,(vecsize*4+sfsstruct[CO_TYPE])/4,
			0.01,0.0,1,0,0);
		if (allitem)
			sprintf(opitem.history,"%s(%d.*;numcodes=%d%s)",
				PROGNAME,CO_TYPE,totcode,
				(uniform)?"uniform":"");
		else
			sprintf(opitem.history,"%s(%d.%02d;numcodes=%d%s)",
				PROGNAME,
				coitem.datatype,coitem.subtype,totcode,
				(uniform)?"uniform":"");
		sprintf(opitem.params,"size=%d,uniform=%d",totcode,uniform);
		if ((ofid=sfschannel(filename,&opitem))<0)
			error("unable to open output channel to '%s'",filename);

		/* writecodebook() stages every record in 'co' and advances posn */
		co->posn=0;
		co->size=1;
		co->mix=0;
		co->flag=0;
		co->gain=0;
		writecodebook(ofid,cbroot);	
		printf("                 \r");
	}
	else {
		/* encode an item */

		/* find item */
		if (!sfsitem(fid,CO_TYPE,cotype,&coitem))
			error("could not find input CO item in '%s'",filename);

		/* load codebook */
		if (cbfile[0]=='\0') strcpy(cbfile,filename);
		if ((cfid=sfsopen(cbfile,"r",NULL))<0)
			error("access error on '%s'",cbfile);
		if (!sfsitem(cfid,CO_TYPE,cbtype,&cbitem))
			error("could not find input codebook in '%s'",cbfile);
		vecsize = SFSRECSIZE(&cbitem);
		if (vecsize != SFSRECSIZE(&coitem))
			error("codebook vector size mismatch");
		if ((co=(struct co_rec *)sfsbuffer(&coitem,1))==NULL)
			error("could not get memory for input vectors");
		readcodepos=0;
		readcodevq=0;
		if ((cbroot=(struct cb_rec *)calloc(1,sizeof(struct cb_rec)))==NULL)
			error("could not get memory for codebook");
		readcodebook(cfid,cbroot);
#ifdef IAG
printf("Codebook contains:\n");
dumpcodebook(cbroot,0);
#endif
		/* open output channel */
		sfsheader(&opitem,CO_TYPE,-1,4,(vecsize*4+sfsstruct[CO_TYPE])/4,
			coitem.frameduration,coitem.offset,
			coitem.windowsize,coitem.overlap,coitem.lxsync);
		if (strcmp(cbfile,filename)==0)
			sprintf(opitem.history,"%s(%d.%02d,%d.%02d)",
				PROGNAME,
				coitem.datatype,coitem.subtype,
				cbitem.datatype,cbitem.subtype);
		else
			sprintf(opitem.history,"%s(%d.%02d;file=%s,item=%d.%02d)",
				PROGNAME,
				coitem.datatype,coitem.subtype,
				cbfile,
				cbitem.datatype,cbitem.subtype);
		strcpy(opitem.params,coitem.params);
		if ((ofid=sfschannel(filename,&opitem))<0)
			error("unable to open output channel to '%s'",filename);

		/* process each input record in turn */
		for (i=0;sfsread(fid,i,1,co)==1;i++) {
			crout = mapvector(cbroot,co);
			/* keep the input record's timing fields; replace only the
			   vector and the flag (terminal code index) */
			for (j=0;j<vecsize;j++)
				co->data[j] = crout->data[j];
			co->flag = crout->flag;
			if (sfswrite(ofid,1,co)!=1)
				error("write error on output file");
		}
	}

	if (!sfsupdate(filename))
		error("update error on '%s'",filename);

	/* that's all folks */
	exit(0);
}
