/*  File Analyzer for Standard (ICADD/Texas) Markup 5/26/93 */				
 
/* This program has been put in the public domain by David Holladay
   on the condition that it be made available in roughly the form
   you got it in. Each disk should have the software, the documentation,
   the list of tags, and the sample files.
 
   Please Report any improvements and corrections to:
   David Holladay
   Raised Dot Computing
   408 S. Baldwin Street
   Madison, WI 53703
   (608) 257-8833   */
 
 
	#include <stdio.h>
	#include <string.h>
	#include <stdlib.h>
	#include <io.h>
	#include <process.h>
 
	/* fi = input file being analyzed, fo = report stream, fn = repaired
	   target file written in /f mode. All three are opened in main. */
	FILE *fi, *fo, *fn;
 
	/* Shared scratch and state variables; every routine in this file reads
	   and writes them directly.
	   c       current input character
	   cnt     characters seen on the current line (reset at each line feed)
	   mode    1 while inside a tag, 0 outside
	   compos  fill position in combuf; longpos is the same for longcom
	   index   result of the last tag lookup (see fincom)
	   nextpos next free spot in bigar; it starts at 1 because an indx
	           entry of 0 marks the end of the list
	   a is not referenced anywhere else in the visible source. */
	int c,cnt,j,a,lline,sline,multiple,maxl,crim,curl,unmatched,mode,compos,longpos,index,pt,nextpos=1;
	/* prev is the previously read character; highbit is the /h switch. */
	int jj,k,kk,q,prev,highbit;
	/* errors  running count of reported problems
	   stackpt number of entries currently on stack
	   lines   current line number (starts at 1 in main)
	   fixm, mactxt, glob are the /f, /m and /g switches
	   realo   tested before fclose(fo) at the end of main
	   ampcnt, gtcnt count stray '&' and '>' characters in the text
	   tt is not referenced anywhere else in the visible source. */
	int legal,slash,tt,errors,stackpt,lines,fixm=0,mactxt,glob=0,realo=0,ampcnt,gtcnt;
	/* manycr, attrib, dopara are the /c, /a and /p switches.
	   spacepos position in longcom of the first space inside the tag
	   enclosef 1 while at least one tag is recorded as open in reg
	   spend    tag number that a bare "</>" resolved to, otherwise -1
	   strip is not referenced anywhere else in the visible source. */
	int doprint,manycr=0,strip=0,spacepos,enclosef,attrib,dopara,spend=-1;
	/* indx    offsets into bigar, one per distinct unknown tag, 0-terminated
	   count   usage count of every character value
	   mult    how often n line feeds occurred in a row (slot 10 = 10 or more)
	   combuf  the current tag, enclosed in <>, zero-terminated
	   countar tag usage: 0..nt-1 start tags, nt..nt+nt-1 end tags,
	           100 and up the unknown tags in indx order
	   stack   tag numbers of the currently open tags */
	int indx[420],count[256],mult[15],combuf[80],countar[420],stack[20];
	/* exclude[zone] is the tag currently open in that exclusion zone, or -1. */
	int exclude[12]={-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
	/* bigar   text of the unknown tags, each zero-terminated
	   dos     command line handed to system()
	   longcom the current tag as it is copied to the target file
	   buf     copy of argv[1] passed to the spawned recognizer */
	char bigar[5000],dos[100],longcom[200],buf[30];
	/* spec/specl position and line of the first use of each special character
	   reg/regl   position and line where each tag was opened; 0 = not open
	   total      characters read so far (also used for the argument count
	              at the start of main)
	   enclose    printable characters seen outside [0] and inside [1] open tags */
	long spec[256],specl[256],reg[120],regl[120],total,enclose[2];
 
	/* ntags = number of entries actually present in tags[];
	   nt = offset added to a tag number to form its end-tag number. */
	int ntags=34,nt=48;
 
	/* tags[#] is the list of legal tags
	   repeat_tags =0 means this tag must not repeat; =1 means it can
	   exclude_tags  sets up zones of tags that cannot overlap
	   interior_tags sets up tags that must be inside of other tags
			here <litem> and <lhead> must be inside <list>
 
	    IMPORTANT: if you add or subtract a tag, you need to modify
	    all four arrays (tags, repeat_tags, exclude_tags, interior_tags)
	    and update ntags to the new number of tags.
 
	   Notice: this program is NOT a full SGML parser. It just does
	   some simple tests on the tags in your document. If you want
	   a tester that pays attention to a DTD, then buy a full blown
	   SGML parsing engine. */
 
	/* Word processor names, indexed in main by the exit code of the spawned
	   recognizer divided by 4. Only the first 44 of the 60 slots have an
	   initializer; the remaining slots are null pointers. */
	char *wp[60]= {"unknown","ASCII","EBSDIC","unknown","WordStar",
		"MicroSoft Word","WordPerfect 4","WordPerfect 5.0/5.1","PFS First Choice","Word Star",
		"MultiMate","Volkswriter","unknown","IBM Writing Assistant","Total Word",
		"DisplayWrite","OfficeWriter","XY Write","Navy DIF","MicroSoft RTF",
		"unknown","unknown","AdvanceWrite Plus","Q&A","Lotus Manuscript",
		"Rapid File","Wang PC","PeachText","Enable","FrameWork",
		"Dec DX","Mass 11","Final Form Text","unknown",
		"Interleaf Publisher","unknown","Legacy","unknown","unknown",
		"unknown","unknown","unknown","Windows Writer","Word for Windows"};
	/* The legal start tags. A tag's position in this list is its tag number
	   (<para> is 0, <list> is 7, <h1> is 8); the matching end tag is numbered
	   nt higher. 34 entries are present (ntags); the rest are null pointers. */
	char *tags[48]= {
		"<para>","<it>","<b>","<lit>","<other>",
		"<litem>","<lhead>","<list>",
		"<h1>","<h2>","<h3>","<h4>","<h5>","<h6>",
		"<toc>","<toc1>","<toc2>","<toc3>","<toc4>","<toc5>","<toc6>",
		"<term>","<au>","<box>",
		"<note>","<bq>","<fig>","<fn>","<lang>","<ti>",
		"<pp>","<anchor>","<ipp>","<xref>"};
 
	/* One flag per entry of tags[], laid out in the same rows. A nonzero
	   flag (<anchor>, <ipp>, <xref>) marks a tag that the "?" listing
	   describes as needing no end tag; main does not track such tags as
	   open or closed. */
	int repeat_tags[48]={0,0,0,0,0,
	0,0,0,
	0,0,0,0,0,0,
	0,0,0,0,0,0,0,
	0,0,0,
	0,0,0,0,0,0,
	0,1,1,1};
 
	/* Exclusion zone of each tag, laid out in the same rows as tags[];
	   0 means the tag belongs to no zone. Zone 1 holds <para>, <list> and
	   <h1>..<h6>; zone 2 holds <toc>..<toc6>. The zone number is used as
	   the index into exclude[], which remembers the tag open in that zone. */
	int exclude_tags[48]={1,0,0,0,0,
	0,0,1,
	1,1,1,1,1,1,
	2,2,2,2,2,2,2,
	0,0,0,
	0,0,0,0,0,0,
	0,0,0,0};
 
	/* For each tag, the number of the tag it must appear inside, laid out
	   in the same rows as tags[]; 0 means no requirement. <litem> and
	   <lhead> carry 7, the number of <list>. Because 0 means "none", tag
	   number 0 (<para>) cannot be named as a required container. */
	int interior_tags[48]={0,0,0,0,0,
	7,7,0,
	0,0,0,0,0,0,
	0,0,0,0,0,0,0,
	0,0,0,
	0,0,0,0,0,0,
	0,0,0,0};
 
	/* Replacement text for the Macintosh characters 0x80-0xFF, used by
	   newgetc when the /m switch is given. The table is indexed by the
	   character code with the high bit removed (c&127); the row labels give
	   the original code of the first entry in each row. The empty string in
	   the last slot makes newgetc drop the character: newgetc first maps the
	   unwanted control characters to 255 so that they land on that slot.
	   NOTE(review): slot 0xC5 yields a raw "<<", which the tag scanner in
	   main will treat as the start of a tag -- confirm that this is wanted. */
	char *macintosh[128]={
		 /*  0         1        2          3          4          5         6          7    */
/*80*/	"&Auml;",  "&Aring;","&Ccedil;","&Eacute;","&Ntilde;","&Ouml;", "&Uuml;",  "&aacute;",
/*88*/	"&agrave;","&acirc;","&auml;",  "@a",      "&aring;", "&ccedil;","&eacute;","&egrave;",
/*90*/	"&ecirc;", "&euml;","&iacute;", "&igrave;","&icirc;", "&iuml;", "&ntilde;","&oacute;",
/*98*/	"&ograve;","&ocirc;","&ouml;",  "@o",      "&uacute;","&ugrave;","&ucirc;","&uuml;",
		/* 0xA3: the column padding used to sit inside the quotes, which put
		   three spaces after every pound sign in the output. */
/*A0*/	"*",       "&deg;",  "&cent;",  "&pound;", "(s)",     "&bull;", "(p)",     "&bgr;",
/*A8*/	"(R)",     "(C)",    "(TM)",    "\'",      "\"",      "=/",     "&AElig;", "0",
/*B0*/	"infinity"," ",      " ",       " ",       "&yen;",   "&mgr;",  "&dgr;",   "&Sgr;",
/*B8*/	"&Pgr;",   "&pgr;",  " ",       "&aomacr;","&oomacr;","&OHgr;", "&aelig;", "0",
/*C0*/	"&iquest;","&iexcl;"," ",       " ",       "l",       "<<",     "(t)",     "&lt;&lt;",
/*C8*/	"&gt;&gt;","...",    " ",       "@A",      "@A",      "@O",     "OE",      "&aelig;",
/*D0*/	"-",       "--",     "\"",      "\"",      "\'",      "\'",     " ",       "(d)",
/*D8*/	"&yuml;",  "@Y",     " ",       " ",       " ",       " ",      " ",       " ",
/*E0*/	" ",       " ",      " ",       " ",       " ",       "@A",     "@E",      "@A",
/*E8*/	"@E",      "@E",     "@I",      "@I",      "@I",      "@I",     "@O",      "@O",
/*F0*/	" ",       "@O",     "@U",      "@U",      "@U",      " ",      " ",       " ",
/*F8*/	" ",       " ",      " ",       " ",       " ",       " ",      " ",       ""};
	/* Replacement text for the PC high-bit characters 0x80-0xFF, used by
	   newgetc when the /h switch is given. The table is indexed by the
	   character code with the high bit removed (c&127); the row labels give
	   the original code of the first entry in each row. The empty string in
	   the last slot makes newgetc drop the character; newgetc maps the
	   unwanted control characters to 255 so that they land on that slot. */
	char *pchigh[128]={
 /*       0          1        2          3         4         5          6         7   */
/*80*/	"&Ccedil;","&uuml;","&eacute;","&acirc;","&auml;","&agrave;","&aring;","&ccedil;",
/*88*/	"&ecirc;","&euml;","&egrave;","&iuml;",  "&icirc;","&igrave;","&Auml;","&Aring;",
/*90*/	"&Eacute;","&aelig;","&AElig;","&ocirc;","&ouml;","&ograve;","&ucirc;","&ugrave;",
/*98*/	"&yuml;", "&Ouml;","&Uuml;", "&cent;", "&pound;","&yen;",   "Pt",      "l",
/*A0*/	"&aacute;","&iacute;","&oacute;","&uacute;","&ntilde;","&Ntilde;","&aomacr;","&oomacr;",
/*A8*/	"&iquest;"," "       ," ",     "1/2",   "1/4",   "&iexcl;","&lt;&lt;","&gt;&gt;",
/*B0*/	" ",     " ",         " ",      " ",    " ",     " ",      " ",       " ",
/*B8*/	" ",     " ",         " ",      " ",    " ",     " ",      " ",       " ",
/*C0*/	" ",     " ",         " ",      " ",    " ",     " ",      " ",       " ",
/*C8*/	" ",     " ",         " ",      " ",    " ",     " ",      " ",       " ",
/*D0*/	" ",     " ",         " ",      " ",    " ",     " ",      " ",       " ",
/*D8*/	" ",     " ",         " ",      " ",    " ",     " ",      " ",       " ",
/*E0*/	"&agr;", "&bgr;",    "&Ggr;",  "&Pgr;", "&Sgr;", "&sgr;",  "&mgr;",  "&tgr;",
/*E8*/	"&Fgr;", "&thgr;",   "&OHgr;", "&dgr;", "infinity","0",    " ",      " ",
/*F0*/	" ",     " ",        " ",      " ",     " ",     " ",      " ",      " ",
/*F8*/	"&deg;", "&bull;",   "&bull;", " ",     " ",     " ",      " ",      ""};
 
 
	/* fincom:   classify the complete tag in combuf; returns its number */
	int fincom(void);
	/* getindex: look combuf up in tags[]; returns 100 when it is not legal */
	int getindex(void);
	/* begtag:   write a start tag to the target file and record it as open */
	void begtag(int);
	/* endtag:   write an end tag to the target file and record it as closed */
	void endtag(int);
	/* newgetc:  read the next input character, applying the /m or /h mapping */
 	int newgetc(void);
 
void main(int argc,char *argv[])
{
	total=argc;
	for (j=1;j<argc;j++)
	{
		if (strstr(argv[j],"/f")!=NULL) fixm=1,total--;
		if (strstr(argv[j],"/g")!=NULL) glob=1,total--;
		if (strstr(argv[j],"/c")!=NULL) manycr=1,total--;
		if (strstr(argv[j],"/a")!=NULL) attrib=1,total--;
		if (strstr(argv[j],"/p")!=NULL) dopara=1,total--;
		if (strstr(argv[j],"/m")!=NULL) mactxt=1,total--;
		if (strstr(argv[j],"/h")!=NULL) highbit=1,total--;
	}
	if (argc<2)
	{
		fprintf(stderr,"\nFormat is as follows:\n%s [source file] {reporting file}",argv[0]);
		fprintf(stderr,"\n     This just checks the source file");
		fprintf(stderr,"\n\n%s [source file] [target file] {reporting file} /f",argv[0]);
		fprintf(stderr,"\n     This creates a new file which is a fixed version of the source file.");
		fprintf(stderr,"\n\n%s ? <Enter> for a list of tags",argv[0]);
		fprintf(stderr,"\n\n -- options available when fixing with /f");
		fprintf(stderr,"\n/h -- fix high bit characters from a PC");
		fprintf(stderr,"\n/m -- fix high bit characters from a Macintosh");
		fprintf(stderr,"\n/g -- run Global Replace (using IMPROVE.RUL) to repair the target file");
		fprintf(stderr,"\n/a -- don't strip off attributes of paired tags in the target file");
		fprintf(stderr,"\n/c -- the input file contains soft returns");
		fprintf(stderr,"\n/p -- insert the <para> tag for unlabeled paragraphs");
		return;
	}
	if (strstr(argv[1],"?")==NULL)
	{
		if ((fi=fopen(argv[1],"rb"))==NULL)
		{
			fprintf(stderr,"\nCannot open file.\n");
			return;
		}
		combuf[0]=fgetc(fi),combuf[1]=fgetc(fi),combuf[2]=fgetc(fi),combuf[3]=fgetc(fi);
		fclose(fi);
		sprintf(buf,"%s",argv[1]);
		j=spawnlp(P_WAIT,"AUTOREC.EXE","AUTOEXEC.EXE",buf,NULL);
		j=(j/4);
		if ((j>1)&&(j<63))
		{
			fprintf(stderr,"\n\n*** Sorry, this is a %s file. Use %s",wp[j],wp[j]);
			fprintf(stderr,"\nto read this file and export an ASCII file. Only ASCII");
			fprintf(stderr,"\nfiles containing ICADD tags meet the ICADD/Texas standards.");
			fprintf(stderr,"\nIf you do not own a copy of %s, then purchase the",wp[j]);
			fprintf(stderr,"\nWord for Word software from Mastersoft (602 277-0900) It costs only $150.");
			fprintf(stderr,"\n\nIf you loaded this file into MegaDots, MegaDots would not");
			fprintf(stderr,"\nrecognize the ICADD tags.\n");
			return;
		}
		if ((combuf[0]=='^')&&(combuf[1]=='R')&&(combuf[2]=='D')&&(combuf[3]=='C'))
		{
			fprintf(stderr,"\n\n*** Sorry, this is a MegaDots file. Use MegaDots");
			fprintf(stderr,"\nto read this file and export an ASCII file. Only ASCII");
			fprintf(stderr,"\nfiles containing ICADD tags meet the ICADD/Texas standards.");
			fprintf(stderr,"\nUse the Control-F4 feature to export to an ASCII line file.");
		  	return;
		}
		fi=fopen(argv[1],"rb");
	}
	k=2;
	if (fixm) k=3;
	if (total>k)
	{
		if ((fo=fopen(argv[k],"w"))==NULL)
		{
			fprintf(stderr,"Cannot open reporting file.\n");
			realo=1;
			return;
		}
	}
	else
	{
		if ((fo=fopen("con","w"))==NULL)
		{
			fprintf(stderr,"Cannot open reporting file.\n");
			return;
		}
	}
 
	if (strstr(argv[1],"?")!=NULL)
	{
		fprintf(fo,"\nICADD/Texas Tags are as follows:\n");
		j=0; while(j<ntags)
		{
			fprintf(fo,"\n%-12s",tags[j]);
			if (repeat_tags[j]) fprintf(fo,"no end tag required (i.e. not a paired tag)   ");
			if (exclude_tags[j]) fprintf(fo,"part of zone %i   ",exclude_tags[j]);
			if (interior_tags[j]) fprintf(fo,"must be inside of tag %s",tags[interior_tags[j]]);
			j++;
		}
		return;
	}
 
	if (fixm)
	{
		if ((fn=fopen(argv[2],"w"))==NULL)
		{
			fprintf(stderr,"Cannot open target file.\n");
			return;
		}
	}
	if (glob)
	{
		fclose (fi);
		sprintf(dos,"global %s qztemp.txt improve.rul /s",argv[1]);
		system(dos);
		if ((fi=fopen("qztemp.txt","rb"))==NULL)
		{
			fprintf(stderr,"\nCannot open input file.\n");
			return;
		}
	}
 
	fprintf(fo,"\n\nTextfile Analyzer Program (c) 1992-3 by David Holladay");
	if (fixm) fprintf(fo,"\nTextfile Fixer Program (c) 1992-3 by David Holladay");
	fprintf(fo,"\n\n--------------- Patterns of Tag Usage ---------------");
 
	cnt=0,mode=0,total=0,errors=0,indx[0]=0,lines=1;
	if (highbit&&mactxt) highbit=0;
	while ((c=newgetc())!=EOF)
	{
		total++,cnt++;
		if ((mode==0)&&(c==62)) gtcnt++;
		if (c==10) lines++;
		if (c=='<') mode=1,compos=0,longpos=0,spacepos=0;
		if ((mode==0)&fixm)
		{
			if ((c==10)&&!manycr)
			{
				while (stackpt>0)
				{
					j=stack[--stackpt];
 
					if ((interior_tags[stack[stackpt+1]]==j)&&j) {stackpt++; break;}
					endtag(j);
				}
 
			}
			if ((c>31)&&dopara&&!enclosef)
			{
				begtag(0);
				countar[0]++,stack[stackpt++]=0,enclosef=1;
			}
			q=stack[1];
			if ((c>31)&&dopara&&(stackpt==1)&&(interior_tags[q]==stack[0])&&(stack[0]!=0))
			{
				q=stack[1];
				begtag(q);
				countar[q]++,stack[stackpt++]=q;
			}
			if ((fixm)&&(c!=13)&&(c!=26)) fprintf(fn,"%c",c);
		}
 
		if (mode==1)      /* mode=1 means inside a tag, mode=0 outside of tags */
		{
			if (c==' ')
			{
				j=0;
				spacepos=longpos;
				if (fixm) longcom[longpos++]=' ';
				while (((c=newgetc())!=EOF)&&(c!='>')&&(j++!=70)) {if (fixm) longcom[longpos++]=c;}
				c='>';
			}
			combuf[compos++]=c,combuf[compos]=0,longcom[longpos++]=c,longcom[longpos]=0;
									/* combuf contains the current tag enclosed in <> */
			if (c=='>')
			{                           /* At this point we have a complete tag */
				q=fincom();
				countar[q]++;      /* increment the counter of the tag useage */
				if (!spacepos) spacepos=180;
				if ((q<nt)&&!repeat_tags[q])   /* if a start tag the must not repeat */
				{
					if (!attrib) longcom[spacepos]='>',longcom[spacepos+1]=0;
					if (reg[q])
					{
						stackpt--;
						if (fixm) endtag(q);
						else
						{
							fprintf(fo,"\n*** Tag %s started at position %ld (line %i)",tags[q],reg[q],regl[q]);
							fprintf(fo," and at position %ld (line %i)",total,lines);
							errors++;
						}
					}
					if (((k=interior_tags[q])!=0)&&!reg[k]&&k)
					{
						if (fixm) begtag(k);
						else
						{
							fprintf(fo,"\n*** Tag %s must be inside of tag %s at position %ld( line %i)",tags[q],tags[k],total,lines);
							errors++;
						}
					}
					if (((k=exclude_tags[q])!=0)&&((kk=exclude[k])!=q)&&kk>=0)
					{
						stackpt--;
						if (fixm) endtag(kk);
 						else
						{
							fprintf(fo,"\n*** Tag %s inside of tag %s at position %ld (line %i)",tags[q],tags[kk],total,lines);
							errors++;
						}
					}
					if ((k=exclude_tags[q])!=0) exclude[k]=q;
					reg[q]=total,regl[q]=lines;    /* reg keeps track of the position regl of the line when tags are open (used for error messages) */
					if (fixm)
					{
						fprintf(fn,"%s",&longcom[0]);
						stack[stackpt++]=q;
					}
				}
				if ((q<nt)&&repeat_tags[q]&&fixm) fprintf(fn,"%s",&longcom[0]);
				if ((q>=nt)&&(q<100)&&repeat_tags[q-nt]&&fixm) fprintf(fn,"%s",&longcom[0]);
 
				if ((q>=nt)&&(q<100)&&!repeat_tags[q-nt])   /* if an end tag that must not repeat */
				{
					if (!attrib) longcom[spacepos]='>',longcom[spacepos+1]=0;
					doprint=1;
					if (!reg[q-nt])
					{
						if (fixm) doprint=0;
						else
						{
							fprintf(fo,"\n*** Tag %s not started, yet ends at position %ld (line %i)",tags[q-nt],total,lines);
							errors++;
						}
					}
					if ((interior_tags[stack[stackpt-1]]==(q-nt))&&(q-nt))
					{
						if (fixm) endtag(stack[--stackpt]);
						else
						{
							fprintf(fo,"\n***Need to end tag %s before you end tag %s in position %ld (line %i)",tags[stack[stackpt-1]],tags[q-nt],total,lines);
							errors++;
						}
					}
					if (((k=stack[--stackpt])!=(q-nt))&&stackpt&&doprint&&fixm)
					{
						if (stack[stackpt-1]==(q-nt))
						{
							endtag(stack[stackpt]);
							endtag(stack[stackpt-1]);
							doprint=0;
						}
						else
						{
							fprintf(fo,"\n*** Notmatching Stack expecting %s, but got %s",tags[k],tags[q-nt]);
							errors++;
						}
					}
					if (doprint&&fixm&&(spend<0)) fprintf(fn,"%s",&longcom[0]);
					if (doprint&&fixm&&(spend>=0)) endtag(spend);
					if ((k=exclude_tags[q-nt])!=0) exclude[k]=-1;
					reg[q-nt]=0,regl[q-nt]=0,spend=-1;
				}
			j=0,enclosef=0;
			while (j<nt) if (reg[j++]) enclosef=1;
			}
			if (compos==65)
			{
				fprintf(fo,"\nUnmatched Command:\n"); k=0;
				while (k<65) fprintf(fo,"%c",combuf[k++]);
				unmatched++,mode=0;
			}
		}
		if ((c>32)&&!mode)
		{           /* a printable character, update the multiple CR array */
			if (multiple>10) multiple=10;
			if (multiple) mult[multiple]++;
			multiple=0,curl++;
			if (c!='>') enclose[enclosef]++;
		}
		if (((c<32)||(c>126))&&(!count[c])) specl[c]=lines,spec[c]=total;  /* keep track of the first use of special characters */
		if ((prev==38)&&(c==32)) ampcnt++;
		count[c]++,prev=c;    /* keep track of character usage */
 
		if (c==10)
		{
			multiple++;
			if (cnt>200) lline++;       /* count of long lines */
			if (cnt<90) sline++;       /*   count of short lines */
			if (curl>maxl) maxl=curl;	/* What is the length of longest line? */
			cnt=0,curl=0;
		}
	}
	if (fixm)
	{
		if (stackpt>5) stackpt=5;
		while (stackpt>0)
		{
			j=stack[--stackpt];
			endtag(j);
			reg[j]=0;
		}
	}
	q=0;
	while (q<(ntags))
	{
		if (reg[q]&&!repeat_tags[q])
		{
			if (fixm) endtag(q);
			else
			{
				fprintf(fo,"\n*** Tag %s Started at position %ld (line %i) but never finished",tags[q],reg[q],regl[q]);
				errors++;
			}
		}
		q++;
	}
	if (!errors&&!fixm)
{
	fsetpos(fi,0);
	rewind(fi);
	fprintf(fo,"\n\n--------------- Making a Second Pass   --------------");
	cnt=0,mode=0,total=0,lines=1;
	while ((c=newgetc())!=EOF)
	{                              /* the second pass uses a stack for more subtle error detection */
		total++,cnt++;
		if (c==10) lines++;
		if (c=='<') mode=1,compos=0;
		if (mode==1)
		{
			if (c==' ')
			{
				j=0;
				while (((c=newgetc())!=EOF)&&(c!='>')&&(j++!=70)) {}
				c='>';
			}
			combuf[compos++]=c,combuf[compos]=0;
			if (c=='>')
			{
				q=fincom();
				if ((q<nt)&&!repeat_tags[q])
				{
					stack[stackpt++]=q;
				}
				if ((q>=nt)&&(q<100)&&!repeat_tags[q-nt])
				{
					if ((k=stack[--stackpt])!=(q-nt)) {fprintf(fo,"\n*** Tags %s and %s Interlaced at position %ld (lines %i)",tags[k],tags[q-nt],total,lines); errors++;}
				}
			}
		}
	}
}
 
	if (!countar[0]) {
		fprintf(fo,"\n*** No Paragraph Tags <para> -- Highly Unlikely");
		fprintf(fo,"\n    You may need to use the /p option to put in paragraph tags.");
		errors++;}
	combuf[0]='<',combuf[1]='h',combuf[2]='1',combuf[3]='>',combuf[4]=0;
	fincom();
	j=countar[q]+countar[q+1]+countar[q+2]+countar[q+3]+countar[q+4]+countar[q+5];
	if (!j) {fprintf(fo,"\n*** No Heading Tags <h1> <h2> <h3> etc. -- Highly Unlikely"); errors++;}
	fprintf(fo,"\n\nTotal Tag Pattern Errors Located: %i",errors);
	if (errors) fprintf(fo,"\n\nTag Pattern Errors can be repaired by the /f fix option");
	fprintf(fo,"\n\n---------------     Character Usage     ---------------");
	if ((maxl<90)&&maxl)
		{
		fprintf(fo,"\n\n*** Maximum line length of %i",maxl);
		fprintf(fo,"\n    This file probably contains soft carriage returns.");
		fprintf(fo,"\n    Fix the file using the /c option to strip off soft returns.\n");
		errors++;
		}
	if (!maxl)
		{
		fprintf(fo,"\n\n*** No line feeds in the file");
		fprintf(fo,"\n    This is probably a Macintosh file.");
		fprintf(fo,"\n    Fix the file using the /m option to change it from Mac to PC.\n");
		errors++;
	}
	if (unmatched) {fprintf(fo,"\n\n***** Bad File -- %i Start tag (<) characters without end tag (>) characters",unmatched); errors++;}
	j=0,count[13]=0; while (j<32)
	{
		if ((j==9)||(j==10)||(j==26)) count[j]=0;
		if (count[j]) {fprintf(fo,"\n*** Control-%c used %i Times, first at line %i, position %i",j+64,count[j],specl[j],spec[j]); errors++;}
		if ((j==12)&&count[12]) {fprintf(fo,"\n*** Control-L is form feed.");
       fprintf(fo,"\n    This file is probably a print image file. Unacceptable"); errors++;}
		if ((j==27)&&count[27]) {fprintf(fo,"\n*** Character <esc> is the escape character.\n   This file is probably a print image file. Unacceptable ***"); errors++;}
		j++;
	}
	j=127; while (j<256)
	{
		if (count[j]) {fprintf(fo,"\n*** Character %i (%c) used %i Times, first at line %i, position %i",j,j,count[j],specl[j],spec[j]); errors++;}
		if ((j==202)&&count[j]) fprintf(fo,"\n    Character 202 is used in Macintosh files, fix with the /m option.");		
		if ((j==209)&&count[j]) fprintf(fo,"\n    Character 209 is used in Macintosh files, fix with the /m option.");		
		if ((j==210)&&count[j]) fprintf(fo,"\n    Character 210 is used in Macintosh files, fix with the /m option.");
		if ((j==211)&&count[j]) fprintf(fo,"\n    Character 211 is used in Macintosh files, fix with the /m option.");		
		if ((j==212)&&count[j]) fprintf(fo,"\n    Character 212 is used in Macintosh files, fix with the /m option.");		
		if ((j==213)&&count[j]) fprintf(fo,"\n    Character 213 is used in Macintosh files, fix with the /m option.");		
	j++;
	}
	j=count[61]+count[64]+count[92]+count[95]+count[96]+count[123]+count[124]+count[125]+count[126];
	if (j) fprintf(fo,"\n\nThis file contains some characters that do not usually appear in standard text.");
	if (j) fprintf(fo,"\nThese characters may be the residue of previous markup. Check them carefully");
	if (j) fprintf(fo,"\nto see if the characters are legal contents or are inapproppriate markup.");
	if (count[61]) fprintf(fo,"\n    Warning: equal sign (=) appears %i times",count[61]);
	if (count[64]) fprintf(fo,"\n    Warning: at sign (@) appears %i times",count[64]);
	if (count[92]) fprintf(fo,"\n    Warning: back slash (\\) appears %i times",count[92]);
	if (count[94]) fprintf(fo,"\n    Warning: caret (^) appears %i times",count[94]);
	if (count[95]) fprintf(fo,"\n    Warning: underbar (_) appears %i times",count[95]);
	if (count[96]) fprintf(fo,"\n    Warning: grave accent (`) appears %i times",count[96]);
	if (count[96]) fprintf(fo,"\n    Perhaps grave accent should be apostrope (') or quote (%c).",34);
	if (count[124]) fprintf(fo,"\n    Warning: vertical bar (|) appears %i times",count[124]);
	if (count[126]) fprintf(fo,"\n    Warning: tilde (~) appears %i times",count[126]);
	if (count[123]) fprintf(fo,"\n    Warning: open brace ({) appears %i times",count[123]);
	if (count[125]) fprintf(fo,"\n    Warning: close brace (}) appears %i times",count[125]);
		if (gtcnt) {fprintf(fo,"\n\n*** On %i occassions, the > character is in the text instead of &gt;",gtcnt); errors++;}
		if (ampcnt) {fprintf(fo,"\n\n*** On %i occasions, the & character is in the text instead of &amp;",ampcnt); errors++;}
	fprintf(fo,"\n");
	j=2,k=0; while (j<10)
	{
		if (mult[j]) {fprintf(fo,"\n    Warning: On %i occasions, there are %i returns in a row.",mult[j],j); k++;}
		j++;
	}
	if (mult[10]) {fprintf(fo,"\n    Warning: On %i occasions, there are more than 9 returns in a row.",mult[10]); k++;}
	if (k) fprintf(fo,"\n    By way of comparison, on %i occasions, there are single returns.",mult[1]);
	if (k) fprintf(fo,"\n    While there is nothing technically wrong with multiple returns,");
	if (k) fprintf(fo,"\n    they may be indications of a file which does not conform to the");
	if (k) fprintf(fo,"\n    ICADD/Texas standards.");
	if ((10*enclose[0])>enclose[1])
		{fprintf(fo,"\n\n*** Over 10 percent of text not enclosed in legal tags.");
		fprintf(fo,"\n    Fix the file with the /p option to turn untagged material");
	   fprintf(fo,"\n    into <para> tags. Do this only if everything else is properly tagged.\n");
		errors++;}
	if (enclose[0]) fprintf(fo,"\n\n %ld characters are not enclosed in tags",enclose[0]);
	if (enclose[0]) fprintf(fo,"\n %ld characters are enclosed in tags",enclose[1]);
	fprintf(fo,"\n\n---------------      Tag Census       ---------------");
	j=0;
	while (indx[j]!=0)
	{
		pt=indx[j],q=0,legal=0;
		while (q<ntags)
		{
			k=0,kk=0;
			while (bigar[pt+k])
			{
				if ((k==1)&&(bigar[pt+k]=='/')) k=2;
				if (tags[q][kk]=='~') {legal=1; goto qdone;}
				if ((tags[q][kk]!=bigar[pt+k])&&((tags[q][kk]-32)!=bigar[pt+k])) goto nope;
				k++,kk++;
			}
			legal=1;
			goto qdone;
			nope: q++;
		}
		qdone: fprintf(fo,"\n");
		if (legal==0) {fprintf(fo,"*** Not legal "); errors++,crim++;}
		fprintf(fo,"Tag ");
		k=0;
		while (bigar[pt+k]) fprintf(fo,"%c",bigar[pt+k++]);
		fprintf(fo," occurs %i times",countar[100+j]);
		j++;
	}
	j=0;
	while (j<nt)
	{
		if (countar[j])
		{
			fprintf(fo,"\nLegal Tag ");
			k=0;
			while (tags[j][k]) fprintf(fo,"%c",tags[j][k++]);
			fprintf(fo," occurs %i times",countar[j]);
		}
		if (countar[j+nt])
		{
			fprintf(fo,"\nLegal Tag </");
			k=1;
			while (tags[j][k]) fprintf(fo,"%c",tags[j][k++]);
			fprintf(fo," occurs %i times",countar[j+nt]);
		}
 
		j++;
	}
	if (crim) {fprintf(fo,"\n\n**** You can get rid of illegal tags by fixing the file");
		fprintf(fo,"\nusing the /g option. You probably will have to modify IMPROVE.RUL");
		fprintf(fo,"\nto change some tags and to strip out other tags. See the documentation");
		fprintf(fo,"\nfor further guidance.");}
	fprintf(fo,"\n\n **** TOTAL ERRORS DETECTED = %i\n",errors);
	fclose(fi);
	if (realo) fclose(fo);
	if (fixm) fclose(fn);
	if (fixm)
	{
		sprintf(dos,"global %s qztemp.txt cleancr.rul /s",argv[2]);
		system(dos);
		sprintf(dos,"copy  qztemp.txt %s >nul",argv[2]);
		system(dos);
	}
	if (glob||manycr||fixm) sprintf(dos,"del qztemp.txt");
	if (glob||manycr||fixm) system(dos);
 
}
 
/* fincom classifies the complete tag held in combuf and returns its number.

   getindex is asked first; any answer below 100 (a legal start or end
   tag) is returned as it is. Otherwise the tag is looked up in the list
   of unknown tags: 'indx' holds one offset into 'bigar' per distinct
   unknown tag and ends with a 0 entry, 'bigar' holds the zero-terminated
   tag texts, and 'nextpos' is the next free spot in 'bigar'. A tag that
   is not in the list yet is appended. The value returned for an unknown
   tag is 100 plus its position in 'indx'.

   When 'indx' or 'bigar' has no room left, nothing is stored and the
   number of the last slot of 'indx' is returned. That number is never
   given to a stored tag; 'countar' is declared with the same length as
   'indx', so the value is still a valid counter slot for the caller.

   Side effects: mode is set to 0; for unknown tags compos is set to 0
   as well; index, pt and j are used as work variables. */

int fincom()
{
	int slots=(int)(sizeof(indx)/sizeof(indx[0]));
	int maxtags=slots-101;      /* keeps 100+position below the last slot */
	int len;

	mode=0;
	index=getindex();
	if (index<100) {return(index);}
	index=0,mode=0,compos=0;

	/* Search the unknown tags seen so far. bigar is a plain char array,
	   so its bytes are read as unsigned char to compare equal to the
	   0..255 values kept in combuf. */
	while ((pt=indx[index])!=0)
	{
		j=0;
		while (combuf[j]&&(combuf[j]==(unsigned char)bigar[pt+j])) j++;
		if (!combuf[j])
		{
			index=index+100;
			return(index);
		}
		index++;
	}

	/* Not seen before: append it, provided both tables have room. */
	len=0;
	while (combuf[len]) len++;
	if ((index>=maxtags)||(nextpos+len+1>(int)sizeof(bigar)))
	{
		index=slots-1;
		return(index);
	}
	indx[index]=nextpos,indx[index+1]=0;
	/* Copy with an explicit loop; the former single expression read and
	   incremented j without a sequence point in between. */
	for (j=0;j<len;j++) bigar[nextpos+j]=(char)combuf[j];
	bigar[nextpos+len]=0;
	nextpos=nextpos+len+1,index=index+100;
	return(index);
}
 
/* getindex looks at the string in combuf and decides what sequence number
   to give it. A value of 100 means that it is not in the list of legal
   tags found in the array "tags". If a slash follows the '<', the return
   value is increased by nt (presently 48). Thus <h1> is assigned 8 (its
   position in "tags"), but </h1> is assigned 8+nt=56.

   The comparison accepts the tag letters in either case. A bare "</>"
   stands for the end of the tag on top of the stack: spend is set to
   that tag number and spend+nt is returned. With an empty stack there
   is nothing for it to end, and 100 is returned.

   Side effects: q, k, kk, legal, slash and spend are overwritten; q is
   left at the matching position in "tags" (or at ntags). */

int getindex()
{
	int tc;

	q=0,legal=0,slash=0,spend=-1;
	while (q<ntags)
		{
			k=0,kk=0;
			while (combuf[k])
			{
				if ((k==1)&&(combuf[k]=='/')) k=2,slash=1;
				tc=tags[q][kk];
				/* Equal, or the upper-case form of a lower-case letter.
				   The case fold is limited to letters; applied to every
				   character it let control bytes match digits and
				   brackets. */
				if ((tc!=combuf[k])&&!((tc>='a')&&(tc<='z')&&((tc-32)==combuf[k]))) goto nope;
				k++,kk++;
			}
			legal=1;
			goto qdone;
			nope: q++;
		}
	qdone: if (legal) return(q+nt*slash);
	if (slash&&!combuf[3]&&(stackpt>0)) {spend=stack[stackpt-1]; return(spend+nt);}
	return (100);
}
 
/* Write the start tag numbered j to the target file and record it as
   open: reg/regl receive the current position and line, the tag claims
   its exclusion zone (if it has one), and enclosef is recomputed from
   reg. kk and jj are used as work variables. */
void begtag(int j)
{
	fputs(tags[j],fn);

	reg[j]=total;
	regl[j]=lines;

	kk=exclude_tags[j];
	if (kk!=0)
	{
		exclude[kk]=j;
	}

	/* enclosef is 1 when at least one tag is recorded as open */
	enclosef=0;
	for (jj=0;jj<nt;jj++)
	{
		if (reg[jj]) enclosef=1;
	}
}
 
/* Write the end tag for tag number j to the target file ("</" followed
   by the tag text after its '<') and record the tag as closed: reg/regl
   are cleared, the tag's exclusion zone (if any) is released, and
   enclosef is recomputed from reg. kk and jj are used as work variables. */
void endtag(int j)
{
	fputs("</",fn);
	fputs(tags[j]+1,fn);

	reg[j]=0;
	regl[j]=0;

	kk=exclude_tags[j];
	if (kk!=0)
	{
		exclude[kk]=-1;
	}

	/* enclosef is 1 when at least one tag is still recorded as open */
	enclosef=0;
	for (jj=0;jj<nt;jj++)
	{
		if (reg[jj]) enclosef=1;
	}
}
 
/* Return the next input character, or EOF.

   Without /m and /h this is getc(fi), stored in c. With either switch,
   the control characters below 9, 11 and 14..31 and the characters above
   127 are replaced by the text found in the macintosh or pchigh table;
   the replacement is handed out one character per call, and an empty
   replacement drops the character. With /m a carriage return becomes a
   line feed. */
int newgetc(void)
{
	static char *pending;     /* last character handed out from a replacement */

	if (!(mactxt||highbit))
	{
		c=getc(fi);
		return(c);
	}

	/* Still inside a replacement string: return its next character. */
	if (pending!=NULL)
	{
		if (pending[1]!=0)
		{
			pending++;
			return((unsigned char)*pending);
		}
		pending=NULL;
	}

	for (;;)
	{
		c=getc(fi);
		pending=NULL;
		if (c==EOF) return(c);

		/* Unwanted control characters become 255, whose table entry is
		   the empty string. */
		if ((c<9)||(c==11)||((c>13)&&(c<32))) c=255;

		if (mactxt)
		{
			if (c>127)
			{
				pending=macintosh[c&127];
				c=(unsigned char)*pending;
			}
			else if (c==13)
			{
				c=10;
			}
		}
		if (highbit&&(c>127))
		{
			pending=pchigh[c&127];
			c=(unsigned char)*pending;
		}

		if (c) break;     /* an empty replacement: read the next character */
	}
	return(c);
}
 
