/*
 * prettify.c
 * Bob Crispen (crispen@hiwaay.net)
 *
 * Takes a file of Ada 83 source code in stdin (or just a single line of a
 * file or paragraph if invoked from an editor) and converts the case
 * according to the following rules:
 *
 *    (1) Reserved words and attributes in lowercase
 *    (2) Identifiers in Mixed_Case
 *
 * Text following comments "--" is copied verbatim
 *
 * I use this in my vi editor to prettify a single line, by putting
 * the following in my .exrc file:
 *
 * map ^V<your fkey> :.! d:\usr\bin\prettify^M
 *
 * XXX One of the lads at work uses Xinotech in a way that seriously
 * screws up the appearance of files.  My first priority will be to
 * get rid of some of the worst of these artifacts (e.g., extra spaces
 * at the ends of lines, extra blank lines and comment lines).
 *
 * XXX I'll put in the identifiers and attributes for Ada 95 when I
 * get around to it.
 *
 * This software is distributed according to the terms of the Gnu
 * General Public License.
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * If you've got Ada source files with lines > LINE_SIZE chars, you've
 * got more problems than prettify will cure!
 */
#define LINE_SIZE 200
#define A_LITTLE_BIT 100

/*
 * Forward declarations
 *
 * process_token is declared with an int return type because its
 * definition has always had one (implicitly); callers ignore the value.
 */
void prettify_one (char *buf);
int process_token (char *token, int tokend, int may_be_attr);
void process_variable (char *token, int tokend);
int is_attribute (char *word);
int is_ada_keyword (char *word);
int is_punc (int c);
void print_subtok (char *subtok);

/*
 * Filter stdin to stdout, prettifying one line at a time.
 *
 * Characters are collected until a newline arrives and the completed line
 * is handed to prettify_one().  A final line that has no newline is
 * handed over as it stands.
 *
 * A line that fills the buffer before its newline arrives is handed over
 * in pieces instead of being truncated.  The split is made just after the
 * last whitespace character in the buffer when there is one, so that a
 * word is not cut in half; the unprocessed tail is carried over to the
 * next piece.  Each piece is prettified on its own, so prettify_one()
 * starts every piece outside of any comment or string.
 *
 * Returns 0.
 */
int main (void)
{
    /* One spare byte so that a full buffer can still be NUL-terminated */
    char buf[LINE_SIZE + 1];
    char tail[LINE_SIZE];
    int bufptr = 0;
    int c;

    while ((c = getc(stdin)) != EOF) {
        buf[bufptr++] = (char)c;
        if (c == '\n') {
            buf[bufptr] = '\0';
            prettify_one(buf);
            bufptr = 0;
        } else if (bufptr == LINE_SIZE) {
            int split = bufptr;
            int keep;

            /* Look backwards for a place to split after whitespace */
            while ((split > 0) && !isspace((unsigned char)buf[split - 1]))
                split--;
            /* No whitespace at all: hand over the whole buffer */
            if (split == 0)
                split = bufptr;

            keep = bufptr - split;
            memcpy(tail, buf + split, (size_t)keep);
            buf[split] = '\0';
            prettify_one(buf);
            memcpy(buf, tail, (size_t)keep);
            bufptr = keep;
        }
    }
    if (bufptr) {
        buf[bufptr] = '\0';
        prettify_one(buf);
    }
    return 0;
}

/*
 * Perform prettification on one line and write the result to stdout.
 *
 * buf is a NUL-terminated piece of source text; it is only read.  The
 * work is done in two passes:
 *
 *   1. Copy buf to a scratch buffer while normalizing the spacing around
 *      colons, semicolons and parentheses.
 *   2. Walk the scratch buffer, writing punctuation straight through and
 *      passing each token to process_token() for case conversion.
 *
 * In both passes, text after "--" and text between double quotes is
 * copied verbatim.
 *
 * The buffer does not have to end in a newline: a token still pending
 * when the end of the buffer is reached is written out as well.
 *
 * Known limitation: character literals are not recognized.  The tick is
 * treated as ordinary punctuation, so the spacing rules and the case
 * conversion also apply to a character written between ticks, and a
 * double quote between ticks is taken as the start of a string.
 *
 * If the scratch buffers cannot be allocated, buf is written to stdout
 * unchanged and a message is written to stderr.
 */
void prettify_one (char *buf)
{
    int tokptr, in_comment, in_string, dont_put, c;
    int c1 = ' ';
    int may_be_attr = 0;
    size_t bufptr = 0;
    size_t newbufptr = 0;
    /*
     * Pass 1 writes at most three characters for each input character
     * (a space before, a space after, and the character itself).
     */
    size_t cap = 3 * strlen(buf) + 1;
    char *token = malloc(cap);
    char *newbuf = malloc(cap);

    if ((token == NULL) || (newbuf == NULL)) {
        fputs("prettify: out of memory, line left unchanged\n", stderr);
        fputs(buf, stdout);
        free(token);
        free(newbuf);
        return;
    }

    /*
     * Copy from our original buffer to another buffer, cleaning up the
     * spaces around colons, semicolons and parens.  c1 always holds the
     * previous character; it starts out as a space.
     */
    in_string = 0;
    in_comment = 0;
    dont_put = 0;
    while ((c = (unsigned char)buf[bufptr++]) != '\0') {
        if (in_comment || in_string) {
            newbuf[newbufptr++] = (char)c;
        } else {

            /*
             * Colons should be preceded and followed by at least 1 space
             * (but ":=" stays together)
             */
            if ((c == ':') && (!isspace(c1)))
                newbuf[newbufptr++] = ' ';
            if ((c1 == ':') && (!isspace(c)) && (c != '='))
                newbuf[newbufptr++] = ' ';

            /*
             * L parens should not be followed by a space.  The space is
             * discarded and c is replaced by the paren so that c1 still
             * reads '(' for the next character.
             */
            if ((c1 == '(') && (isspace(c)) && (c != '\n')) {
                c = c1;
                dont_put = 1;
            }

            /*
             * R parens and semicolons should not be preceded by a space:
             * back up over the whitespace already copied, but never past
             * the start of the buffer.
             */
            if ((c == ';') || (c == ')')) {
                while ((newbufptr > 0) && isspace(c1)) {
                    newbufptr--;
                    if (newbufptr > 0)
                        c1 = (unsigned char)newbuf[newbufptr - 1];
                    else
                        c1 = ' ';
                }
            }

            /*
             * The first L paren of an expression should be preceded by a
             * space unless it follows a dot
             */
            if ((c == '(') && (!isspace(c1)) && (c1 != '(') && (c1 != '.'))
                newbuf[newbufptr++] = ' ';

            /*
             * Copy the character to the new buffer, unless we want to
             * discard it
             */
            if (dont_put)
                dont_put = 0;
            else
                newbuf[newbufptr++] = (char)c;
        }

        /*
         * Track whether the following characters are inside a comment
         * or a string
         */
        if ((c == '-') && (c1 == '-'))
            in_comment = 1;
        if (in_string)
            in_string = (c != '"');
        else
            in_string = (c == '"');
        c1 = c;
    }
    newbuf[newbufptr] = '\0';

    /*
     * Process a character at a time out of the second buffer
     */
    tokptr = 0;
    in_comment = 0;
    in_string = 0;
    c1 = ' ';
    for (newbufptr = 0; (c = (unsigned char)newbuf[newbufptr]) != '\0';
         newbufptr++) {
        if (in_comment || in_string) {
            putc(c, stdout);
        } else if (is_punc(c)) {
            /* If we've built up a token, output it now */
            if (tokptr) {
                token[tokptr] = '\0';
                process_token(token, tokptr, may_be_attr);
                tokptr = 0;
            }
            /*
             * The next token may be an attribute name only if the last
             * non-blank punctuation mark was a tick
             */
            if (!isspace(c))
                may_be_attr = (c == '\'');
            putc(c, stdout);
        } else {
            token[tokptr++] = (char)c;
        }

        /* Inside a comment field, just output the characters */
        if ((c == '-') && (c1 == '-'))
            in_comment = 1;
        /* Ditto for inside a string */
        if (in_string)
            in_string = (c != '"');
        else
            in_string = (c == '"');
        c1 = c;
    }

    /* The buffer ended in the middle of a token: don't lose it */
    if (tokptr) {
        token[tokptr] = '\0';
        process_token(token, tokptr, may_be_attr);
    }

    free(token);
    free(newbuf);
}

/*
 * Process a token and write it to stdout.  A token is a string of
 * characters bounded by spaces or punctuation (but not underscores).
 * A token may be a keyword, an attribute name, or a symbolic name.
 *
 *   token        the characters of the token; only the first tokend
 *                characters are used and they are not modified
 *   tokend       number of characters in the token
 *   may_be_attr  nonzero if the token may be an attribute name
 *
 * The token is converted to lower case.  Ada keywords, and attribute
 * names when may_be_attr is set, are written in that form; anything
 * else is handed to process_variable().
 *
 * If the working copy cannot be allocated, the token is written
 * unchanged.
 *
 * Always returns 0.  The return value carries no information.
 */
int process_token (char *token, int tokend, int may_be_attr)
{
    char *newtoken;
    int i;

    if (tokend <= 0)
        return 0;

    newtoken = malloc((size_t)tokend + 1);
    if (newtoken == NULL) {
        fwrite(token, 1, (size_t)tokend, stdout);
        return 0;
    }

    /*
     * Copy the token into newtoken, converting it to lower-case.  The
     * copy must be NUL-terminated because the keyword and attribute
     * lookups compare it with strcmp.
     */
    for (i = 0; i < tokend; i++)
        newtoken[i] = (char)tolower((unsigned char)token[i]);
    newtoken[tokend] = '\0';

    /*
     * Ada keywords and attribute names stay in lower case
     *
     * XXX Here's where you'd convert attributes to uppercase
     * if that's your coding standard
     */
    if (is_ada_keyword(newtoken)
        || (may_be_attr && is_attribute(newtoken))) {
        fwrite(newtoken, 1, (size_t)tokend, stdout);
    } else {
        process_variable(newtoken, tokend);
    }

    free(newtoken);
    return 0;
}


/*
 * The token is definitely a variable name.  Convert initial letters of
 * all words to uppercase and write the result to stdout.
 *
 *   token   the name; only the first tokend characters are used and
 *           they are not modified
 *   tokend  number of characters in the name
 *
 * The name is cut into the parts separated by underscores.  The first
 * character of each part is converted with toupper, the other characters
 * are copied as they are, and each part is written with print_subtok().
 * The underscores themselves are written unchanged.
 *
 * If the working buffer cannot be allocated, the name is written as it
 * was passed in.
 */
void process_variable (char *token, int tokend)
{
    int i, initial, subtokptr;
    char c;
    char *subtok;

    if (tokend < 0)
        tokend = 0;

    /* A part can never be longer than the whole name */
    subtok = malloc((size_t)tokend + 1);
    if (subtok == NULL) {
        fwrite(token, 1, (size_t)tokend, stdout);
        return;
    }

    subtokptr = 0;
    initial = 1;
    for (i = 0; i < tokend; i++) {
        c = token[i];
        if (initial) {
            /* First character of a part */
            initial = 0;
            subtok[subtokptr++] = (char)toupper((unsigned char)c);
        } else if (c == '_') {
            /*
             * End of a part: print it out, possibly converting some
             * characters to uppercase if it's one of our recognized
             * abbreviations, then print the underscore
             */
            initial = 1;
            subtok[subtokptr] = '\0';
            print_subtok(subtok);
            putc(c, stdout);
            subtokptr = 0;
        } else {
            subtok[subtokptr++] = c;
        }
    }

    /* Print whatever follows the last underscore */
    subtok[subtokptr] = '\0';
    print_subtok(subtok);

    free(subtok);
}


/* The reserved words of Ada 83, in lower case */
#define NUM_KEYWORDS 63
char *keywords[NUM_KEYWORDS] = {
    "abort",     "abs",       "accept", "access",   "all",     "and",     "array",
    "at",        "begin",     "body",   "case",     "constant", "declare", "delay",
    "delta",     "digits",    "do",     "else",     "elsif",   "end",     "entry",
    "exception", "exit",      "for",    "function", "generic", "goto",    "if",
    "in",        "is",        "limited", "loop",    "mod",     "new",     "not",
    "null",      "of",        "or",     "others",   "out",     "package", "pragma",
    "private",   "procedure", "raise",  "range",    "record",  "rem",     "renames",
    "return",    "reverse",   "select", "separate", "subtype", "task",    "terminate",
    "then",      "type",      "use",    "when",     "while",   "with",    "xor"
};

/*
 * Is this an Ada 83 keyword?
 *
 * Returns 1 if word is exactly equal to one of the entries of the
 * keywords table and 0 otherwise.  The comparison is case-sensitive and
 * the table is in lower case.
 */
int is_ada_keyword (char *word)
{
    char **entry = keywords;
    char **past_end = keywords + NUM_KEYWORDS;

    while (entry != past_end) {
        if (strcmp(word, *entry) == 0)
            return 1;
        entry++;
    }
    return 0;
}

/* The attribute names that are recognized, in lower case */
#define NUM_ATTRIBUTES 40
char *attributes[NUM_ATTRIBUTES] = {
    "address",          "aft",               "base",          "callable",       "constrained",
    "count",            "delta",             "digits",        "emax",           "epsilon",
    "first",            "first_bit",         "fore",          "image",          "large",
    "last",             "last_bit",          "length",        "machine_emax",   "machine_emin",
    "machine_mantissa", "machine_overflows", "machine_radix", "machine_rounds", "mantissa",
    "pos",              "position",          "pred",          "range",          "safe_emax",
    "safe_large",       "safe_small",        "size",          "small",          "storage_size",
    "succ",             "terminated",        "val",           "value",          "width"
};

/*
 * Is this an attribute?
 *
 * Returns 1 if word is exactly equal to one of the entries of the
 * attributes table and 0 otherwise.  The comparison is case-sensitive
 * and the table is in lower case.
 */
int is_attribute (char *word)
{
    int found = 0;
    int idx = 0;

    while (!found && (idx < NUM_ATTRIBUTES)) {
        found = (strcmp(word, attributes[idx]) == 0);
        idx++;
    }
    return found ? 1 : 0;
}

/*
 * Characters that end a token: white space and punctuation marks.
 *
 * Carriage return and vertical tab are included along with the other
 * white space characters, so that a line ending in CR LF does not leave
 * the carriage return attached to the last token of the line.
 */
#define NUM_PUNC 24
char punc[NUM_PUNC] = {
    ' ',
    '\n',
    '\r',
    '\f',
    '\t',
    '\v',
    '"',
    '#',
    '&',
    '\'',
    '(',
    ')',
    '*',
    '+',
    '-',
    '.',
    ',',
    '/',
    ':',
    ';',
    '<',
    '=',
    '>',
    '|'
};

/*
 * Is the character a punctuation mark?
 *
 * Returns 1 if c is equal to one of the entries of the punc table
 * (white space included) and 0 otherwise.  The underscore is not in the
 * table, so it is treated as part of a token.
 */
int is_punc (int c)
{
    int i;
    for (i = 0; i < NUM_PUNC; i++) {
        if (c == punc[i])
            return 1;
    }
    return 0;
}

/*
 * Abbreviations for this program which are not to be printed in the
 * standard mixed-case form
 *
 * This is where you'd put your domain-specific abbreviations.  Hard-coding
 * this, rather than putting it in a file, discourages adding abbreviations
 * to the domain, and (in most organizations) requires a change request.
 * The idea is to do a little domain engineering.
 *
 * Each entry is a pair: the mixed-case form as process_variable produces
 * it, followed by the form to print instead.
 */
#define NUM_PROG_ABBREV 68
char *progabbrev[NUM_PROG_ABBREV][2] = {
    {"Ac", "AC"},     {"Adc", "ADC"},   {"Adf", "ADF"},       {"Adi", "ADI"},
    {"Ads", "ADS"},   {"Afcs", "AFCS"}, {"Ahrs", "AHRS"},     {"Ai", "AI"},
    {"Aoa", "AOA"},   {"Apu", "APU"},   {"Biu", "BIU"},       {"Buc", "BUC"},
    {"Cadc", "CADC"}, {"Ccip", "CCIP"}, {"Cg", "CG"},         {"Dc", "DC"},
    {"Di", "DI"},     {"Dme", "DME"},   {"Eec", "EEC"},       {"Env", "ENV"},
    {"Epr", "EPR"},   {"Epu", "EPU"},   {"Ew", "EW"},         {"Fc", "FC"},
    {"Fd", "FD"},     {"Fddi", "FDDI"}, {"Fs", "FS"},         {"Fvr", "FVR"},
    {"Hf", "HF"},     {"Hsi", "HSI"},   {"Hud", "HUD"},       {"Id", "ID"},
    {"Ieee", "IEEE"}, {"Iff", "IFF"},   {"Ii", "II"},         {"Ils", "ILS"},
    {"Ins", "INS"},   {"Inu", "INU"},   {"Io", "IO"},         {"Ios", "IOS"},
    {"Jfs", "JFS"},   {"Kc", "KC"},     {"Kva", "kVA"},       {"Mhz", "MHz"},
    {"Nav", "NAV"},   {"Oap", "OAP"},   {"Phc", "PHC"},       {"Psi", "PSI"},
    {"Pro", "PRO"},   {"Pto", "PTO"},   {"Rdr", "RDR"},       {"Rpm", "RPM"},
    {"Rvr", "RVR"},   {"Sgi", "SGI"},   {"Sms", "SMS"},       {"Tacan", "TACAN"},
    {"Tcn", "TCN"},   {"Tne", "TNE"},   {"Typea", "TypeA"},   {"Typeb", "TypeB"},
    {"Ufc", "UFC"},   {"Uhf", "UHF"},   {"Vasi", "VASI"},     {"Vhf", "VHF"},
    {"Vis", "VIS"},   {"Vme", "VME"},   {"Vortac", "VORTAC"}, {"Wpn", "WPN"}
};

/*
 * Print a subtoken, and if it's a recognized abbreviation, print the
 * capitalized version (e.g., Kva -> kVA)
 *
 * The replacement is copied into subtok itself before printing, so the
 * caller's buffer is changed too.  Every replacement in the table has the
 * same length as the form it replaces, so the copy fits in the space the
 * subtoken already occupies.
 */
void print_subtok (char *subtok)
{
    int idx = 0;

    while (idx < NUM_PROG_ABBREV) {
        if (strcmp(subtok, progabbrev[idx][0]) == 0) {
            strcpy(subtok, progabbrev[idx][1]);
        }
        idx++;
    }
    fputs(subtok, stdout);
}
