/*C_HEADER_FILE****************************************************************
FILE			:	XVocabLib.c
DESC			:	Functions needed to build a binary recognizer for the XBox
TABS			:	3
OWNER			:	Fonix
DATE CREATED:	19 April 2002

(C) Copyright 2001 All rights reserved.
This is an unpublished work, and is confidential and proprietary: 
technology and information of fonix corporation.  No part of this
code may be reproduced, used or disclosed without written consent of 
fonix corporation in each and every instance.

  $Date: 7/30/03 6:15p $
  $Revision: 8 $

*END_HEADER*******************************************************************/
#include "stdafx.h"
#include <stdio.h>
#include <string.h>
#include "FnxPlatform.h"
#include "language.h"
//#include "fnxexstdlib.h"
//#include "FnxMachine.h"
//#include "FnxStdlib.h"

/* First and last hiragana letter code points (0x3041 is small A and 0x3093 is N
   in the KatHir table below). */
#define HIRAGANA_BOTTOM 0x3041
#define HIRAGANA_TOP 0x3093

/* First and last katakana letter code points, going by the macro names. */
#define KATAKANA_BOTTOM 0x30A1
#define KATAKANA_TOP 0x30F6

/* convertToOffset(a): turn a code point into a zero-based table index.
     a >= 0xFF00          -> a - 0xFF66 (index into HalfwidthKatToFullKat)
     a <  0x309F          -> a - 0x3041 (index into KatHir, hiragana side)
     0x309F <= a < 0xFF00 -> a - 0x30A1 (index into KatHir, katakana side)
   The result is NOT range checked and can be negative or past the end of the
   table; callers must validate it.  The argument is evaluated more than once,
   so do not pass an expression with side effects.  The argument and the whole
   expansion are parenthesized so the macro can be used inside larger expressions. */
#define convertToOffset(a) (((a) < 0xFF00) ? ((0x309F > (a)) ? (a) - 0x3041 : (a) - 0x30a1) : (a) - 0xff66)


/* Halfwidth katakana (0xFF66..0xFF9F) to fullwidth katakana conversion table.
   Entry i describes code point 0xFF66 + i, so the table can be indexed directly
   with (code point - 0xFF66); KataHalfwidthToFull() relies on that ordering.
   The two halfwidth sound marks are mapped to 0x30FD / 0x30FE, which
   ConvertToAscii() tests for as its voiced / semi-voiced markers. */
conversion HalfwidthKatToFullKat[] =
{
   {0xFF66, 0x30F2}, //HALFWIDTH KATAKANA LETTER WO
   {0xFF67, 0x30A1}, //HALFWIDTH KATAKANA LETTER SMALL A
   {0xFF68, 0x30A3}, //HALFWIDTH KATAKANA LETTER SMALL I
   {0xFF69, 0x30A5}, //HALFWIDTH KATAKANA LETTER SMALL U
   {0xFF6A, 0x30A7}, //HALFWIDTH KATAKANA LETTER SMALL E
   {0xFF6B, 0x30A9}, //HALFWIDTH KATAKANA LETTER SMALL O
   {0xFF6C, 0x30E3}, //HALFWIDTH KATAKANA LETTER SMALL YA
   {0xFF6D, 0x30E5}, //HALFWIDTH KATAKANA LETTER SMALL YU
   {0xFF6E, 0x30E7}, //HALFWIDTH KATAKANA LETTER SMALL YO
   {0xFF6F, 0x30C3}, //HALFWIDTH KATAKANA LETTER SMALL TU
   {0xFF70, 0x30FC}, //HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
   {0xFF71, 0x30A2}, //HALFWIDTH KATAKANA LETTER A
   {0xFF72, 0x30A4}, //HALFWIDTH KATAKANA LETTER I
   {0xFF73, 0x30A6}, //HALFWIDTH KATAKANA LETTER U
   {0xFF74, 0x30A8}, //HALFWIDTH KATAKANA LETTER E
   {0xFF75, 0x30AA}, //HALFWIDTH KATAKANA LETTER O
   {0xFF76, 0x30AB}, //HALFWIDTH KATAKANA LETTER KA
   {0xFF77, 0x30AD}, //HALFWIDTH KATAKANA LETTER KI
   {0xFF78, 0x30AF}, //HALFWIDTH KATAKANA LETTER KU
   {0xFF79, 0x30B1}, //HALFWIDTH KATAKANA LETTER KE
   {0xFF7A, 0x30B3}, //HALFWIDTH KATAKANA LETTER KO
   {0xFF7B, 0x30B5}, //HALFWIDTH KATAKANA LETTER SA
   {0xFF7C, 0x30B7}, //HALFWIDTH KATAKANA LETTER SI
   {0xFF7D, 0x30B9}, //HALFWIDTH KATAKANA LETTER SU
   {0xFF7E, 0x30BB}, //HALFWIDTH KATAKANA LETTER SE
   {0xFF7F, 0x30BD}, //HALFWIDTH KATAKANA LETTER SO
   {0xFF80, 0x30BF}, //HALFWIDTH KATAKANA LETTER TA
   {0xFF81, 0x30C1}, //HALFWIDTH KATAKANA LETTER TI CHI
   {0xFF82, 0x30C4}, //HALFWIDTH KATAKANA LETTER TU (was 0x30C2, which is DI)
   {0xFF83, 0x30C6}, //HALFWIDTH KATAKANA LETTER TE
   {0xFF84, 0x30C8}, //HALFWIDTH KATAKANA LETTER TO
   {0xFF85, 0x30CA}, //HALFWIDTH KATAKANA LETTER NA
   {0xFF86, 0x30CB}, //HALFWIDTH KATAKANA LETTER NI
   {0xFF87, 0x30CC}, //HALFWIDTH KATAKANA LETTER NU
   {0xFF88, 0x30CD}, //HALFWIDTH KATAKANA LETTER NE
   {0xFF89, 0x30CE}, //HALFWIDTH KATAKANA LETTER NO
   {0xFF8A, 0x30CF}, //HALFWIDTH KATAKANA LETTER HA
   {0xFF8B, 0x30D2}, //HALFWIDTH KATAKANA LETTER HI
   {0xFF8C, 0x30D5}, //HALFWIDTH KATAKANA LETTER HU
   {0xFF8D, 0x30D8}, //HALFWIDTH KATAKANA LETTER HE
   {0xFF8E, 0x30DB}, //HALFWIDTH KATAKANA LETTER HO
   {0xFF8F, 0x30DE}, //HALFWIDTH KATAKANA LETTER MA
   {0xFF90, 0x30DF}, //HALFWIDTH KATAKANA LETTER MI
   {0xFF91, 0x30E0}, //HALFWIDTH KATAKANA LETTER MU (was 0x30DF, which is MI)
   {0xFF92, 0x30E1}, //HALFWIDTH KATAKANA LETTER ME
   {0xFF93, 0x30E2}, //HALFWIDTH KATAKANA LETTER MO
   {0xFF94, 0x30E4}, //HALFWIDTH KATAKANA LETTER YA
   {0xFF95, 0x30E6}, //HALFWIDTH KATAKANA LETTER YU
   {0xFF96, 0x30E8}, //HALFWIDTH KATAKANA LETTER YO
   {0xFF97, 0x30E9}, //HALFWIDTH KATAKANA LETTER RA
   {0xFF98, 0x30EA}, //HALFWIDTH KATAKANA LETTER RI
   {0xFF99, 0x30EB}, //HALFWIDTH KATAKANA LETTER RU
   {0xFF9A, 0x30EC}, //HALFWIDTH KATAKANA LETTER RE
   {0xFF9B, 0x30ED}, //HALFWIDTH KATAKANA LETTER RO
   {0xFF9C, 0x30EF}, //HALFWIDTH KATAKANA LETTER WA
   {0xFF9D, 0x30F3}, //HALFWIDTH KATAKANA LETTER N
   {0xFF9E, 0x30FD}, //HALFWIDTH KATAKANA VOICED SOUND MARK (internal marker value)
   {0xFF9F, 0x30FE}, //HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK (internal marker value)
};



/* Kana-to-romaji table.  Entry i describes hiragana code point 0x3041 + i.
   convertToOffset() maps katakana code point 0x30A1 + i onto the same index,
   so the one table serves both scripts.  The entries marked "filler" have no
   hiragana letter of their own in this table; they exist so that the katakana
   code points at the same offsets (0x30F7 onward) resolve to an entry. */
language KatHir[] =
{
   {0x3041, "A"},    // small A
   {0x3042, "A"},
   {0x3043, "I"},    // small I
   {0x3044, "I"},
   {0x3045, "U"},    // small U
   {0x3046, "U"},
   {0x3047, "E"},    // small E
   {0x3048, "E"},
   {0x3049, "O"},    // small O
   {0x304A, "O"},
   {0x304B, "KA"},
   {0x304C, "GA"},
   {0x304D, "KI"},
   {0x304E, "GI"},
   {0x304F, "KU"},
   {0x3050, "GU"},
   {0x3051, "KE"},
   {0x3052, "GE"},
   {0x3053, "KO"},
   {0x3054, "GO"},
   {0x3055, "SA"},
   {0x3056, "ZA"},
   {0x3057, "SHI"},  // SI
   {0x3058, "JI"},   // JI
   {0x3059, "SU"},
   {0x305A, "ZU"},
   {0x305B, "SE"},
   {0x305C, "ZE"},
   {0x305D, "SO"},
   {0x305E, "ZO"},
   {0x305F, "TA"},
   {0x3060, "DA"},
   {0x3061, "CHI"},
   {0x3062, "JI"},
   {0x3063, "TSU"},  // TODO: small TU
   {0x3064, "TSU"},  // TODO: Based on JIS X 0208 changed form ZU
   {0x3065, "DU"},
   {0x3066, "TE"},
   {0x3067, "DE"},
   {0x3068, "TO"},
   {0x3069, "DO"},
   {0x306A, "NA"},
   {0x306B, "NI"},
   {0x306C, "NU"},
   {0x306D, "NE"},
   {0x306E, "NO"},
   {0x306F, "HA"},
   {0x3070, "BA"},
   {0x3071, "PA"},
   {0x3072, "HI"},
   {0x3073, "BI"},
   {0x3074, "PI"},
   {0x3075, "FU"},
   {0x3076, "BU"},
   {0x3077, "PU"},
   {0x3078, "HE"},
   {0x3079, "BE"},
   {0x307A, "PE"},
   {0x307B, "HO"},
   {0x307C, "BO"},
   {0x307D, "PO"},
   {0x307E, "MA"},
   {0x307F, "MI"},
   {0x3080, "MU"},
   {0x3081, "ME"},
   {0x3082, "MO"},
   {0x3083, "YA"},   // small YA
   {0x3084, "YA"},
   {0x3085, "YU"},   // small YU
   {0x3086, "YU"},
   {0x3087, "YO"},   // small YO
   {0x3088, "YO"},
   {0x3089, "RA"},
   {0x308A, "RI"},
   {0x308B, "RU"},
   {0x308C, "RE"},
   {0x308D, "RO"},
   {0x308E, "WA"},   // small WA
   {0x308F, "WA"},
   {0x3090, "WI"},   //??
   {0x3091, "WE"},   //??
   {0x3092, "O"},
   {0x3093, "N"},
   {0x3094, "VU"},
   {0x3095, "KA"},   // small KA
   {0x3096, "KE"},   // small KE
   {0x3097, "VA"},   // filler
   {0x3098, "VI"},   // filler
   {0x3099, "VE"},   // filler
   {0x309A, "VO"},   // filler
   {0x309B, "\xB7"}, // Katakana-hiragana voiced sound mark
   {0x309C, "-"},    // Katakana-hiragana semi-voiced sound mark
   {0x309D, "-"},    // filler
   {0x309E, "-"},    // filler
};

// Separation of characters
// http://www.unicode.org/reports/tr28/#10_4_hangul
//http://h30097.www3.hp.com/docs/base_doc/DOCUMENTATION/V50_HTML/SUPPDOCS/KOREADOC/KOREACH2.HTM#ch2sect3
//
// Romanization of the individual Hangul letters 0x3131..0x318E.
// Entry i describes code point 0x3131 + i, so ConvertToAscii() can index the
// table with (code point - 0x3131).  Keep the entries contiguous and in order.
languageKorean HangulLetters[] =
{
	{0x3131, "K\0\0\0\0\0"},
	{0x3132, "KK\0\0\0\0"},
	{0x3133, "KS\0\0\0\0"},
	{0x3134, "N\0\0\0\0\0"},
	{0x3135, "NJ\0\0\0\0"},
	{0x3136, "NH\0\0\0\0"},
	{0x3137, "T\0\0\0\0\0"},
	{0x3138, "TT\0\0\0\0"},
	{0x3139, "R\0\0\0\0\0"},
	{0x313A, "LG\0\0\0\0"},
	{0x313B, "LM\0\0\0\0"},
	{0x313C, "LB\0\0\0\0"},
	{0x313D, "LS\0\0\0\0"},
	{0x313E, "LT\0\0\0\0"},
	{0x313F, "LP\0\0\0\0"},
	{0x3140, "LH\0\0\0\0"},
	{0x3141, "M\0\0\0\0\0"},
	{0x3142, "P\0\0\0\0\0"},
	{0x3143, "PP\0\0\0\0"},
	{0x3144, "PS\0\0\0\0"},
	{0x3145, "S\0\0\0\0\0"},
	{0x3146, "SS\0\0\0\0"},
	{0x3147, "NG\0\0\0\0"},
	{0x3148, "J\0\0\0\0\0"},
	{0x3149, "JJ\0\0\0\0"},
	{0x314A, "CH\0\0\0\0"},
	{0x314B, "K\0\0\0\0\0"},
	{0x314C, "T\0\0\0\0\0"},
	{0x314D, "P\0\0\0\0\0"},
	{0x314E, "H\0\0\0\0\0"},
	{0x314F, "A\0\0\0\0\0"},
	{0x3150, "AE\0\0\0\0"},
	{0x3151, "YA\0\0\0\0"},
	{0x3152, "YAE\0\0\0"},
	{0x3153, "EO\0\0\0\0"},
	{0x3154, "E\0\0\0\0\0"},
	{0x3155, "YEO\0\0\0"},
	{0x3156, "YE\0\0\0\0"},
	{0x3157, "O\0\0\0\0\0"},
	{0x3158, "WA\0\0\0\0"},
	{0x3159, "WAE\0\0\0"},
	{0x315A, "OE\0\0\0\0"},
	{0x315B, "YO\0\0\0\0"},
	{0x315C, "U\0\0\0\0\0"},
	{0x315D, "WO\0\0\0\0"},
	{0x315E, "WE\0\0\0\0"},
	{0x315F, "WI\0\0\0\0"},
	{0x3160, "YU\0\0\0\0"},
	{0x3161, "EU\0\0\0\0"},
	{0x3162, "YI\0\0\0\0"},
	{0x3163, "I\0\0\0\0\0"},
	{0x3164, " \0\0\0\0\0"},
	{0x3165, "NN\0\0\0\0"},
	{0x3166, "ND\0\0\0\0"},
	{0x3167, "NS\0\0\0\0"},
	{0x3168, "NZ\0\0\0\0"},
	{0x3169, "LGS\0\0\0"},
	{0x316A, "LD\0\0\0\0"},
	{0x316B, "LBS\0\0\0"},
	{0x316C, "LZ\0\0\0\0"},
	{0x316D, "LH\0\0\0\0"},
	{0x316E, "MB\0\0\0\0"},
	{0x316F, "MS\0\0\0\0"},
	{0x3170, "MZ\0\0\0\0"},
	{0x3171, "FM\0\0\0\0"},
	{0x3172, "PG\0\0\0\0"},
	{0x3173, "PD\0\0\0\0"},
	{0x3174, "PTG\0\0\0"},
	{0x3175, "PTD\0\0\0"},
	{0x3176, "PJ\0\0\0\0"},
	{0x3177, "PT\0\0\0\0"},
	{0x3178, "FP\0\0\0\0"},
	{0x3179, "FPP\0\0\0"},
	{0x317A, "TG\0\0\0\0"},
	{0x317B, "TN\0\0\0\0"},
	{0x317C, "TD\0\0\0\0"},
	{0x317D, "TB\0\0\0\0"},
	{0x317E, "TJ\0\0\0\0"},
	{0x317F, "Z\0\0\0\0\0"},
	{0x3180, "NG\0\0\0\0"},
	{0x3181, "NG\0\0\0\0"},
	{0x3182, "NGS\0\0\0"},
	{0x3183, "NGZ\0\0\0"},
	{0x3184, "FP\0\0\0\0"},
	{0x3185, "H\0\0\0\0\0"},
	{0x3186, "H\0\0\0\0\0"},
	{0x3187, "YOYA\0\0"},	// was "YOYA0\0": a literal '0' had been typed instead of the escape "\0"
	{0x3188, "YOYAE\0"},
	{0x3189, "YOI\0\0\0"},
	{0x318A, "YUYEO\0"},
	{0x318B, "YUYE\0\0"},
	{0x318C, "YUI\0\0\0"},
	{0x318D, "EO\0\0\0\0"},
	{0x318E, "EO\0\0\0\0"},
};

// Romanizations for the contiguous code points 0x3260..0x327B, one entry per
// code point in ascending order.  Entries from 0x326E onward carry a trailing
// " A" (consonant plus the vowel A).
// NOTE(review): this table is not referenced anywhere in the visible part of
// this file; presumably it is reserved for the circled Hangul block - confirm
// before removing or extending it.
languageKorean HangulLetters2[] =
{
	{0x3260, "K"},
	{0x3261, "N"},
	{0x3262, "T"},
	{0x3263, "R"},
	{0x3264, "M"},
	{0x3265, "P"},
	{0x3266, "S"},
	{0x3267, "NG"},
	{0x3268, "J"},
	{0x3269, "CH"},
	{0x326A, "K"},
	{0x326B, "T"},
	{0x326C, "P"},
	{0x326D, "H"},
	{0x326E, "G A"},
	{0x326F, "N A"},
	{0x3270, "D A"},
	{0x3271, "R A"},
	{0x3272, "M A"},
	{0x3273, "B A"},
	{0x3274, "S A"},
	{0x3275, "NG A"},
	{0x3276, "J A"},
	{0x3277, "CH A"},
	{0x3278, "K A"},
	{0x3279, "T A"},
	{0x327A, "P A"},
	{0x327B, "H A"},
};


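// Formula for decomposing a precomposed Hangul syllable (0xAC00..0xD7A3)
//
//   offset = Unicode - 0xAC00
//
//   offset / 588        = index into HangulSyllablesInitial  (588 = 21 * 28)
//
//   (offset / 28) % 21  = index into HangulSyllablesMiddle
//
//   offset % 28         = final index: 0 means the syllable has no final
//                         consonant, otherwise the final consonant is
//                         HangulSyllablesFinal[final index - 1]
//
//   21 = number of middle (vowel) letters
//   28 = number of final values (27 consonants plus "no final")
//
//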
/* Romanization of the 27 Hangul final consonants.  A syllable's final value
   (offset % 28) is 1-based here: value 0 means "no final consonant", value k
   selects entry k - 1. */
char HangulSyllablesFinal[][4] =
{
	"G\0\0",
	"GG\0",
	"GS\0",
	"N\0\0",
	"NJ\0",	/* NIEUN-JIEUT; was "NI", inconsistent with HangulLetters 0x3135 "NJ" */
	"NH\0",
	"D\0\0",
	"L\0\0",
	"LG\0",
	"LM\0",
	"LB\0",
	"LS\0",
	"LT\0",
	"LP\0",
	"LH\0",
	"M\0\0",
	"B\0\0",
	"BS\0",
	"S\0\0",
	"SS\0",
	"NG\0",
	"J\0\0",
	"C\0\0",
	"K\0\0",
	"T\0\0",
	"P\0\0",
	"H\0\0",
};

/* Romanization of the 21 Hangul middle (vowel) letters, indexed by
   (offset / 28) % 21 of a precomposed syllable.  Each row is a zero-padded
   5-byte string. */
char HangulSyllablesMiddle[][5] =
{
   "A",    /*  0 */
   "AE",   /*  1 */
   "YA",   /*  2 */
   "YAE",  /*  3 */
   "EO",   /*  4 */
   "E",    /*  5 */
   "YEO",  /*  6 */
   "YE",   /*  7 */
   "O",    /*  8 */
   "WA",   /*  9 */
   "WAE",  /* 10 */
   "OE",   /* 11 */
   "YO",   /* 12 */
   "U",    /* 13 */
   "WEO",  /* 14 */
   "WE",   /* 15 */
   "WI",   /* 16 */
   "YU",   /* 17 */
   "EU",   /* 18 */
   "YI",   /* 19 */
   "I",    /* 20 */
};

/* Romanization of the 19 Hangul initial consonants, indexed by offset / 588
   of a precomposed syllable.  Each comment gives the first syllable that uses
   the initial and its decomposition (initial letter + U+314F).  Entry 11 is
   the empty string: that initial contributes nothing to the output. */
char HangulSyllablesInitial[][4] =
{
   "G",   /*  0: U+AC00 = U+3131 U+314F */
   "GG",  /*  1: U+AE4C = U+3132 U+314F */
   "N",   /*  2: U+B098 = U+3134 U+314F */
   "D",   /*  3: U+B2E4 = U+3137 U+314F */
   "DD",  /*  4: U+B530 = U+3138 U+314F */
   "R",   /*  5: U+B77C = U+3139 U+314F */
   "M",   /*  6: U+B9C8 = U+3141 U+314F */
   "B",   /*  7: U+BC14 = U+3142 U+314F */
   "BB",  /*  8: U+BE60 = U+3143 U+314F */
   "S",   /*  9: U+C0AC = U+3145 U+314F */
   "SS",  /* 10: U+C2F8 = U+3146 U+314F */
   "",    /* 11: U+C544 = U+3147 U+314F */
   "J",   /* 12: U+C790 = U+3148 U+314F */
   "JJ",  /* 13: U+C9DC = U+3149 U+314F */
   "C",   /* 14: U+CC28 = U+314A U+314F */
   "K",   /* 15: U+CE74 = U+314B U+314F */
   "T",   /* 16: U+D0C0 = U+314C U+314F */
   "P",   /* 17: U+D30C = U+314D U+314F */
   "H",   /* 18: U+D558 = U+314E U+314F */
};


/*FUNCTION_HEADER**********************
 * NAME:    ;BlockByteSwap16
 * DESC:    Exchange the high and low byte of every 2-byte integer in an array, in place.
 * IN:      pw    - Array of 16-bit (2-byte) integers whose bytes are to be exchanged
            iSize - Number of elements in pw[]
 * OUT:     pw[0..iSize-1] each hold their previous value with the two bytes exchanged.
 * RETURN:  n/a
 * NOTES:   Calling the function twice on the same array restores the original contents.
 *END_HEADER***************************/
void BlockByteSwap16(UWORDPTR pw, UDWORD iSize)
{
   unsigned int iIndex;
   unsigned int uHighToLow;
   unsigned int uLowToHigh;

   for (iIndex = 0; iIndex < iSize; iIndex++)
   {
      uHighToLow = 0x00FFU & (pw[iIndex] >> 8);   /* old high byte moves down */
      uLowToHigh = 0xFF00U & (pw[iIndex] << 8);   /* old low byte moves up    */
      pw[iIndex] = uHighToLow | uLowToHigh;
   }
}


/*FUNCTION_HEADER*******************
 * NAME:	  ;KataHalfwidthToFull
 * DESC:	  Convert a unicode halfwidth katakana to full katakana.
 * IN:     pwchar - Pointer to the wide character to be converted (not modified).
 * OUT:    
 * RETURN: The fullwidth code from HalfwidthKatToFullKat[] for *pwchar, or 0 when
           pwchar is NULL, *pwchar is outside 0xFF66..0xFF9F, or the table entry
           at that position does not describe *pwchar.
 * NOTES:  The table is indexed with (*pwchar - 0xFF66).  The index is range
           checked first because callers pass any character above 0xFF66, and
           an unchecked index would read past the end of the table.
 *END_HEADER***************************/
wchar_t KataHalfwidthToFull(wchar_t *pwchar)
{
   const int nEntries = (int)(sizeof(HalfwidthKatToFullKat) / sizeof(HalfwidthKatToFullKat[0]));
   int offset;
   wchar_t wcTemp = 0;

   if (pwchar == NULL || *pwchar < 0xFF66)
   {
      return wcTemp;
   }

   offset = (int)(*pwchar - 0xFF66);
   if (offset >= nEntries)
   {
      return wcTemp;
   }

   // This is just an added protection against a typing error in the table.
   if (*pwchar == HalfwidthKatToFullKat[offset].wszUCSHalfWidthKat)
   {
      wcTemp = HalfwidthKatToFullKat[offset].wszFullKat;
   }

   return wcTemp;
}


/*FUNCTION_HEADER*******************
 * NAME:	  ;ConvertChar
 * DESC:	  Normalize one wide character, in place, from a JIS / Shift-JIS code
           to the value the rest of the converter works with.
 * IN:     pwch - Pointer to the character to normalize.
 * OUT:    *pwch is rewritten when it falls in one of the ranges below,
           otherwise it is left untouched:
             0x2330..0x237A -> (value + 0x14) & 0xFF
             0x824F..0x829A -> value & 0xFF
             0x00A6..0x00DF -> value + 0xFEC0   (halfwidth katakana 0xFF66..0xFF9F)
             0x8340..0x839E -> Unicode katakana starting at 0x30A1
             0x829F..0x82FC -> Unicode hiragana starting at 0x3041
 * RETURN: n/a
 * NOTES:  Shift-JIS never uses 0x7F as a trail byte, so the katakana row has a
           hole at 0x837F: codes from 0x8380 upward are one position closer to
           the start of the row than their numeric value suggests and need an
           offset that is larger by one.
           NOTE(review): the two "maps to ASCII" branches are kept exactly as
           they were; whether their arithmetic yields the intended ASCII
           letters depends on the input encoding - confirm against real data.
 *END_HEADER***************************/
void ConvertChar(wchar_t *pwch)
{
   if (pwch == NULL)
   {
      return;
   }

   if (*pwch >= 0x2330 && *pwch <= 0x237A)
   {
      // This should handle JIS that maps to ASCII not romanji
      *pwch = (*pwch + 0x0014) & 0x00FF;
   }
   else if (*pwch >= 0x824F && *pwch <= 0x829A)
   {
      // This should handle SJIS that maps to ASCII not romanji
      *pwch &= 0x00FF;
   }
   else if (*pwch >= 0x00A6 && *pwch <= 0x00DF)
   {
      // Convert the Shift-JIS to halfwidth Katakana for processing.
      *pwch = *pwch + 0xFEC0;
   }
   else if (*pwch >= 0x8340 && *pwch <= 0x839E)
   {
      // Convert the Shift-JIS Katakana to Unicode Katakana for processing.
      if (*pwch >= 0x8380)
      {
         // Past the unused 0x7F trail byte: compensate for the hole.
         *pwch -= 0x52A0;
      }
      else
      {
         *pwch -= 0x529F;
      }
   }
   else if (*pwch >= 0x829F && *pwch <= 0x82FC)
   {
      // Convert the Shift-JIS Hiragana to Unicode Hiragana for processing.
      *pwch -= 0x525E;
   }
}

#define MAX_PHRASE_LENGTH 2048 // Maximum length a phrase for recognition is allowed to be.

/* Number of entries in the KatHir romanization table. */
#define KATHIR_ENTRY_COUNT ((int)(sizeof(KatHir) / sizeof(KatHir[0])))

/* Copy szSrc (without its terminator) to szDest and return the new end of output. */
static char *FnxAppendAscii(char *szDest, const char *szSrc)
{
   size_t nLength = strlen(szSrc);

   memcpy(szDest, szSrc, nLength);
   return szDest + nLength;
}

/* Convert *pwch from halfwidth to fullwidth katakana in place when it is a
   halfwidth katakana, then return its (unchecked) KatHir index. */
static int FnxKanaOffset(wchar_t *pwch)
{
   int iCode;

   if (*pwch >= 0xFF66 && *pwch <= 0xFF9F)
   {
      *pwch = KataHalfwidthToFull(pwch);
   }
   iCode = (int)*pwch;
   return convertToOffset(iCode);
}

/*FUNCTION_HEADER*******************
 * NAME:	  ;ConvertToAscii
 * DESC:	  Convert a unicode string to ASCII, using language-specific translations when necessary.
 * IN:     wstr  - String to be converted.  It is treated as a multibyte (char)
                   string when bUnicode is FALSE and its first byte is below
                   0x80 or in 0xA6..0xDF; otherwise it is read as wide characters.
           szbuf - Character string to put the result into.
           bUnicode - TRUE when wstr is known to hold wide characters.
 * OUT:    *szbuf contains the NUL-terminated string in wstr converted to ASCII.
           When wstr is read as wide characters it is modified in place
           (Shift-JIS / halfwidth characters are normalized).
 * RETURN: 0 on success, -1 on error (NULL argument or failed multibyte conversion).
 * NOTES:  Currently supported languages:
             English: 16-bit Unicode converted to 8-bit ASCII.
             Japanese: Shift-JIS, halfwidth Katakana, Katakana, and Hirigana Unicode converted to Romanji ASCII.
             Korean: Hangul syllables (0xAC00..0xD7A3) and Hangul letters (0x3131..0x318E).
           Characters that match none of the above are dropped.
           szbuf has no size parameter: the caller must provide room for the
           worst case (one Hangul syllable can expand to 7 characters) plus the
           terminating NUL.
 *END_HEADER***************************/
int ConvertToAscii(wchar_t *wstr, char *szbuf, BOOL bUnicode)
{
   wchar_t wsBuf[MAX_PHRASE_LENGTH] = {0};
   wchar_t *pwsTemp = wstr;
   wchar_t *pwsEnd;
   char *szTemp = szbuf;
   const unsigned char *pszTest = (const unsigned char *)wstr;
   int offset;
   int offset1;

   if (wstr == NULL || szbuf == NULL)
   {
      return -1;
   }

   // Lets just test the first byte of the input to see if we need to convert
   // the string to wide characters. Halfwidth Katakana can come in several ways
   // depending on the character set.
   if ((*pszTest < 0x80 || (*pszTest >= 0xA6 && *pszTest <= 0xDF)) && !bUnicode)
   {
      // We need to convert the sentence to wide for the conversion to romaji.
      // At most MAX_PHRASE_LENGTH-1 characters are converted so the zero-filled
      // wsBuf always keeps its terminator.
      if (mbstowcs(wsBuf, (const char *)pszTest, MAX_PHRASE_LENGTH - 1) == (size_t)-1)
      {
         *szbuf = '\0';
         return -1;
      }
      pwsTemp = wsBuf;
   }

   // Walk up to a fixed end pointer.  Several cases below consume two input
   // characters at once; a separate "characters left" counter would fall out
   // of step with the pointer and the loop would run past the terminator.
   pwsEnd = pwsTemp + wcslen(pwsTemp);

   while (pwsTemp < pwsEnd)
   {
      ConvertChar(pwsTemp);

      if (*pwsTemp >= 0xAC00 && *pwsTemp <= 0xD7A3)
      {
         // Precomposed Hangul syllable: split into initial / middle / final.
         int syllable = (int)(*pwsTemp - 0xAC00);
         int initialIndex = syllable / 588;        // 588 = 21 middles * 28 finals
         int middleIndex = (syllable / 28) % 21;   // 21 is the number of elements in the middle
         int finalIndex = syllable % 28;           // 0 means "no final consonant"

         szTemp = FnxAppendAscii(szTemp, HangulSyllablesInitial[initialIndex]);
         szTemp = FnxAppendAscii(szTemp, HangulSyllablesMiddle[middleIndex]);
         if (finalIndex)
         {
            szTemp = FnxAppendAscii(szTemp, HangulSyllablesFinal[finalIndex - 1]);
         }
      }
      else if ((*pwsTemp >= 0x3131) && (*pwsTemp <= 0x318E))
      {
         // Individual Hangul letter: the table starts at 0x3131.
         szTemp = FnxAppendAscii(szTemp, HangulLetters[*pwsTemp - 0x3131].szRoman);
      }
      else if (*pwsTemp >= 0xFF10 && *pwsTemp <= 0xFF19)
      {
         // Fullwidth digits 0..9 that are used together with full Katakana.
         *szTemp = (char)('0' + (*pwsTemp - 0xFF10));
         szTemp++;
      }
      else if ((*pwsTemp >= 0x3041 && *pwsTemp <= 0x30FE) ||
               (*pwsTemp >= 0xFF66 && *pwsTemp <= 0xFF9F))
      {
         // Hiragana, katakana or halfwidth katakana (converted to full here).
         offset = FnxKanaOffset(pwsTemp);

         // Check for the sound marks.  This most often happens with halfwidth:
         // the mark follows the syllable and changes its leading consonant.
         if (0x30FD == *pwsTemp || 0x30FE == *pwsTemp)
         {
            if (szTemp - szbuf >= 2)   // never reach in front of the output buffer
            {
               char *pszLead = szTemp - 2;

               if (0x30FD == *pwsTemp)
               {
                  switch (*pszLead)
                  {
                  case 'K':
                     *pszLead = 'G';
                     break;
                  case 'S':
                     *pszLead = 'J';
                     break;
                  case 'H':
                     *pszLead = 'B';
                     break;
                  case 'T':
                     *pszLead = 'D';
                     break;
                  default:
                     break;
                  }
               }
               else if (*pszLead == 'H')
               {
                  *pszLead = 'P';
               }
            }
            pwsTemp++;
            continue;
         }

         // Code points in the gap between the two kana blocks (or a failed
         // halfwidth conversion) have no table entry: skip them.
         if (offset < 0 || offset >= KATHIR_ENTRY_COUNT)
         {
            pwsTemp++;
            continue;
         }

         switch (offset)
         {
               // special conditions: an i-row syllable followed by small ya/yu/yo
            case 0x0c:  // ki
            case 0x0d:  // gi
            case 0x17:  // ji
            case 0x2a:  // ni
            case 0x31:  // hi
            case 0x32:  // bi
            case 0x33:  // pi
            case 0x3e:  // mi
            case 0x49:  // ri
               szTemp = FnxAppendAscii(szTemp, KatHir[offset].szRomongi);
               offset1 = FnxKanaOffset(pwsTemp + 1);
               if (offset1 == 0x42 || offset1 == 0x44 || offset1 == 0x46)
               {
                  // Replace the trailing 'I' with YA / YU / YO and consume the small kana.
                  szTemp--;
                  szTemp = FnxAppendAscii(szTemp, KatHir[offset1].szRomongi);
                  pwsTemp++;
               }
               break;
            case 0x16:  // shi
               offset1 = FnxKanaOffset(pwsTemp + 1);
               if (0x42 == offset1)
               {
                  szTemp = FnxAppendAscii(szTemp, "SHYA");
                  pwsTemp++;
               }
               else if (0x44 == offset1)
               {
                  szTemp = FnxAppendAscii(szTemp, "SHYU");
                  pwsTemp++;
               }
               else if (0x46 == offset1)
               {
                  szTemp = FnxAppendAscii(szTemp, "SHYO");
                  pwsTemp++;
               }
               else
               {
                  szTemp = FnxAppendAscii(szTemp, KatHir[offset].szRomongi);
               }
               break;
            case 0x20:  // chi
               offset1 = FnxKanaOffset(pwsTemp + 1);
               if (0x42 == offset1)
               {
                  szTemp = FnxAppendAscii(szTemp, "CHYA");
                  pwsTemp++;
               }
               else if (0x44 == offset1)
               {
                  szTemp = FnxAppendAscii(szTemp, "CHYU");
                  pwsTemp++;
               }
               else if (0x46 == offset1)
               {
                  szTemp = FnxAppendAscii(szTemp, "CHYO");
                  pwsTemp++;
               }
               else
               {
                  szTemp = FnxAppendAscii(szTemp, KatHir[offset].szRomongi);
                  if (offset1 == 0x06) // small E: "CHI" becomes "CHE"
                  {
                     szTemp[-1] = 'E';
                     pwsTemp++;
                  }
               }
               break;
            case 0x22: // small tsu: double the consonant that follows.
               // The following character is only inspected here; it is
               // romanized by the next loop iteration.
               ConvertChar(pwsTemp + 1);
               offset1 = FnxKanaOffset(pwsTemp + 1);
               if (offset1 >= 0 && offset1 < KATHIR_ENTRY_COUNT)
               {
                  *szTemp = KatHir[offset1].szRomongi[0];
                  szTemp++;
               }
               break;
            default:
               // copy the characters over to the list.
               szTemp = FnxAppendAscii(szTemp, KatHir[offset].szRomongi);
               break;
         }
      }
      else if ((unsigned long)*pwsTemp < 0x80UL)
      {
         // Plain ASCII is copied through; a tab becomes a space.
         char cMb = (char)*pwsTemp;

         if (cMb == 0x09)
            *szTemp = ' ';
         else *szTemp = cMb;
         szTemp++;
      }
      pwsTemp++;
   }

   *szTemp = '\0';
   return 0;
}


/*FUNCTION_HEADER*******************
 * NAME:	  ;CheckUnicodeHeader
 * DESC:	  Read the first two or three bytes of a Unicode file to see what kind it is.
 * IN:     fp - File to check.
           pbLittleEndian - Address in which to return a flag indicating whether the unicode
             file is little-endian or not.
 * OUT:    *pbLittleEndian is set when a byte-order mark is found (at offset 0, or
             at offset 1 behind a one-byte signature); otherwise it is left untouched.
           The file is advanced beyond the 2- or 3-byte header, if there is one,
           or is rewound to the beginning, if there is not.
 * RETURN: TRUE if the file starts with "FF FE" or "FE FF", FALSE otherwise
           (including files shorter than the header and NULL arguments).
 * NOTES:  File should be opened and positioned at the very beginning.
           The header buffer is zero-initialized and the read counts are checked,
           so a file shorter than the header is never compared against
           indeterminate bytes.
           NOTE(review): a byte-order mark found behind a signature byte still
           returns FALSE and the signature byte itself is never examined - that
           is the pre-existing behavior; confirm whether the 0x0E (UTF-16) case
           was meant to return TRUE.
 *END_HEADER***************************/
BOOL CheckUnicodeHeader(FILE *fp, BOOL *pbLittleEndian)
{
   BOOL bUnicode=FALSE;
   unsigned char sBuf[4] = {0, 0, 0, 0};
   size_t nRead;

   if (fp == NULL || pbLittleEndian == NULL)
   {
      return FALSE;
   }

   /* Unicode files usually begin with "FF FE" (little-endian), or "FE FF" (big-endian).
      So check to see if we have one of those cases. */
   nRead = fread(sBuf, 1, 2, fp);
   if (nRead == 2 && sBuf[0]==0xFF && sBuf[1]==0xFE)
   {
      bUnicode = TRUE;
      *pbLittleEndian=TRUE;
   }
   else if (nRead == 2 && sBuf[0]==0xFE && sBuf[1]==0xFF)
   {
      bUnicode = TRUE;
      *pbLittleEndian=FALSE;
   }
   else
   {
      /* Compressed unicode files begin with a byte before the FFFE/FEFF bytes
         that indicates what kind of compression is used.  We'll ignore most kinds,
         but two standard kinds we can handle:
           0x0E => This is a regular (uncompressed, UTF-16) unicode file.
           0x0F => This is a UTF-8 unicode file, i.e., it uses 1 byte per character, 
                   probably in ASCII, so just treat it as though it was not unicode.
         See http://www.unicode.org/unicode/reports/tr6/index.html#Signature
      */
      size_t nThird = (nRead == 2) ? fread(&sBuf[2], 1, 1, fp) : 0;

      if (nThird == 1 && sBuf[1]==0xFF && sBuf[2]==0xFE)
         *pbLittleEndian=TRUE;
      else if (nThird == 1 && sBuf[1]==0xFE && sBuf[2]==0xFF)
         *pbLittleEndian=FALSE;
      else fseek(fp, 0, SEEK_SET);
      bUnicode=FALSE;
   }
   return bUnicode;
}  /* CheckUnicodeHeader */


/*FUNCTION_HEADER*******************
 * NAME:	  ;FnxReadWordList
 * DESC:	  Read a text file line by line, convert every line to ASCII with
           ConvertToAscii() and write the converted lines to a second file.
 * IN:     wsWordFile           - Name of the file to read (opened in binary mode).
           wsWordConvertionFile - Name of the file to write (created / truncated, text mode).
 * OUT:    The output file holds the converted text of each input line; a line
           whose conversion is empty is written as a single "\n".
 * RETURN: 0 on success, -1 when a name is NULL, a file cannot be opened, or
           writing the output fails.
 * NOTES:  CheckUnicodeHeader() decides whether lines are read with fgetws()
           (wide) or fgets() (bytes).  Wide lines are byte-swapped when the file's
           byte order differs from the host's (WORDS_LITTLEENDIAN).
           Both kinds of line are read into the same wchar_t buffer, which is
           cleared before every read: ConvertToAscii() may interpret a byte line
           as wide characters, so the data must be suitably aligned and followed
           by a wide terminator.
           The output buffer is 8 times the line length because one input
           character can expand to as many as 7 output characters.
 *END_HEADER***************************/
int FnxReadWordList(char *wsWordFile, char *wsWordConvertionFile)
{
   FILE *fp, *fpWrite;                        /* Input file, output file */
   wchar_t wsBuf[MAX_PHRASE_LENGTH];          /* Line buffer (wide, or raw bytes through sBuf) */
   char *sBuf = (char *)wsBuf;                /* Byte view of wsBuf for fgets() */
   char sBufRomanji[MAX_PHRASE_LENGTH * 8];   /* Converted line */
   size_t nLength;                            /* Length of the converted line */
   int  iResult = 0;                          /* Value to return */
   BOOL bUnicode;                             /* Flag for whether the text file is unicode */
   BOOL bLittleEndian = FALSE;                /* Flag for whether the unicode file is little-endian */

   if (wsWordFile == NULL || wsWordConvertionFile == NULL)
   {
      return -1;
   }

   /* Open the word list file */
   fp = fopen(wsWordFile, "rb");
   if (fp==NULL)
   {
      return -1;
   }

   fpWrite = fopen(wsWordConvertionFile, "w");
   if (fpWrite==NULL)
   {
      fclose(fp);
      return -1;
   }

   bUnicode = CheckUnicodeHeader(fp, &bLittleEndian);

   for (;;)
   {
      /* Start every line from clean buffers (see NOTES). */
      memset(wsBuf, 0, sizeof(wsBuf));
      memset(sBufRomanji, 0, sizeof(sBufRomanji));

      if (bUnicode)
      {
         if (fgetws(wsBuf, MAX_PHRASE_LENGTH, fp) == NULL)
         {
            break;
         }
#ifdef WORDS_LITTLEENDIAN
         if (!bLittleEndian)
#else
         if (bLittleEndian)
#endif
         {  /* Swap the bytes if different from the host */
            BlockByteSwap16(wsBuf, wcslen(wsBuf));
         }
         ConvertToAscii(wsBuf, sBufRomanji, bUnicode);
      }
      else
      {
         if (fgets(sBuf, MAX_PHRASE_LENGTH, fp) == NULL)
         {
            break;
         }
         /* ConvertToAscii() inspects the first byte to decide whether the
            line is multibyte text or already wide. The buffer may be over written. */
         ConvertToAscii(wsBuf, sBufRomanji, FALSE);
      }

      if (sBufRomanji[0]==0)
      {
         /* Nothing was converted: keep the line structure of the output. */
         if (fwrite("\n", sizeof(char), 1, fpWrite) != 1)
         {
            iResult = -1;
            break;
         }
         continue;
      }

      nLength = strlen(sBufRomanji);
      if (fwrite(sBufRomanji, sizeof(char), nLength, fpWrite) != nLength)
      {
         iResult = -1;
         break;
      }
   }  /* while not at end of file */

   /* Close the files; a failed close of the output means data was lost. */
   if (fclose(fpWrite) != 0)
   {
      iResult = -1;
   }
   fclose(fp);
   return iResult;
}  /* FnxReadWordList */

