// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved

/*******************************************************************************
* TtsEngObj.cpp *
*---------------*
*   Description:
*       This module is the main implementation file for the CTTSEngObj class.
*
*******************************************************************************/

//--- Additional includes
#include "stdafx.h"
#include <escarpment.h>
#include "..\m3d.h"
#include "TtsEngObj.h"

//--- Local

/*****************************************************************************
* CTTSEngObj::FinalConstruct *
*----------------------------*
*   Description:
*       Second-stage constructor. Puts every member this file uses into a
*   known state (no voice loaded, no output site, neutral temporary speed and
*   volume, audio offset at zero) and initializes the Escarpment library.
*
*   Return Values
*       S_OK - always.
*****************************************************************************/
HRESULT CTTSEngObj::FinalConstruct()
{
    //--- No voice is loaded until SetObjectToken() succeeds
    m_pTTS = NULL;

    //--- Per-Speak() state; Speak() resets all of these again on every call
    m_pOutputSite = NULL;
    m_fTTSSpokeSomething = FALSE;
    m_ullAudioOff = 0;      // previously left uninitialized until the first Speak()
    m_fTempSpeed = 1.0;
    m_fTempVolume = 1.0;

    //--- Initialize the Escarpment library; FinalRelease() calls the matching
    //    EscUninitialize()
    EscInitialize (L"mikerozak@bigpond.com", 2511603559, 0);

    return S_OK;
} /* CTTSEngObj::FinalConstruct */

/*****************************************************************************
* CTTSEngObj::FinalRelease *
*--------------------------*
*   Description:
*       destructor
*****************************************************************************/
void CTTSEngObj::FinalRelease()
{


    // delete m_pWordList;

    //if( m_pVoiceData )
   // {
    //    ::UnmapViewOfFile( (void*)m_pVoiceData );
    //}

    //if( m_hVoiceData )
    //{
    //    ::CloseHandle( m_hVoiceData );
    //}


    if (m_pTTS)
       delete m_pTTS;

   MLexiconCacheShutDown (FALSE);
   EscUninitialize ();

} /* CTTSEngObj::FinalRelease */

/*****************************************************************************
* CTTSEngObj::MapFile *
*---------------------*
*   Description:
*       Legacy helper (compiled out with #if 0). Reads a file path from the
*   string value pszTokenVal of m_cpToken, opens that file read-only and maps
*   it into memory. This function assumes that m_cpToken has been initialized.
*
*   On success *phMapping receives the file-mapping handle and *ppvData the
*   mapped view. On failure the mapping handle (if any) is closed and a
*   failure HRESULT is returned.
*
*   NOTE(review): the error code is fetched with GetLastError() only after
*   CloseHandle(hFile) has run, so it may not be the code of the call that
*   actually failed - confirm before re-enabling this function.
*****************************************************************************/
#if 0 // not used
HRESULT CTTSEngObj::MapFile( const WCHAR * pszTokenVal,  // Value that contains file path
                            HANDLE * phMapping,          // Pointer to file mapping handle
                            void ** ppvData )            // Pointer to the data
{
    HRESULT hr = S_OK;
    CSpDynamicString dstrFilePath;
    //--- Look up the file path stored under the given token value
    hr = m_cpToken->GetStringValue( pszTokenVal, &dstrFilePath );
    if ( SUCCEEDED( hr ) )
    {
        bool fWorked = false;
        *phMapping = NULL;
        *ppvData = NULL;
        HANDLE hFile;
        //--- Open the file for shared, read-only access
#ifdef _WIN32_WCE
        hFile = CreateFileForMapping( dstrFilePath, GENERIC_READ,
                                      FILE_SHARE_READ, NULL, OPEN_EXISTING,
                                      FILE_ATTRIBUTE_NORMAL, NULL );
#else
        hFile = CreateFile( CW2T(dstrFilePath), GENERIC_READ,
                            FILE_SHARE_READ, NULL, OPEN_EXISTING,
                            FILE_ATTRIBUTE_NORMAL, NULL );
#endif
        if (hFile != INVALID_HANDLE_VALUE)
        {
            //--- Map the whole file read-only
            *phMapping = ::CreateFileMapping( hFile, NULL, PAGE_READONLY, 0, 0, NULL );
            if (*phMapping)
            {
                *ppvData = ::MapViewOfFile( *phMapping, FILE_MAP_READ, 0, 0, 0 );
                if (*ppvData)
                {
                    fWorked = true;
                }
            }
            //--- The file handle is closed whether or not the mapping worked
            ::CloseHandle( hFile );
        }
        if (!fWorked)
        {
            //--- Report the failure and undo the partially created mapping
            hr = HRESULT_FROM_WIN32(::GetLastError());
            if (*phMapping)
            {
                ::CloseHandle(*phMapping);
                *phMapping = NULL;
            }
        }
    }
    return hr;
} /* CTTSEngObj::MapFile */
#endif // 0


//
//=== ISpObjectWithToken Implementation ======================================
//

/*****************************************************************************
* CTTSEngObj::SetObjectToken *
*----------------------------*
*   Description:
*       This function performs the majority of the initialization of the voice.
*   Once the object token has been stored, the "VoiceData" string value is
*   read from it and a CMTTS synthesizer is created and opened on that path.
*
*   Return Values
*       S_OK on success.
*       The failure code from SpGenericSetObjectToken() or GetStringValue().
*       E_FAIL if the synthesizer could not be created or opened; m_pTTS is
*       left NULL when the open fails.
*****************************************************************************/
STDMETHODIMP CTTSEngObj::SetObjectToken(ISpObjectToken * pToken)
{
    //--- Store the token, then read the voice file path from it
    HRESULT hrResult = SpGenericSetObjectToken(pToken, m_cpToken);
    CSpDynamicString dstrVoiceFile;
    if (SUCCEEDED(hrResult))
    {
        hrResult = m_cpToken->GetStringValue( L"VoiceData", &dstrVoiceFile );
    }
    if (FAILED(hrResult))
    {
        return hrResult;
    }

    //--- Replace any previously loaded voice with a fresh synthesizer
    if (m_pTTS != NULL)
    {
        delete m_pTTS;
    }
    m_pTTS = new CMTTS;
    if (m_pTTS == NULL)
    {
        return E_FAIL;
    }

    //--- TRUE is passed so the TTS engine cache is not used, because that
    //    cache is not thread safe
    if (m_pTTS->Open (dstrVoiceFile.m_psz, TRUE))
    {
        return hrResult;
    }

    //--- Open failed: do not keep a half-initialized voice around
    delete m_pTTS;
    m_pTTS = NULL;
    return E_FAIL;
} /* CTTSEngObj::SetObjectToken */

//
//=== ISpTTSEngine Implementation ============================================
//

/*****************************************************************************
* CTTSEngObj::Speak *
*-------------------*
*   Description:
*       This is the primary method that SAPI calls to render text. The
*   fragment list is flattened into a single MML string (emphasis/prosody
*   tags, one "word:" bookmark per word, breaks for silence and "sapimark:"
*   bookmarks for SAPI bookmarks) which is then passed to
*   m_pTTS->SynthGenWave() in one call.
*-----------------------------------------------------------------------------
*   Input Parameters
*
*   dwSpeakFlags
*       This is a set of flags used to control the behavior of the
*       SAPI voice object and the associated engine. Currently ignored.
*
*   rguidFormatId
*       Format identifier of the requested output. Not examined here.
*
*   pWaveFormatEx
*       Requested output wave format. Only nSamplesPerSec is used; when this
*       pointer is NULL, 22050 Hz is assumed.
*
*   pTextFragList
*       A linked list of text fragments to be rendered. There is
*       one fragment per XML state change. If the input text does
*       not contain any XML markup, there will only be a single fragment.
*
*   pOutputSite
*       The interface back to SAPI where all output audio samples and events
*       are written. It is stored in m_pOutputSite before synthesis starts.
*
*   Return Values
*       S_OK - Synthesis succeeded, or it failed only after
*              m_fTTSSpokeSomething had been set.
*       E_INVALIDARG - pOutputSite or pTextFragList failed the pointer checks.
*       E_FAIL - No voice is loaded, the scratch buffer could not be sized,
*              or synthesis failed before anything was spoken.
*
*****************************************************************************/
STDMETHODIMP CTTSEngObj::Speak( DWORD dwSpeakFlags,
                                REFGUID rguidFormatId,
                                const WAVEFORMATEX * pWaveFormatEx,
                                const SPVTEXTFRAG* pTextFragList,
                                ISpTTSEngineSite* pOutputSite )
{

    HRESULT hr = S_OK;

    //--- Check args
    if( SP_IS_BAD_INTERFACE_PTR( pOutputSite ) ||
        SP_IS_BAD_READ_PTR( pTextFragList )  )
    {
        hr = E_INVALIDARG;
        return hr;
    }

    //--- Nothing can be spoken until SetObjectToken() has loaded a voice
    if (!m_pTTS)
       return E_FAIL;

    // combine all the text into one string
    // BUGBUG - ignoring dwSPeakFlags -doing SPF_NLP_SPEAK_PUNC

    const SPVTEXTFRAG* pTextFragListCur;
    CMem memSpeakMML, memScratch;    // memSpeakMML: MML being built, memScratch: NULL-terminated copies
    MemZero (&memSpeakMML);
    PWSTR pszScratch;

    CListVariable lPrepend, lAppend; // opening tags / matching closing tags for the current fragment
    WCHAR szTemp[128];
    PWSTR psz;
    DWORD i;

    for (pTextFragListCur = pTextFragList; pTextFragListCur; pTextFragListCur = pTextFragListCur->pNext) {
       switch (pTextFragListCur->State.eAction) {
          case SPVA_SpellOut: // BUGBUG - for now have spell-out speak
          case SPVA_Speak:
             {
                // add MML flags for speaking speed, etc.
                lPrepend.Clear();
                lAppend.Clear();

                if (pTextFragListCur->State.EmphAdj) {
                   psz = L"<emphasis level=strong>";
                   lPrepend.Add (psz, (wcslen(psz)+1)*sizeof(WCHAR));
                   psz = L"</emphasis>";
                   lAppend.Add (psz, (wcslen(psz)+1)*sizeof(WCHAR));
                }
                if (pTextFragListCur->State.RateAdj || (pTextFragListCur->State.Volume != 100) ||
                   pTextFragListCur->State.PitchAdj.MiddleAdj /* || pTextFragListCur->State.PitchAdj.RangeAdj */) {

                      // build one <prosody ...> tag holding only the attributes that
                      // differ from their defaults
                      wcscpy_s (szTemp, sizeof(szTemp)/sizeof(WCHAR), L"<prosody");

                      // rate: every 10 steps of RateAdj is a factor of 3, written as a percentage
                      if (pTextFragListCur->State.RateAdj)
                         swprintf_s (szTemp + wcslen(szTemp), sizeof(szTemp)/sizeof(WCHAR)/4,
                           L" rate=%d%%",
                           (int)(pow(3.0, (fp)pTextFragListCur->State.RateAdj / 10.0) * 100.0)
                           );

                      // volume: passed through as a percentage
                      if (pTextFragListCur->State.Volume != 100)
                         swprintf_s (szTemp + wcslen(szTemp), sizeof(szTemp)/sizeof(WCHAR)/4,
                           L" volume=%d%%",
                           (int)pTextFragListCur->State.Volume
                           );

                      // pitch: every 10 steps of MiddleAdj is a factor of 4/3, written as a percentage
                      if (pTextFragListCur->State.PitchAdj.MiddleAdj)
                         swprintf_s (szTemp + wcslen(szTemp), sizeof(szTemp)/sizeof(WCHAR)/4,
                           L" pitch=%d%%",
                           (int)(pow(4.0 / 3.0, (fp)pTextFragListCur->State.PitchAdj.MiddleAdj / 10.0) * 100.0)
                           );

                      // range
                      // BUGBUG - Not doing range because marked as "reserved for future use"
                      //if (pTextFragListCur->State.PitchAdj.RangeAdj)
                      //  swprintf_s (szTemp + wcslen(szTemp), sizeof(szTemp),
                      //     L" range=%d%%",
                      //     (int)(pow(4.0 / 3.0, (fp)pTextFragListCur->State.PitchAdj.RangeAdj / 10.0) * 100.0)
                      //     );

                      // finish it up
                      wcscat_s (szTemp, sizeof(szTemp)/sizeof(WCHAR), L">");
                      lPrepend.Add (szTemp, (wcslen(szTemp)+1)*sizeof(WCHAR));
                      psz = L"</prosody>";
                      lAppend.Add (psz, (wcslen(psz)+1)*sizeof(WCHAR));
                }
                // BUGBUG - ignoring pPhoneIds
                // BUGBUG - ignoring ePartOfSpeech
                // BUGBUG - ignoring context

                // prepend the settings (opening tags, outermost first)
                for (i = 0; i < lPrepend.Num(); i++)
                   MemCat (&memSpeakMML, (PWSTR) lPrepend.Get(i));


                // split the fragment into alternating runs of whitespace and
                // non-whitespace; every non-whitespace run is treated as a word
                DWORD dwCur = 0;
                while (dwCur < pTextFragListCur->ulTextLen) {
                   // what type is this
                   BOOL fIsWhite = iswspace (pTextFragListCur->pTextStart[dwCur]) ? TRUE : FALSE;

                   // find end of this
                   DWORD dwEnd;
                   for (dwEnd = dwCur+1; dwEnd < pTextFragListCur->ulTextLen; dwEnd++) {
                      BOOL fIsThisWhite = iswspace (pTextFragListCur->pTextStart[dwEnd]) ? TRUE : FALSE;
                      if (fIsThisWhite != fIsWhite)
                         break;  // done
                   } //dwEnd

                   // enough memory to have this NULL-terminated (the fragment text is
                   // only length-delimited)
                   if (!memScratch.Required ((dwEnd - dwCur + 1) * sizeof(WCHAR)) )
                      return E_FAIL;
                   pszScratch = (PWSTR)memScratch.p;
                   memcpy (pszScratch, pTextFragListCur->pTextStart + dwCur, (dwEnd - dwCur) * sizeof(WCHAR));
                   pszScratch[dwEnd - dwCur] = 0;

                   // if this isn't whitespace, then append a bookmark so know it's a word.
                   // The mark is "word:<start>-<end>" in source-text character offsets.
                   if (!fIsWhite) {
                      MemCat (&memSpeakMML, L"<bookmark mark=\"word:");
                      MemCat (&memSpeakMML, (int)(pTextFragListCur->ulTextSrcOffset + dwCur));
                      MemCat (&memSpeakMML, L"-");
                      MemCat (&memSpeakMML, (int)(pTextFragListCur->ulTextSrcOffset + dwEnd));
                      MemCat (&memSpeakMML, L"\"/>");
                   }

                   // append, but sanitize so is OK for MML
                   MemCatSanitize (&memSpeakMML, pszScratch);

                   // next
                   dwCur = dwEnd;
                } // dwCur

                // append the settings: close the tags in the reverse of the order
                // they were opened so the elements nest properly. (The loop used
                // to count upwards from the last entry, so it ran at most once and
                // left <emphasis> unclosed whenever <prosody> was also present.)
                for (i = lAppend.Num(); i > 0; i--)
                   MemCat (&memSpeakMML, (PWSTR) lAppend.Get(i-1));

             }
             break;

          case SPVA_Silence:
             // do silence
             MemCat (&memSpeakMML, L"<break time=");
             MemCat (&memSpeakMML, (int)pTextFragListCur->State.SilenceMSecs);
             MemCat (&memSpeakMML, L"ms/>");
             break;

          case SPVA_Pronounce:
             // BUGBUG - do pronounce
             break;

          case SPVA_Bookmark:
             // enough memory to have this NULL-terminated
             if (!memScratch.Required ((pTextFragListCur->ulTextLen + 1) * sizeof(WCHAR)) )
                return E_FAIL;
             pszScratch = (PWSTR)memScratch.p;
             memcpy (pszScratch, pTextFragListCur->pTextStart, pTextFragListCur->ulTextLen * sizeof(WCHAR));
             pszScratch[pTextFragListCur->ulTextLen] = 0;

             // the "sapimark:" prefix distinguishes SAPI bookmarks from the
             // "word:" bookmarks generated above
             MemCat (&memSpeakMML, L"<bookmark mark=\"sapimark:");
             MemCatSanitize (&memSpeakMML, pszScratch);
             MemCat (&memSpeakMML, L"\"/>");
             break;

          default:
          case SPVA_Section:
          case SPVA_ParseUnknownTag:
             // dont really know what to do with these, so do nothing
             break;
       } // switch
    } // pTextFragListCur

    //--- Reset the per-utterance state before synthesis starts.
    //    NOTE(review): presumably read and updated by the synthesis callbacks
    //    reached through the "this" argument below - confirm.
    m_fTTSSpokeSomething = FALSE;
    m_pOutputSite = pOutputSite;
    m_ullAudioOff = 0;
    m_fTempSpeed = 1.0;
    m_fTempVolume = 1.0;

    DWORD dwSamplesPerSec = 22050;  // NOTE: Hard coded
    if (pWaveFormatEx)
       dwSamplesPerSec = pWaveFormatEx->nSamplesPerSec;
    BOOL fRet = m_pTTS->SynthGenWave  (NULL, dwSamplesPerSec, (PWSTR)memSpeakMML.p,
      TRUE, 1 /* default TTS quality */, FALSE, NULL, NULL, this);

    //--- A failed synthesis is only reported as an error if nothing was spoken
    if (!fRet && !m_fTTSSpokeSomething)
       return E_FAIL;

#if 0 // old code
        //--- Init some vars
        m_pCurrFrag   = pTextFragList;
        m_pNextChar   = m_pCurrFrag->pTextStart;
        m_pEndChar    = m_pNextChar + m_pCurrFrag->ulTextLen;
        m_ullAudioOff = 0;

        //--- Parse
        //    We've supplied a simple word/sentence breaker just to show one
        //    way of walking the fragment list. It obviously doesn't deal with
        //    things like abreviations and expansion of numbers and dates.
        CItemList ItemList;

        while( SUCCEEDED( hr ) && !(pOutputSite->GetActions() & SPVES_ABORT) )
        {
            //--- Do skip?
            if( pOutputSite->GetActions() & SPVES_SKIP )
            {
                long lSkipCnt;
                SPVSKIPTYPE eType;
                hr = pOutputSite->GetSkipInfo( &eType, &lSkipCnt );
                if( SUCCEEDED( hr ) )
                {
                    //--- Notify SAPI how many items we skipped. We're returning zero
                    //    because this feature isn't implemented.
                    hr = pOutputSite->CompleteSkip( 0 );
                }
            }

            //--- Build the text item list
            if( SUCCEEDED( hr ) && (hr = GetNextSentence( ItemList )) != S_OK )
            {
                break;
            }

            //--- We aren't going to do any part of speech determination,
            //    prosody, or pronunciation determination. If you were, one thing
            //    you will need is access to the SAPI lexicon. You can get that with
            //    the following call.
            //    CComPtr<ISpLexicon> cpLexicon;
            //    hr = pUser->GetLexicon( &cpLexicon );

            if( !(pOutputSite->GetActions() & SPVES_ABORT) )
            {
                //--- Fire begin sentence event
                CSentItem& FirstItem = ItemList.GetHead();
                CSentItem& LastItem  = ItemList.GetTail();
                CSpEvent Event;
                Event.eEventId             = SPEI_SENTENCE_BOUNDARY;
                Event.elParamType          = SPET_LPARAM_IS_UNDEFINED;
                Event.ullAudioStreamOffset = m_ullAudioOff;
                Event.lParam               = (LPARAM)FirstItem.ulItemSrcOffset;
                Event.wParam               = (WPARAM)LastItem.ulItemSrcOffset +
                                                     LastItem.ulItemSrcLen -
                                                     FirstItem.ulItemSrcOffset;
                hr = pOutputSite->AddEvents( &Event, 1 );

                //--- Output
                if( SUCCEEDED( hr ) )
                {
                    hr = OutputSentence( ItemList, pOutputSite );
                }
            }
        }

        //--- S_FALSE just says that we hit the end, return okay
        if( hr == S_FALSE )
        {
            hr = S_OK;
        }
#endif

    return hr;
} /* CTTSEngObj::Speak */

#if 0 // dead code
/*****************************************************************************
* CTTSEngObj::OutputSentence *
*----------------------------*
*   This method is used to output an item list. (Legacy sample code, compiled
*   out.) Each item is handled according to its XML state action: spoken
*   words are looked up in m_pWordList and their audio written to the output
*   site, silence is written as zero-filled buffers, and bookmarks are queued
*   as SPEI_TTS_BOOKMARK events. Processing stops early when the output site
*   reports SPVES_ABORT.
*
*   Returns the HRESULT of the last Write()/AddEvents() call made, or S_OK if
*   none was made.
****************************************************************************/
HRESULT CTTSEngObj::OutputSentence( CItemList& ItemList, ISpTTSEngineSite* pOutputSite )
{
    HRESULT hr = S_OK;
    ULONG WordIndex;

    //--- Lookup words in our voice
    SPLISTPOS ListPos = ItemList.GetHeadPosition();
    while( ListPos && !(pOutputSite->GetActions() & SPVES_ABORT) )
    {
        CSentItem& Item = ItemList.GetNext( ListPos );

        //--- Process sentence items
        switch( Item.pXmlState->eAction )
        {
          //--- Speak some text ---------------------------------------
          case SPVA_Speak:
          {
            //--- We don't say anything for punctuation or control characters
            //    in this sample.
            if( iswalpha( Item.pItem[0] ) || iswdigit( Item.pItem[0] ) )
            {
                //--- Lookup the word (case-insensitive, exact length), if we
                //    can't find it just use the first one
                for( WordIndex = 0; WordIndex < m_ulNumWords; ++WordIndex )
                {
                    if( ( m_pWordList[WordIndex].ulTextLen == Item.ulItemLen ) &&
                        ( !_wcsnicmp( m_pWordList[WordIndex].pText, Item.pItem, Item.ulItemLen )) )
                    {
                        break;
                    }
                }
                if( WordIndex == m_ulNumWords )
                {
                    WordIndex = 0;
                }

                //--- Queue the event
                CSpEvent Event;
                Event.eEventId             = SPEI_WORD_BOUNDARY;
                Event.elParamType          = SPET_LPARAM_IS_UNDEFINED;
                Event.ullAudioStreamOffset = m_ullAudioOff;
                Event.lParam               = Item.ulItemSrcOffset;
                Event.wParam               = Item.ulItemSrcLen;
                pOutputSite->AddEvents( &Event, 1 );

                //--- Queue the audio data
                hr = pOutputSite->Write( m_pWordList[WordIndex].pAudio,
                                         m_pWordList[WordIndex].ulNumAudioBytes,
                                         NULL );

                //--- Update the audio offset
                m_ullAudioOff += m_pWordList[WordIndex].ulNumAudioBytes;
            }
          }
          break;

          //--- Output some silence for a pause -----------------------
          case SPVA_Silence:
          {
            BYTE Buff[1000];
            memset( Buff, 0, 1000 );
            //--- NOTE(review): 22 bytes per millisecond looks like it assumes
            //    11 kHz 16-bit mono output - confirm.
            ULONG NumSilenceBytes = Item.pXmlState->SilenceMSecs * 22;

            //--- Queue the audio data in chunks so that we can get
            //    interrupted if necessary.
            while( !(pOutputSite->GetActions() & SPVES_ABORT) )
            {
                if( NumSilenceBytes > 1000 )
                {
                    hr = pOutputSite->Write( Buff, 1000, NULL );
                    NumSilenceBytes -= 1000;
                }
                else
                {
                    hr = pOutputSite->Write( Buff, NumSilenceBytes, NULL );
                    break;
                }
            }

            //--- Update the audio offset
            //    NOTE(review): NumSilenceBytes was decremented by the loop
            //    above, so only the final partial chunk is added here rather
            //    than the total number of bytes written.
            m_ullAudioOff += NumSilenceBytes;
          }
          break;

          //--- Fire a bookmark event ---------------------------------
          case SPVA_Bookmark:
          {
            //--- The bookmark is NOT a null terminated string in the Item, but we need
            //--- to convert it to one.  Allocate enough space for the string.
            //    NOTE(review): the _malloca() result is not checked for NULL.
            WCHAR * pszBookmark = (WCHAR *)_malloca((Item.ulItemLen + 1) * sizeof(WCHAR));
            memcpy(pszBookmark, Item.pItem, Item.ulItemLen * sizeof(WCHAR));
            pszBookmark[Item.ulItemLen] = 0;
            //--- Queue the event
            SPEVENT Event;
            Event.eEventId             = SPEI_TTS_BOOKMARK;
            Event.elParamType          = SPET_LPARAM_IS_STRING;
            Event.ullAudioStreamOffset = m_ullAudioOff;
            Event.lParam               = (LPARAM)pszBookmark;
            Event.wParam               = _wtol(pszBookmark);
            hr = pOutputSite->AddEvents( &Event, 1 );
            //--- Free the space for the string.
            _freea(pszBookmark);
          }
          break;

          case SPVA_Pronounce:
            //--- Our sample engine doesn't handle this. If it
            //    did, you would use the associated pronunciation in
            //    the XmlState structure instead of the lexicon.
            break;

          case SPVA_ParseUnknownTag:
            //--- This will reference an XML tag that is unknown to SAPI
            //    if your engine has private tags to control state, you
            //    would examine these tags and see if you recognize it. This
            //    would also be the point that you would make the rendering
            //    state change.
            break;
        }
    }

    return hr;
} /* CTTSEngObj::OutputSentence */
#endif  // 0

/*****************************************************************************
* CTTSEngObj::GetOutputFormat *
*-----------------------------*
*   Description:
*       This method returns the output data format the engine wants to
*   produce: 16-bit mono PCM at the sample rate of the target format, or at
*   22050 Hz when no target wave format is supplied. pTargetFormatId is not
*   examined.
*
*   Return Values
*       S_OK - *pDesiredFormatId is SPDFID_WaveFormatEx and
*              *ppCoMemDesiredWaveFormatEx points to a CoTaskMemAlloc'ed
*              WAVEFORMATEX.
*       E_INVALIDARG - An output pointer is NULL.
*       E_OUTOFMEMORY - The WAVEFORMATEX could not be allocated;
*              *ppCoMemDesiredWaveFormatEx is set to NULL.
*****************************************************************************/
STDMETHODIMP CTTSEngObj::GetOutputFormat( const GUID * pTargetFormatId, const WAVEFORMATEX * pTargetWaveFormatEx,
                                          GUID * pDesiredFormatId, WAVEFORMATEX ** ppCoMemDesiredWaveFormatEx )
{
    if (!pDesiredFormatId || !ppCoMemDesiredWaveFormatEx)
    {
        return E_INVALIDARG;
    }

    //--- Follow the caller's sample rate, since we do a better job of
    //    up/down sampling than SAPI. The 22050 fallback is hard coded.
    const DWORD dwRate = pTargetWaveFormatEx ? pTargetWaveFormatEx->nSamplesPerSec : 22050;

    WAVEFORMATEX * pFormat = (WAVEFORMATEX *)::CoTaskMemAlloc(sizeof(WAVEFORMATEX));
    *ppCoMemDesiredWaveFormatEx = pFormat;
    if (!pFormat)
    {
        return E_OUTOFMEMORY;
    }

    //--- Always 16-bit mono PCM
    const WORD wChannels = 1;
    const WORD wBitsPerSample = 16;

    pFormat->wFormatTag      = WAVE_FORMAT_PCM;
    pFormat->nChannels       = wChannels;
    pFormat->nSamplesPerSec  = dwRate;
    pFormat->wBitsPerSample  = wBitsPerSample;
    pFormat->nBlockAlign     = (WORD)(wChannels * (wBitsPerSample / 8));
    pFormat->nAvgBytesPerSec = pFormat->nSamplesPerSec * pFormat->nBlockAlign;
    pFormat->cbSize          = 0;
    *pDesiredFormatId = SPDFID_WaveFormatEx;

    return S_OK;
} /* CTTSEngObj::GetOutputFormat */

//
//=== This code is just a simplified parser ==================================
//
/*****************************************************************************
* CTTSEngObj::GetNextSentence *
*-----------------------------*
*   This method is used to create a list of items to be spoken. (Legacy
*   sample code, compiled out.) Starting at the current parse position
*   (m_pCurrFrag / m_pNextChar / m_pEndChar) it fills ItemList until
*   AddNextSentItem() reports an end of sentence or the fragments run out.
*
*   Returns S_OK when ItemList holds at least one item, S_FALSE when there
*   was nothing left to parse.
****************************************************************************/
#if 0 // not used
HRESULT CTTSEngObj::GetNextSentence( CItemList& ItemList )
{
    HRESULT hr = S_OK;

    //--- Clear the destination
    ItemList.RemoveAll();

    //--- Is there any work to do
    if( m_pCurrFrag == NULL )
    {
        hr = S_FALSE;
    }
    else
    {
        BOOL fSentDone = false;
        BOOL fGoToNextFrag = false;

        while( m_pCurrFrag && !fSentDone )
        {
            if( m_pCurrFrag->State.eAction == SPVA_Speak )
            {
                //--- Spoken text is tokenized one item at a time
                fSentDone = AddNextSentItem( ItemList );

                //--- Advance fragment?
                if( m_pNextChar >= m_pEndChar )
                {
                    fGoToNextFrag = true;
                }
            }
            else
            {
                //--- Add non spoken fragments as a single item covering the
                //    whole fragment
                CSentItem Item;
                Item.pItem           = m_pCurrFrag->pTextStart;
                Item.ulItemLen       = m_pCurrFrag->ulTextLen;
                Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset;
                Item.ulItemSrcLen    = Item.ulItemLen;
                Item.pXmlState       = &m_pCurrFrag->State;
                ItemList.AddTail( Item );
                fGoToNextFrag = true;
            }

            if( fGoToNextFrag )
            {
                //--- Move the parse position to the start of the next
                //    fragment, or clear it when the list is exhausted
                fGoToNextFrag = false;
                m_pCurrFrag = m_pCurrFrag->pNext;
                if( m_pCurrFrag )
                {
                    m_pNextChar = m_pCurrFrag->pTextStart;
                    m_pEndChar  = m_pNextChar + m_pCurrFrag->ulTextLen;
                }
                else
                {
                    m_pNextChar = NULL;
                    m_pEndChar  = NULL;
                }
            }
        } // end while

        if( ItemList.IsEmpty() )
        {
            hr = S_FALSE;
        }
    }
    return hr;
} /* CTTSEngObj::GetNextSentence */

#endif // 0

/*****************************************************************************
* IsSpace *
*---------*
*   Tests whether wc is one of the four whitespace characters this parser
*   recognizes: space, tab, carriage return or line feed.
****************************************************************************/
#if 0 // not used
static BOOL IsSpace( WCHAR wc )
{
    switch( wc )
    {
      case 0x20:    // space
      case 0x9:     // tab
      case 0xD:     // carriage return
      case 0xA:     // line feed
        return true;

      default:
        return false;
    }
}
#endif // 0

/*****************************************************************************
* SkipWhiteSpace *
*----------------*
*   Advances past any run of IsSpace() characters starting at pPos and
*   returns a pointer to the first character that is not whitespace.
****************************************************************************/
#if 0 // not used
static const WCHAR* SkipWhiteSpace( const WCHAR* pPos )
{
    const WCHAR* pCursor = pPos;
    for( ; IsSpace( *pCursor ); ++pCursor )
    {
        // nothing to do, just advancing
    }
    return pCursor;
}
#endif // 0

/*****************************************************************************
* FindNextToken *
*---------------*
*   Locates the next space delimited token in the stream.
*
*   Leading whitespace after pStart is skipped. Returns the start of the
*   token, or NULL if skipping whitespace landed exactly on pEnd. pNext is
*   set to the token start and then advanced to the first character after
*   the token (a NUL, a whitespace character, or pEnd).
****************************************************************************/
#if 0 // not used
static const WCHAR* 
    FindNextToken( const WCHAR* pStart, const WCHAR* pEnd, const WCHAR*& pNext )
{
    const WCHAR* const pToken = SkipWhiteSpace( pStart );
    pNext = pToken;

    //--- Nothing but whitespace was left in the buffer
    if( pToken == pEnd )
    {
        return NULL;
    }

    //--- Walk to the end of the token
    for( ;; )
    {
        if( !*pNext || IsSpace( *pNext ) )
        {
            break;
        }
        ++pNext;
        if( pNext == pEnd )
        {
            //--- This can happen when a text fragment is
            //    tight up against a following xml tag.
            break;
        }
    }
    return pToken;
} /* FindNextToken */
#endif // 0

/*****************************************************************************
* SearchSet *
*-----------*
*   Linear search for wc in the first Count entries of Set. On a match the
*   index of the first matching entry is stored in *pIndex and true is
*   returned; otherwise false is returned and *pIndex is left untouched.
****************************************************************************/
#if 0 // not used
BOOL SearchSet( WCHAR wc, const WCHAR* Set, ULONG Count, ULONG* pIndex )
{
    ULONG ulPos = 0;
    while( ( ulPos < Count ) && ( Set[ulPos] != wc ) )
    {
        ++ulPos;
    }

    if( ulPos == Count )
    {
        return false;
    }

    *pIndex = ulPos;
    return true;
}
#endif // 0

/*****************************************************************************
* CTTSEngObj::AddNextSentItem *
*-----------------------------*
*   Locates the next sentence item in the stream and adds it to the list.
*   Returns true if the last item added is the end of the sentence.
*
*   (Legacy sample code, compiled out.) The next whitespace-delimited token
*   is taken from m_pNextChar..m_pEndChar. Leading and trailing punctuation
*   characters are split off into their own one-character items, and the
*   remaining text becomes the main item. Returns false without adding
*   anything when no token is left in the current fragment.
****************************************************************************/
#if 0 // not used
BOOL CTTSEngObj::AddNextSentItem( CItemList& ItemList )
{
    //--- Get the token
    ULONG ulIndex;
    CSentItem Item;
    Item.pItem = FindNextToken( m_pNextChar, m_pEndChar, m_pNextChar );

    //--- This case can occur when we hit the end of a text fragment.
    //    Returning at this point will cause advancement to the next fragment.
    if( Item.pItem == NULL )
    {
        return false;
    }

    //--- pTrailChar points at the last character of the token
    const WCHAR* pTrailChar = m_pNextChar-1;
    ULONG TokenLen = (ULONG)(m_pNextChar - Item.pItem);

    //--- Split off leading punction if any
    static const WCHAR LeadItems[] = { L'(', L'\"', L'{', L'\'', L'[' };
    while( TokenLen > 1 )
    {
        if( SearchSet( Item.pItem[0], LeadItems, sp_countof(LeadItems), &ulIndex ) )
        {
            CSentItem LItem;
            LItem.pItem           = Item.pItem;
            LItem.ulItemLen       = 1;
            LItem.pXmlState       = &m_pCurrFrag->State;
            LItem.ulItemSrcLen    = LItem.ulItemLen;
            LItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                    (ULONG)( LItem.pItem - m_pCurrFrag->pTextStart );
            ItemList.AddTail( LItem );
            ++Item.pItem;
            --TokenLen;
        }
        else
        {
            break;
        }
    }

    //--- Get primary item insert position. The item is added now as a
    //    placeholder and filled in with SetAt() at the end.
    SPLISTPOS ItemPos = ItemList.AddTail( Item );

    //--- Split off trailing punction if any.
    static const WCHAR EOSItems[] = { L'.', L'!', L'?' };
    static const WCHAR TrailItems[] = { L',', L'\"', L';', L':', L')', L'}', L'\'', L']' };
    BOOL fIsEOS = false;
    while( TokenLen > 1 )
    {
        BOOL fAddTrailItem = false;
        if( SearchSet( *pTrailChar, EOSItems, sp_countof(EOSItems), &ulIndex ) )
        {
            fIsEOS = true;
            fAddTrailItem = true;
        }
        else if( SearchSet( *pTrailChar, TrailItems, sp_countof(TrailItems), &ulIndex ) )
        {
            fAddTrailItem = true;
        }

        if( fAddTrailItem )
        {
            //--- Trailing characters are visited last-to-first; each one is
            //    inserted directly after the main item
            CSentItem TItem;
            TItem.pItem           = pTrailChar;
            TItem.ulItemLen       = 1;
            TItem.pXmlState       = &m_pCurrFrag->State;
            TItem.ulItemSrcLen    = TItem.ulItemLen;
            TItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                    (ULONG)( TItem.pItem - m_pCurrFrag->pTextStart );
            ItemList.InsertAfter( ItemPos, TItem );
            --TokenLen;
            --pTrailChar;
        }
        else
        {
            break;
        }
    }

    //--- Abreviation or sentence end?
    //    If we are at the end of the buffer then EOS is implied.
    //    NOTE(review): this compares a WCHAR against NULL; it is meant as a
    //    test for the terminating 0 character.
    if( *m_pNextChar == NULL )
    {
        fIsEOS = true;
        if( !SearchSet( *(m_pNextChar-1), EOSItems, sp_countof(EOSItems), &ulIndex ) )
        {
            //--- Terminate with a period if we are at the end of a buffer
            //    and no end of sentence punction has been added.
            static const WCHAR* pPeriod = L".";
            CSentItem EOSItem;
            EOSItem.pItem           = pPeriod;
            EOSItem.ulItemLen       = 1;
            EOSItem.pXmlState       = &m_pCurrFrag->State;
            EOSItem.ulItemSrcLen    = EOSItem.ulItemLen;
            EOSItem.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                                    (ULONG)( (m_pNextChar-1) - m_pCurrFrag->pTextStart );
            ItemList.AddTail( EOSItem );
        }
    }
    else if( pTrailChar[1] == L'.' )
    {
        //--- Here is where you would try to prove that it's not EOS
        //    It might be an abreviation. That's a hard problem that
        //    we are not going to attempt here.
    }

    //--- Substitute underscore for apostrophe
    //    NOTE(review): this assigns through a cast of a const WCHAR, i.e. it
    //    tries to modify the caller's text in place; standard C++ rejects a
    //    cast result as an assignment target - confirm before re-enabling.
    for( ULONG i = 0; i < TokenLen; ++i )
    {
        if( Item.pItem[i] == L'\'' )
        {
            ((WCHAR)Item.pItem[i]) = L'_';
        }
    }

    //--- Add the main item
    if( TokenLen > 0 )
    {
        Item.ulItemLen       = TokenLen;
        Item.pXmlState       = &m_pCurrFrag->State;
        Item.ulItemSrcLen    = Item.ulItemLen;
        Item.ulItemSrcOffset = m_pCurrFrag->ulTextSrcOffset +
                               (ULONG)( Item.pItem - m_pCurrFrag->pTextStart );
        ItemList.SetAt( ItemPos, Item );
    }

    return fIsEOS;
} /* CTTSEngObj::AddNextSentItem */
#endif // 0


/*********************************************************************************************
EnglishPhoneToSAPIPhone - Converts an English phoneme number into a SAPI phone number,
flagging it as stressed if the phoneme's name contains a stress digit.

BUGBUG - this only does english sapi

inputs
   PCWSTR         pszNonEnglishPhone - Phoneme name that is scanned for a digit from
                     '1' to '9'; finding one marks the phone as stressed. Can be NULL,
                     in which case no stress is reported.
   DWORD          dwEnglishPhone - English phoneme, looked up with MLexiconEnglishPhoneGet()
returns
   DWORD - SAPI phone (dwSAPIPhone of the lexicon entry), with the high bit (0x80000000)
            set if stressed. Returns 7 (silence) if the English phoneme is unknown.
*/
DWORD EnglishPhoneToSAPIPhone (PCWSTR pszNonEnglishPhone, DWORD dwEnglishPhone)
{
   PLEXENGLISHPHONE pInfo = MLexiconEnglishPhoneGet (dwEnglishPhone);
   if (!pInfo)
      return 7;   // no such phoneme, so report silence

   DWORD dwSAPI = pInfo->dwSAPIPhone;

   // without a name there is nothing to take the stress from
   if (!pszNonEnglishPhone)
      return dwSAPI;

   // the first digit 1..9 found in the name is enough to mark the phone as stressed
   PCWSTR pszCur = pszNonEnglishPhone;
   while (*pszCur) {
      if ((*pszCur >= L'1') && (*pszCur <= L'9'))
         return dwSAPI | 0x80000000;
      pszCur++;
   }

   return dwSAPI;
}


/*********************************************************************************************
EnglishPhoneToSAPIViseme - Converts an English phoneme number into a SAPI viseme number,
flagging it as stressed if the phoneme's name contains a stress digit.

BUGBUG - this only does english sapi

inputs
   PCWSTR         pszNonEnglishPhone - Phoneme name that is scanned for a digit from
                     '1' to '9'; finding one marks the viseme as stressed. Can be NULL,
                     in which case no stress is reported.
   DWORD          dwEnglishPhone - English phoneme, looked up with MLexiconEnglishPhoneGet()
returns
   DWORD - SAPI viseme (dwSAPIViseme of the lexicon entry), with the high bit (0x80000000)
            set if stressed. Returns 0 (silence) if the English phoneme is unknown.
*/
DWORD EnglishPhoneToSAPIViseme (PCWSTR pszNonEnglishPhone, DWORD dwEnglishPhone)
{
   PLEXENGLISHPHONE pInfo = MLexiconEnglishPhoneGet (dwEnglishPhone);
   if (!pInfo)
      return 0;   // no such phoneme, so report the silence viseme

   DWORD dwViseme = pInfo->dwSAPIViseme;
   BOOL fStressed = FALSE;

   // look for a stress digit (1..9) anywhere in the name, if a name was supplied
   if (pszNonEnglishPhone) {
      DWORD dwChar;
      for (dwChar = 0; pszNonEnglishPhone[dwChar] && !fStressed; dwChar++)
         if ((pszNonEnglishPhone[dwChar] >= L'1') && (pszNonEnglishPhone[dwChar] <= L'9'))
            fStressed = TRUE;
   }

   if (fStressed)
      dwViseme |= 0x80000000;

   return dwViseme;
}

/*********************************************************************************************
SPEVENTCompare - qsort() callback that orders SPEVENT records by ascending
ullAudioStreamOffset.

The two offsets are compared directly instead of being subtracted and narrowed to an int:
narrowing a 64-bit difference can flip its sign (difference of 2^31 or more) or turn a
non-zero difference into zero (multiple of 2^32), which would hand qsort() an inconsistent
ordering.

inputs
   const void     *p1 - Pointer to the first SPEVENT
   const void     *p2 - Pointer to the second SPEVENT
returns
   int - -1 if p1 has the lower stream offset, 1 if p2 has the lower stream offset,
         0 if both events are at the same offset.
*/
static int __cdecl SPEVENTCompare (const void *p1, const void *p2)
{
   const SPEVENT *pp1 = (const SPEVENT*) p1;
   const SPEVENT *pp2 = (const SPEVENT*) p2;

   if (pp1->ullAudioStreamOffset < pp2->ullAudioStreamOffset)
      return -1;
   if (pp1->ullAudioStreamOffset > pp2->ullAudioStreamOffset)
      return 1;

   return 0;
}


/*********************************************************************************************
CTTSEngObj::TTSSpeedVolume - Standard API

Polls the SAPI output site for pending actions and reports the speed and volume scales
that are currently in effect. NOTE(review): presumably called back by the M3D TTS engine
while it is synthesizing - confirm against the caller.

Side effects: sets m_fTTSSpokeSomething, and updates m_fTempSpeed / m_fTempVolume when
the site reports a rate or volume change.

inputs
   fp             *pafSpeed - Filled with m_fTempSpeed. A rate adjustment of R from the
                     site is converted to 3^(R/10), so +10 gives 3.0 and -10 gives 1/3.
   fp             *pafVolume - Filled with m_fTempVolume, which is the site's volume
                     divided by 100.
returns
   BOOL - TRUE if the values were filled in. FALSE if the site asked to abort, or asked
            for a skip (skipping isn't implemented, so a skip of 0 items is reported and
            the call fails); in that case the outputs are left untouched.
*/
BOOL CTTSEngObj::TTSSpeedVolume (fp *pafSpeed, fp *pafVolume)
{
   // remember that the engine got as far as producing output
   m_fTTSSpokeSomething = TRUE;

   const DWORD dwPending = m_pOutputSite->GetActions();

   // abort request: stop right away
   if (dwPending & SPVES_ABORT)
      return FALSE;

   // skip request
   if (dwPending & SPVES_SKIP) {
      SPVSKIPTYPE eSkipType;
      long lSkipItems;
      if (SUCCEEDED (m_pOutputSite->GetSkipInfo (&eSkipType, &lSkipItems))) {
         // Tell SAPI that zero items were skipped, since skipping isn't implemented.
         // BUGBUG - probably won't pass test because of this, but ok hack for now.
         // Eventually deal with skip properly
         m_pOutputSite->CompleteSkip (0);
         return FALSE;
      }
   }

   // rate change: every 10 steps is a factor of 3
   if (dwPending & SPVES_RATE) {
      long lRate;
      if (SUCCEEDED (m_pOutputSite->GetRate (&lRate)))
         m_fTempSpeed = pow(3.0, (fp)lRate / 10.0);
   }

   // volume change: the site's value is scaled down by 100
   if (dwPending & SPVES_VOLUME) {
      USHORT usVolume;
      if (SUCCEEDED (m_pOutputSite->GetVolume (&usVolume)))
         m_fTempVolume = (fp)usVolume / 100;
   }

   // hand back whatever is current, changed or not
   *pafVolume = m_fTempVolume;
   *pafSpeed = m_fTempSpeed;

   return TRUE;
}

/*********************************************************************************************
CTTSEngObj::TTSWaveData - Standard API
*/
BOOL CTTSEngObj::TTSWaveData (PCM3DWave pWave)
{
   HRESULT hr;

   // NOTE that spoke something
   m_fTTSSpokeSomething = TRUE;

   // see if wants to abort
   DWORD dwActions = m_pOutputSite->GetActions();
   if (dwActions & SPVES_ABORT)
      return FALSE;

   // handle skip
   if (dwActions & SPVES_SKIP) {
      long lSkipCnt;
      SPVSKIPTYPE eType;
      hr = m_pOutputSite->GetSkipInfo( &eType, &lSkipCnt );
      if( SUCCEEDED( hr ) )
      {
         //--- Notify SAPI how many items we skipped. We're returning zero
         //    because this feature isn't implemented.
         // BUGBUG - probably won't pass test because of this, but ok hack for now.
         // Eventually deal with skip properly
         hr = m_pOutputSite->CompleteSkip( 0 );
         return FALSE;
      }
   }

   // figure out the events
   CListFixed lSPEVENT;
   lSPEVENT.Init (sizeof(SPEVENT));
   ULONGLONG ulEvents = 0;
   hr = m_pOutputSite->GetEventInterest (&ulEvents);
   DWORD i;
   DWORD *padwIndex;
   PWVPHONEME pwp, pwpNext;
   SPEVENT spe;
   PWSTR pszMark, pszDash;
   DWORD dwSampleNext, dwDuration, dwPhoneThis, dwPhoneNext;
   DWORD dwWordStart, dwWordEnd, dwFindLen;
   PWSTR pszFind;
   if (SUCCEEDED(hr)) {
      if (ulEvents & SPEI_TTS_BOOKMARK) {
         memset (&spe, 0, sizeof(spe));
         spe.eEventId = SPEI_TTS_BOOKMARK;
         spe.elParamType = SPET_LPARAM_IS_STRING;

         padwIndex = (DWORD*)pWave->m_lBookmarkIndex.Get(0);
         pszFind = L"sapimark:";
         dwFindLen = (DWORD)wcslen(pszFind);
         for (i = 0; i < pWave->m_lBookmarkString.Num(); i++) {
            pszMark = (PWSTR) pWave->m_lBookmarkString.Get(i);
            if (!pszMark || _wcsnicmp(pszMark, pszFind, dwFindLen))
               continue;   // not the right type of mark
            pszMark += dwFindLen;

            // else, found a bookmark for a word
            spe.wParam = _wtol(pszMark);
            spe.lParam = (LPARAM) pszMark;
            spe.ullAudioStreamOffset = m_ullAudioOff + padwIndex[i] * sizeof(short);

            lSPEVENT.Add (&spe);
         } // i
      } // bookmark

      if (ulEvents & SPEI_WORD_BOUNDARY) {
         memset (&spe, 0, sizeof(spe));
         spe.eEventId = SPEI_WORD_BOUNDARY;
         spe.elParamType = SPET_LPARAM_IS_UNDEFINED;

         padwIndex = (DWORD*)pWave->m_lBookmarkIndex.Get(0);
         pszFind = L"word:";
         dwFindLen = (DWORD)wcslen(pszFind);
         for (i = 0; i < pWave->m_lBookmarkString.Num(); i++) {
            pszMark = (PWSTR) pWave->m_lBookmarkString.Get(i);
            if (!pszMark || _wcsnicmp(pszMark, pszFind, dwFindLen))
               continue;   // not the right type of mark
            pszMark += dwFindLen;
            pszDash = wcschr (pszMark, L'-');
            if (!pszDash)
               continue;   // shouldnt happen
            pszDash++;

            // start and end
            dwWordStart = _wtoi(pszMark);
            dwWordEnd = _wtoi(pszDash);
            dwWordEnd = max(dwWordEnd, dwWordStart);

            // else, found a bookmark for a word
            spe.wParam = dwWordEnd - dwWordStart;
            spe.lParam = dwWordStart;
            spe.ullAudioStreamOffset = m_ullAudioOff + padwIndex[i] * sizeof(short);

            lSPEVENT.Add (&spe);
         } // i
      } // word boundary

      if (ulEvents & SPEI_SENTENCE_BOUNDARY) {
         memset (&spe, 0, sizeof(spe));
         spe.eEventId = SPEI_SENTENCE_BOUNDARY;
         spe.elParamType = SPET_LPARAM_IS_UNDEFINED;

         padwIndex = (DWORD*)pWave->m_lBookmarkIndex.Get(0);
         pszFind = L"sentence:";
               // "sentence:" is automatically created by TTS engine from
               // looking at "word:" bookmarks
         dwFindLen = (DWORD)wcslen(pszFind);
         for (i = 0; i < pWave->m_lBookmarkString.Num(); i++) {
            pszMark = (PWSTR) pWave->m_lBookmarkString.Get(i);
            if (!pszMark || _wcsnicmp(pszMark, pszFind, dwFindLen))
               continue;   // not the right type of mark
            pszMark += dwFindLen;
            pszDash = wcschr (pszMark, L'-');
            if (!pszDash)
               continue;   // shouldnt happen
            pszDash++;

            // start and end
            dwWordStart = _wtoi(pszMark);
            dwWordEnd = _wtoi(pszDash);
            dwWordEnd = max(dwWordEnd, dwWordStart);

            // else, found a bookmark for a word
            spe.wParam = dwWordEnd - dwWordStart;
            spe.lParam = dwWordStart;
            spe.ullAudioStreamOffset = m_ullAudioOff + padwIndex[i] * sizeof(short);

            lSPEVENT.Add (&spe);
         } // i
      } // sentence boundary

      if (ulEvents & SPEI_PHONEME) {
         pwp = (PWVPHONEME) pWave->m_lWVPHONEME.Get(0);
         memset (&spe, 0, sizeof(spe));
         spe.eEventId = SPEI_PHONEME;
         spe.elParamType = SPET_LPARAM_IS_UNDEFINED;
         for (i = 0; i < pWave->m_lWVPHONEME.Num(); i++, pwp++) {
            pwpNext = ((i+1) < pWave->m_lWVPHONEME.Num()) ? (pwp+1) : NULL;
            dwSampleNext = pwpNext ? pwpNext->dwSample : pWave->m_dwSamples;
            dwSampleNext = max(pwp->dwSample, dwSampleNext);
            dwDuration = (dwSampleNext - pwp->dwSample) * 1000 / pWave->m_dwSamplesPerSec;
            dwPhoneThis = EnglishPhoneToSAPIPhone (pwp->awcNameLong, pwp->dwEnglishPhone);
            dwPhoneNext = pwpNext ? EnglishPhoneToSAPIPhone (pwpNext->awcNameLong, pwpNext->dwEnglishPhone) : 7; /* 7 = silence */

            spe.wParam = MAKELONG ((WORD)dwPhoneNext, (WORD)dwDuration);
            spe.lParam = MAKELONG ((WORD)dwPhoneThis, ((dwPhoneThis & 0x80000000) ? SPVFEATURE_STRESSED : 0));
            spe.ullAudioStreamOffset = m_ullAudioOff + pwp->dwSample * sizeof(short);

            lSPEVENT.Add (&spe);
         } // i
      }

      if (ulEvents & SPEI_VISEME) {
         pwp = (PWVPHONEME) pWave->m_lWVPHONEME.Get(0);
         memset (&spe, 0, sizeof(spe));
         spe.eEventId = SPEI_VISEME;
         spe.elParamType = SPET_LPARAM_IS_UNDEFINED;
         for (i = 0; i < pWave->m_lWVPHONEME.Num(); i++, pwp++) {
            pwpNext = ((i+1) < pWave->m_lWVPHONEME.Num()) ? (pwp+1) : NULL;
            dwSampleNext = pwpNext ? pwpNext->dwSample : pWave->m_dwSamples;
            dwSampleNext = max(pwp->dwSample, dwSampleNext);
            dwDuration = (dwSampleNext - pwp->dwSample) * 1000 / pWave->m_dwSamplesPerSec;
            dwPhoneThis = EnglishPhoneToSAPIViseme (pwp->awcNameLong, pwp->dwEnglishPhone);
            dwPhoneNext = pwpNext ? EnglishPhoneToSAPIViseme (pwpNext->awcNameLong, pwpNext->dwEnglishPhone) : 0; /* 0 = silence */

            spe.wParam = MAKELONG ((WORD)dwPhoneNext, (WORD)dwDuration);
            spe.lParam = MAKELONG ((WORD)dwPhoneThis, ((dwPhoneThis & 0x80000000) ? SPVFEATURE_STRESSED : 0));
            spe.ullAudioStreamOffset = m_ullAudioOff + pwp->dwSample * sizeof(short);

            lSPEVENT.Add (&spe);
         } // i      
      } // if want visemes

      // sort events
      SPEVENT *pse = (SPEVENT*)lSPEVENT.Get(0);
      qsort (pse, lSPEVENT.Num(), sizeof(SPEVENT), SPEVENTCompare);

      // send this
      if (lSPEVENT.Num())
         m_pOutputSite->AddEvents (pse, lSPEVENT.Num());
   } // if SUCCEEDED(hr) for GetEventInterest

   hr = m_pOutputSite->Write (pWave->m_psWave, pWave->m_dwSamples * sizeof(short), NULL);

   m_ullAudioOff += pWave->m_dwSamples * sizeof(short);

   return SUCCEEDED(hr);
}

