2012年3月25日 星期日

TTS AutoIt - zqSpeak


20120824 Update:
Update binary and source code
Add background sound fade-out while zqSpeak TTS is speaking.

Please refer to Default Ducking Experience for detail.
This fade-out effect only works in Windows 7 (and above?).



使用AutoIt寫的
選取文字後按Ctrl-F12即可發音 TTS (Text-to-Speech, 文字轉語音)
會自動判斷日文或英文選擇不同聲音
聲音資料庫使用NeoSpeech Julie (英文), Misaki (日文)

You must install the NeoSpeech Julie and Misaki voices before this script will work.
Search the web to find out where to download them.

zqSpeak.exe - precompiled executable
Download zqSpeak.exe
Modify the script yourself to suit your own needs...

Here is the video


Here is the code

#cs ----------------------------------------------------------------------------

 AutoIt Version: 3.3.6.1
 Author: zaqimon
 
 Use 32-bit only.

[20120824]
 Add "Default Ducking Experience", background sound fade out while zqSpeak TTS is speaking.

[20120325]
 Select the text then press Hotkey (Ctrl-F12) for TTS
 Hotkeys:
   Ctrl-F12: Speak/Pause/Resume
   Ctrl-Alt-F12: Stop
 
 Text-to-Speech (TTS) using the Microsoft Speech API (SAPI) with NeoSpeech VW Julie (English) and VW Misaki (Japanese).
 Auto detect language - English and Japanese - and select correct TTS speaker accordingly.
 You may google NeoSpeech database and install it first by yourself.

#ce ----------------------------------------------------------------------------

#Include 
#Include 
#Include 

#Include 
#Include 


; ---------------------------------------------------------------------------
; Script start-up: single-instance guard, tray/GUI options, shared globals,
; Win32/SAPI constants, the hidden message window and the global hotkeys.
; ---------------------------------------------------------------------------
_Singleton("zaqimon_TTS_MS_SAPI") ; allow only 1 instance

; leave only [Exit] tray menu, remove default menu
Opt( "TrayAutoPause", 0 )
Opt( "TrayMenuMode", 1 )
Opt( "TrayOnEventMode", 1 )
Opt( "GUIOnEventMode", 1 )
;TraySetIcon("Shell32.dll",246) ; change default icon in AutoIt compiler will also change tray icon

; State shared by the _Rec_* functions: two capture buffers, their WAVEHDR
; structs, the wave-in handle, sizeof(WAVEHDR) and a "currently recording" flag.
; NOTE(review): $RegisterProc is declared here but not referenced anywhere in the visible script.
Global $WaveBuffer1, $WaveBuffer2
Global $WAVEHDR1, $WAVEHDR2, $RegisterProc, $WaveInHandle, $WavehdrSize, $Recording
; Handle returned by DllOpen("Winmm.dll") and the handle of the dummy GUI window.
Global $DllWinmm, $HWMain
Const $MAIN_TITLE = "zqSpeak dummy window"

; Window messages the wave-in device sends to $HWMain (waveInOpen is called with CALLBACK_WINDOW).
Const $MM_WIM_OPEN  = 0x3BE
Const $MM_WIM_CLOSE = 0x3BF
Const $MM_WIM_DATA  = 0x3C0

; SAPI enumeration values used by this script: Speak() flags, the audio output
; format, the event type used as alert boundary, and the voice running states.
Const $SVSFlagsAsync = 1
Const $SVSFPurgeBeforeSpeak = 2
Const $SAFT48kHz16BitMono = 38
Const $SVEPhoneme = 64
; $oSpeech.Status.RunningState could be 0, it seems this is PAUSE state
Const $SRSEDone = 1
Const $SRSEIsSpeaking = 2

; dummy window for receiving WAVEIN messages
; (it is never shown; all three wave-in messages are routed to the same handler)
$HWMain = GUICreate($MAIN_TITLE)
GUIRegisterMsg($MM_WIM_OPEN, "MY_WM_WIM")
GUIRegisterMsg($MM_WIM_CLOSE, "MY_WM_WIM")
GUIRegisterMsg($MM_WIM_DATA, "MY_WM_WIM")

; The tray menu contains a single "Exit" item that runs _bye().
$exititem = TrayCreateItem( "Exit" )
TrayItemSetOnEvent( -1, "_bye" )
TraySetState()

; set global hotkey
HotKeySet("^{F12}", "SpeakIt") ; Play/Pause, Ctrl-F12
HotKeySet("^!{F12}", "StopIt") ; Stop, Ctrl-Alt-F12

; ---------------------------------------------------------------------------
; Create and configure the SAPI voice, start the dummy recorder, then idle.
; All real work happens in the hotkey, tray and COM event handlers.
; ---------------------------------------------------------------------------
$oSpeech = ObjCreate('SAPI.SpVoice') ; may need "regsvr32 c:\Windows\System32\Speech\Common\sapi.dll"
$oSpeechOut = ObjCreate('SAPI.SpMMAudioOut')

; Validate BOTH objects before using either of them. (The original tested
; $oSpeech twice, so a failed SpMMAudioOut creation went undetected and the
; script died later on the first $oSpeechOut property access.)
If IsObj($oSpeech) = 0 Or IsObj($oSpeechOut) = 0 Then
    MsgBox(0x10,'Error','Fail to create SAPI object.' & @CRLF & 'Try "regsvr32 c:\Windows\System32\Speech\Common\sapi.dll" first')
    Exit 2
EndIf

; Register the event sink only after $oSpeech is known to be valid.
; SpVoiceEvent_EndStream() is used to fade the background sound back in.
; The returned sink object is kept in a global so it stays referenced for the
; lifetime of the script, as the AutoIt ObjEvent examples do.
Global $oSpeechEvents = ObjEvent($oSpeech,"SpVoiceEvent_")

$oSpeechOut.Format.Type = $SAFT48kHz16BitMono ; SAFT48kHz16BitMono, this sounds best on my computer

; Pin the output format chosen above, then route the voice to that output.
$oSpeech.AllowAudioOutputFormatChangesOnNextSet = False
$oSpeech.AudioOutputStream = $oSpeechOut
;$oSpeechOut.Volume = 10 ; this seems no effect
$oSpeech.Volume = 100
$oSpeech.AlertBoundary = $SVEPhoneme ; Pause/Resume no work if not set AlertBoundary

; Open the wave-in device used to trigger the "Default Ducking Experience".
_Rec_Init()

; Idle forever; the script ends through _bye() (tray "Exit").
While 1
    Sleep(100)
WEnd


;==========================================================

; Tray "Exit" handler (registered with TrayItemSetOnEvent).
; Releases the wave-in resources through _Rec_Uninit() and terminates the script.
Func _bye()
    _Rec_Uninit()
    Exit
EndFunc

; COM event handler for the SpVoice object; the "SpVoiceEvent_" prefix is the one
; passed to ObjEvent. Called when a speech stream ends; it stops the dummy
; recording that was started in SpeakIt().
Func SpVoiceEvent_EndStream() ; receive event from SpVoice registered by ObjEvent
    ; Stop Record here. Allow multiple stop, because I call Resume & Speak nothing to mimic Stop, it triggers 2 EndStream event
    ; (_Rec_Stop() returns immediately when no recording is active, so repeated calls are safe)
    _Rec_Stop()
EndFunc

; Hotkey handler for Ctrl-F12: Speak / Pause / Resume.
;   - voice idle ($SRSEDone): copy the current selection through the clipboard,
;     clean it up, pick the voice matching its language and speak it asynchronously
;   - voice speaking ($SRSEIsSpeaking): pause
;   - any other state: assumed to be "paused", so resume
; Side effect: the previous clipboard content is replaced by the selected text.
Func SpeakIt()
    Local $iState = $oSpeech.Status.RunningState

    If $iState = $SRSEDone Then
        ClipPut("") ; clear clipboard first, or we may hear text currently in clipboard
        Send("^c") ; send Ctrl-C

        ; The target application fills the clipboard asynchronously, so an immediate
        ; ClipGet() can still see the empty clipboard. Poll briefly (at most ~0.5 s)
        ; instead of reading once or blindly sending Ctrl-C twice.
        Local $tts_text = ""
        For $iTry = 1 To 10
            $tts_text = ClipGet()
            If StringLen($tts_text) > 0 Then ExitLoop
            Sleep(50)
        Next
        If StringLen($tts_text) = 0 Or StringIsSpace($tts_text) Then Return ; if empty text, no need to speak

        PreprocessText($tts_text)
        ;MsgBox(0,"",$tts_text)

        ; Choose the speaker: TextLanguage() returns 1 for Japanese, 0 otherwise.
        Local $sVoiceName = "VW Julie" ; English
        If TextLanguage($tts_text) = 1 Then $sVoiceName = "VW Misaki" ; Japanese

        ; GetVoices() returns an empty collection when the voice is not installed;
        ; calling .Item(0) on it raises a COM error, so check the count first.
        Local $oVoices = $oSpeech.GetVoices("name = " & $sVoiceName)
        Local $iVoiceCount = 0
        If IsObj($oVoices) Then $iVoiceCount = $oVoices.Count
        If $iVoiceCount = 0 Then
            MsgBox(0x10, 'Error', 'TTS voice "' & $sVoiceName & '" is not installed.')
            Return
        EndIf
        $oSpeech.Voice = $oVoices.Item(0)

        ; Start the dummy recording so Windows fades the background sound out,
        ; and give the fade a moment before the voice starts.
        _Rec_Start()
        Sleep(750) ; it takes one moment to fade out
        $oSpeech.Speak($tts_text,$SVSFlagsAsync+$SVSFPurgeBeforeSpeak)
    ElseIf $iState = $SRSEIsSpeaking Then
        ; pause it when we are speaking
        $oSpeech.Pause()
    Else
        ; we are NOT-SPEAKING && NOT-DONE, just assume PAUSEed
        ; resume it
        $oSpeech.Resume()
    EndIf
EndFunc

; Hotkey handler for Ctrl-Alt-F12: stop speaking.
; There is no direct "stop" call used here; instead the voice is resumed (in
; case it is paused) and an empty string is spoken with the purge flag, which
; discards whatever is still queued.
Func StopIt()
    Local $iPurgeFlags = $SVSFlagsAsync + $SVSFPurgeBeforeSpeak

    $oSpeech.Resume()
    $oSpeech.Speak("", $iPurgeFlags)
EndFunc

; Cleans up copied text, in place, so that it is spoken smoothly.
;   $txt - [ByRef] text to clean; modified by this function
; Steps:
;   1. Re-join words that were hyphenated across a line break ("exam-<CRLF>ple" -> "example").
;      At least one line break is required after the hyphen, so ordinary
;      hyphenated words such as "well-known" are left untouched.
;   2. Convert the typographic apostrophe U+2019 to U+0027 (correction for "VW Julie").
;   3. Replace every remaining @CR / @LF with a space.
Func PreprocessText(ByRef $txt)
    $txt = StringRegExpReplace($txt, "(\w)-[\r\n]+(\w)", "\1\2")
    $txt = StringReplace($txt, ChrW(0x2019), "'", 0, 1) ; all occurrences, case-sensitive
    $txt = StringRegExpReplace($txt, "[\r\n]", " ")
EndFunc

; Guesses the language of $txt from its first 20 characters.
;   Returns 1 when one of them is Hiragana/Katakana (U+3040..U+30FF) or a
;   CJK Unified Ideograph (U+4E00..U+9FFF), i.e. the text is treated as Japanese.
;   Returns 0 otherwise (treated as English by the caller).
Func TextLanguage($txt)
    Local $aCodes = StringToASCIIArray($txt)
    Local $iLast = _Min(UBound($aCodes) - 1, 19) ; index of the last character to inspect
    Local $iCode

    For $i = 0 To $iLast
        $iCode = $aCodes[$i]
        If $iCode >= 0x3040 And $iCode <= 0x30FF Then Return 1 ; Hiragana, Katakana
        If $iCode >= 0x4E00 And $iCode <= 0x9FFF Then Return 1 ; CJK Unified Ideographs
    Next

    Return 0
EndFunc


;===========================================================================================
#cs
  in order to trigger "Default Ducking Experience", background sound fade-out/fade-in
  waste of time processing nothing just to pretend we are recording something from WAVE_MAPPED_DEFAULT_COMMUNICATION_DEVICE
#ce

; Opens the wave-in device and prepares two one-second capture buffers.
; Nothing is recorded yet; _Rec_Start() / _Rec_Stop() do that.
; Results are stored in the globals $DllWinmm, $WaveInHandle, $WAVEHDR1/2,
; $WaveBuffer1/2 and $WavehdrSize.
; Returns 3 when the waveInOpen DllCall itself fails and 4 when waveInOpen
; reports a non-zero result; there is no explicit return value on success.
; On failure $WavehdrSize stays 0, which turns _Rec_Start()/_Rec_Stop() into no-ops.
Func _Rec_Init()
    Local $WAVE_MAPPER = -1
    Local $WAVE_FORMAT_PCM = 1
    Local $rec_channel = 1, $rec_sample = 8000, $rec_bit = 16
    Local $rec_block = $rec_bit / 8 * $rec_channel
    Local $rec_byte = $rec_block  * $rec_sample
    ; 8kHz. 1 channel, 16bit, needs 16KB buffer per second, I prepare 2 of it

    ; Reset the shared state before anything can fail.
    $WaveInHandle = 0
    $WavehdrSize = 0
    $Recording = 0
    $DllWinmm = DllOpen("Winmm.dll")
    ; Field layout of the Win32 WAVEFORMATEX structure.
    $TagWAVEFORMATEX = _
    "WORD  wFormatTag;" & _
    "WORD  nChannels;" & _
    "DWORD nSamplesPerSec;" & _
    "DWORD nAvgBytesPerSec;" & _
    "WORD  nBlockAlign;" & _
    "WORD  wBitsPerSample;" & _
    "WORD  cbSize"
    $WAVEFORMATEX = DllStructCreate($TagWAVEFORMATEX)
    ; 8kHz, 1 channel, 16-bit
    DllStructSetData($WAVEFORMATEX,"cbSize",0)
    DllStructSetData($WAVEFORMATEX,"wFormatTag",$WAVE_FORMAT_PCM)
    DllStructSetData($WAVEFORMATEX,"nChannels",$rec_channel)
    DllStructSetData($WAVEFORMATEX,"nSamplesPerSec",$rec_sample)
    DllStructSetData($WAVEFORMATEX,"wBitsPerSample",$rec_bit)
    DllStructSetData($WAVEFORMATEX,"nBlockAlign", $rec_block) ; 16(bit) / 8 * 1(channel)
    DllStructSetData($WAVEFORMATEX,"nAvgBytesPerSec",$rec_byte) ; 8000 * 2
    ; waveInOpen flag: 0x00010000 CALLBACK_WINDOW || 0x00000010 WAVE_MAPPED_DEFAULT_COMMUNICATION_DEVICE
    ; DO NOT USE CALLBACK_FUNCTION, it's problematic with AutoIt because waveInProc is called from another thread. unstable. crash.
    ; The callback target is the dummy window $HWMain, whose MM_WIM_* messages go to MY_WM_WIM().
    $MMRESULT = DllCall($DllWinmm,"UINT","waveInOpen","ptr*",0,"UINT",$WAVE_MAPPER, _
    "ptr",DllStructGetPtr($WAVEFORMATEX),"ptr",$HWMain,"ptr",0,"DWORD",0x00010010)
    If @error Then Return 3
    if Not ($MMRESULT[0] = 0) Then Return 4
    
    ; Element [1] of the DllCall result holds the value written to the first ("ptr*") parameter.
    $WaveInHandle = $MMRESULT[1] ; save WaveInHandle
    
    ; Field layout of the Win32 WAVEHDR structure.
    $wavehdr_tag = _
    "ptr lpData;" & _
    "DWORD dwBufferLength;" & _
    "DWORD dwBytesRecorded;" &  _
    "DWORD_PTR dwUser;" & _
    "DWORD dwFlags;" & _
    "DWORD dwLoops;" & _
    "ptr lpNext;" & _
    "DWORD_PTR reserved"
    ; double buffer to ensure the ducking sound won't popup accidentally
    $WAVEHDR1 = DllStructCreate($wavehdr_tag)
    $WaveBuffer1 = DllStructCreate("BYTE [" & $rec_byte & "]") ; 1 second of recorded wave data, why WaveChat.cpp use WORD not BYTE
    DllStructSetData($WAVEHDR1,"dwBufferLength",$rec_byte)
    DllStructSetData($WAVEHDR1,"lpData",DllStructGetPtr($WaveBuffer1))
    $WAVEHDR2 = DllStructCreate($wavehdr_tag)
    $WaveBuffer2 = DllStructCreate("BYTE [" & $rec_byte & "]") ; 1 second of recorded wave data, why WaveChat.cpp use WORD not BYTE
    DllStructSetData($WAVEHDR2,"dwBufferLength",$rec_byte)
    DllStructSetData($WAVEHDR2,"lpData",DllStructGetPtr($WaveBuffer2))
    ; A non-zero $WavehdrSize doubles as the "initialised" flag checked by _Rec_Start()/_Rec_Stop().
    $WavehdrSize = DllStructGetSize($WAVEHDR1) ; sizeof(struct wavehdr)
    
EndFunc

; Releases everything acquired by _Rec_Init(): stops a recording that is still
; running, closes the wave-in device, closes Winmm.dll and frees the buffers.
; Safe to call when _Rec_Init() failed (no device handle is closed then).
Func _Rec_Uninit()
    ; waveInClose fails while buffers are still queued on the device, so make
    ; sure the recording is stopped and reset first. _Rec_Stop() is a no-op when
    ; nothing is being recorded.
    _Rec_Stop()

    ; Only close a handle that was actually opened.
    If Not ($WaveInHandle = 0) Then
        DllCall($DllWinmm,"int","waveInClose","ptr",$WaveInHandle)
        $WaveInHandle = 0
    EndIf

    ; Mark the recorder as uninitialised so that a late _Rec_Start()/_Rec_Stop()
    ; cannot use the closed DLL handle or the freed structures.
    $WavehdrSize = 0

    DllClose($DllWinmm)

    ; Assigning 0 releases the DllStruct memory.
    $WaveBuffer1 = 0
    $WaveBuffer2 = 0
    $WAVEHDR1 = 0
    $WAVEHDR2 = 0
EndFunc

; Starts the dummy recording: prepares and queues both capture buffers on the
; wave-in device, starts the device and sets $Recording.
; Does nothing when the recorder is not initialised or is already recording.
Func _Rec_Start()
    ; sanity checks
    If $WavehdrSize = 0 Then Return
    If $Recording = 1 Then Return

    Local $aHdrPtr[2] = [DllStructGetPtr($WAVEHDR1), DllStructGetPtr($WAVEHDR2)]

    ; Each header is prepared and then queued, first buffer 1, then buffer 2.
    For $i = 0 To 1
        DllCall($DllWinmm, "int", "waveInPrepareHeader", "ptr", $WaveInHandle, "ptr", $aHdrPtr[$i], "UINT", $WavehdrSize)
        DllCall($DllWinmm, "int", "waveInAddBuffer", "ptr", $WaveInHandle, "ptr", $aHdrPtr[$i], "UINT", $WavehdrSize)
    Next

    DllCall($DllWinmm, "int", "waveInStart", "ptr", $WaveInHandle)
    $Recording = 1
EndFunc

; Stops the dummy recording: clears $Recording, stops and resets the wave-in
; device and unprepares both buffer headers.
; Does nothing when the recorder is not initialised or is not recording, so it
; may be called more than once.
Func _Rec_Stop()
    ; sanity checks
    If $WavehdrSize = 0 Then Return
    If $Recording = 0 Then Return

    ; Clear the flag before touching the device.
    $Recording = 0

    DllCall($DllWinmm, "int", "waveInStop", "ptr", $WaveInHandle)
    DllCall($DllWinmm, "int", "waveInReset", "ptr", $WaveInHandle)

    Local $aHdrPtr[2] = [DllStructGetPtr($WAVEHDR1), DllStructGetPtr($WAVEHDR2)]
    For $i = 0 To 1
        DllCall($DllWinmm, "int", "waveInUnprepareHeader", "ptr", $WaveInHandle, "ptr", $aHdrPtr[$i], "UINT", $WavehdrSize)
    Next
EndFunc

; Window-message handler for the wave-in messages registered on the dummy
; window (MM_WIM_OPEN, MM_WIM_CLOSE, MM_WIM_DATA).
;   $hWnd   - window that received the message
;   $Msg    - message id
;   $wParam - for MM_WIM_DATA: the HWAVEIN handle
;   $lParam - for MM_WIM_DATA: pointer to the WAVEHDR of the filled buffer
; Only MM_WIM_DATA is acted upon: the recorded data is ignored and the buffer
; is handed straight back to the device so the recording never runs dry.
Func MY_WM_WIM($hWnd, $Msg, $wParam, $lParam)
    Switch $Msg
        Case $MM_WIM_DATA
            ; Re-queue only while a recording is active. _Rec_Stop() resets the
            ; device, which returns both buffers through MM_WIM_DATA after their
            ; headers were unprepared; those must not be queued again.
            If $Recording = 1 Then
                ; wParam is HWAVEIN, lParam is LPWAVEHDR, don't care data, just reuse the buffer
                DllCall($DllWinmm,"int","waveInAddBuffer","ptr",$wParam,"ptr",$lParam,"UINT",$WavehdrSize)
            EndIf
    EndSwitch
EndFunc

沒有留言:

張貼留言