20120824 Update:
Update binary and source code. Add background sound fade-out while zqSpeak TTS is speaking.
Please refer to Default Ducking Experience for detail.
This fade-out effect only works in Windows 7 (and above?).
使用AutoIt寫的
選取文字後按Ctrl-F12即可發音 TTS (Text-to-Speech, 文字轉語音)
會自動判斷日文或英文選擇不同聲音
聲音資料庫使用NeoSpeech Julie (英文), Misaki (日文)
You must install the NeoSpeech Julie and Misaki voices before this script will work.
You can search Google to find where to download them.
zqSpeak.exe - precompiled executable
Download zqSpeak.exe
Modify the script yourself to suit your own need...
Here is the video
Here is the code
#cs ----------------------------------------------------------------------------
AutoIt Version: 3.3.6.1
Author: zaqimon
Use 32-bit only.
[20120824]
Add "Default Ducking Experience", background sound fade out while zqSpeak TTS is speaking.
[20120325]
Select the text then press Hotkey (Ctrl-F12) for TTS
Hotkeys:
Ctrl-F12: Speak/Pause/Resume
Ctrl-Alt-F12: Stop
Text-to-Speech (TTS) using Microsoft Speech API (SAPI) with NeoSpeech VW Julie (English) and VW Misaki (Japanese).
Auto detect language - English and Japanese - and select correct TTS speaker accordingly.
You may google NeoSpeech database and install it first by yourself.
#ce ----------------------------------------------------------------------------
#Include
#Include
#Include
#Include
#Include
; Refuse to run a second copy of this script.
_Singleton("zaqimon_TTS_MS_SAPI")

; Tray/GUI behaviour: no auto-pause on tray click, no default tray menu
; (only our own [Exit] item), and event-driven tray/GUI callbacks.
AutoItSetOption("TrayAutoPause", 0)
AutoItSetOption("TrayMenuMode", 1)
AutoItSetOption("TrayOnEventMode", 1)
AutoItSetOption("GUIOnEventMode", 1)
;TraySetIcon("Shell32.dll",246) ; change default icon in AutoIt compiler will also change tray icon

; State shared by the _Rec_* functions and the window-message handler.
Global $DllWinmm, $HWMain
Global $WaveInHandle, $WavehdrSize, $Recording, $RegisterProc
Global $WAVEHDR1, $WAVEHDR2
Global $WaveBuffer1, $WaveBuffer2

Global Const $MAIN_TITLE = "zqSpeak dummy window"

; Window messages sent by the wave-in device when opened with CALLBACK_WINDOW.
Global Const $MM_WIM_OPEN = 0x3BE, $MM_WIM_CLOSE = 0x3BF, $MM_WIM_DATA = 0x3C0

; SAPI constants used with SpVoice.
Global Const $SVSFlagsAsync = 1, $SVSFPurgeBeforeSpeak = 2
Global Const $SAFT48kHz16BitMono = 38
Global Const $SVEPhoneme = 64
; $oSpeech.Status.RunningState could be 0, it seems this is PAUSE state
Global Const $SRSEDone = 1, $SRSEIsSpeaking = 2

; Hidden helper window whose only job is to receive the WAVEIN messages.
$HWMain = GUICreate($MAIN_TITLE)
GUIRegisterMsg($MM_WIM_OPEN, "MY_WM_WIM")
GUIRegisterMsg($MM_WIM_CLOSE, "MY_WM_WIM")
GUIRegisterMsg($MM_WIM_DATA, "MY_WM_WIM")

; Tray menu with a single [Exit] entry handled by _bye().
Global $exititem = TrayCreateItem("Exit")
TrayItemSetOnEvent($exititem, "_bye")
TraySetState()

; Global hotkeys.
HotKeySet("^{F12}", "SpeakIt")  ; Ctrl-F12: Speak / Pause / Resume
HotKeySet("^!{F12}", "StopIt")  ; Ctrl-Alt-F12: Stop
; Create the SAPI voice and an explicit audio-output object so the output
; format can be pinned below.
Global $oSpeech = ObjCreate('SAPI.SpVoice') ; may need "regsvr32 c:\Windows\System32\Speech\Common\sapi.dll"
Global $oSpeechOut = ObjCreate('SAPI.SpMMAudioOut')

; Both objects are dereferenced below, so both must be validated.
; (The original tested $oSpeech twice and never checked $oSpeechOut.)
If Not IsObj($oSpeech) Or Not IsObj($oSpeechOut) Then
    MsgBox(0x10,'Error','Fail to create SAPI object.' & @CRLF & 'Try "regsvr32 c:\Windows\System32\Speech\Common\sapi.dll" first')
    Exit 2
EndIf

; Register for SpVoice events only after the object is known to be valid.
; SpVoiceEvent_EndStream() is used to fade the background sound back in.
; The returned sink is kept in a global, as the AutoIt documentation's ObjEvent
; example does, so that it lives for the whole run of the script.
Global $oSpeechEvents = ObjEvent($oSpeech, "SpVoiceEvent_")

$oSpeechOut.Format.Type = $SAFT48kHz16BitMono ; SAFT48kHz16BitMono, this sounds best on my computer
$oSpeech.AllowAudioOutputFormatChangesOnNextSet = False ; keep the format chosen above
$oSpeech.AudioOutputStream = $oSpeechOut
;$oSpeechOut.Volume = 10 ; this seems no effect
$oSpeech.Volume = 100
$oSpeech.AlertBoundary = $SVEPhoneme ; Pause/Resume no work if not set AlertBoundary

; Open the dummy wave-in device used to trigger the ducking effect.
_Rec_Init()

; Idle forever; all work happens in hotkey, tray and COM event callbacks.
While 1
    Sleep(100)
WEnd
;==========================================================
; Tray [Exit] handler: release the wave-in resources, then terminate the script.
Func _bye()
    _Rec_Uninit()
    Exit 0
EndFunc
; COM event handler for the SpVoice object's EndStream event; it is bound by
; the ObjEvent() call that registers the "SpVoiceEvent_" function prefix.
; Ends the dummy recording session by calling _Rec_Stop().
Func SpVoiceEvent_EndStream() ; receive event from SpVoice registered by ObjEvent
; Stop recording here. Being called more than once is fine: StopIt() calls Resume and then
; speaks an empty string to mimic Stop, which triggers two EndStream events.
_Rec_Stop()
EndFunc
; Hotkey handler (Ctrl-F12): Speak / Pause / Resume.
;  - Idle (RunningState = $SRSEDone): copy the current selection via Ctrl-C,
;    clean it up, pick a voice by detected language, start the dummy
;    recording (to trigger ducking) and speak asynchronously.
;  - Speaking: pause.
;  - Anything else: assumed to be paused, so resume.
; Side effect: the clipboard is overwritten with the selected text.
Func SpeakIt()
    Local $iState = $oSpeech.Status.RunningState

    If $iState = $SRSEDone Then
        Local $tts_text = ""
        ClipPut("") ; clear clipboard first, or we may hear text currently in clipboard
        Send("^c")  ; send Ctrl-C

        ; The target application fills the clipboard asynchronously, so an
        ; immediate ClipGet() can still see the empty clipboard. Poll briefly.
        Local $hTimer = TimerInit()
        Do
            Sleep(10)
            $tts_text = ClipGet()
        Until StringLen($tts_text) > 0 Or TimerDiff($hTimer) > 500

        If StringLen($tts_text) = 0 Or StringIsSpace($tts_text) Then Return ; if empty text, no need to speak

        PreprocessText($tts_text)
        ;MsgBox(0,"",$tts_text)

        If TextLanguage($tts_text) = 1 Then
            __SpeakIt_SelectVoice("name = VW Misaki") ; Japanese
        Else
            __SpeakIt_SelectVoice("name = VW Julie") ; English
        EndIf

        _Rec_Start()
        Sleep(750) ; it takes one moment to fade out
        $oSpeech.Speak($tts_text, $SVSFlagsAsync + $SVSFPurgeBeforeSpeak)
    ElseIf $iState = $SRSEIsSpeaking Then
        ; pause it when we are speaking
        $oSpeech.Pause()
    Else
        ; we are NOT-SPEAKING && NOT-DONE, just assume PAUSEed
        ; resume it
        $oSpeech.Resume()
    EndIf
EndFunc

; Private helper for SpeakIt(): select the first installed voice matching the
; SAPI attribute query $sQuery. If no voice matches, the current voice is kept
; and a tray tip is shown, instead of raising a COM error on Item(0).
; Returns True when a voice was selected, False otherwise.
Func __SpeakIt_SelectVoice($sQuery)
    Local $oTokens = $oSpeech.GetVoices($sQuery)
    If IsObj($oTokens) Then
        If $oTokens.Count > 0 Then
            $oSpeech.Voice = $oTokens.Item(0)
            Return True
        EndIf
    EndIf
    TrayTip("zqSpeak", "Voice not found (" & $sQuery & "); using the current voice.", 5)
    Return False
EndFunc
; Hotkey handler (Ctrl-Alt-F12): stop speaking.
; SAPI is not asked to stop directly; instead the voice is resumed (in case it
; is paused) and an empty utterance is spoken with the purge flag, which
; discards whatever was still queued.
Func StopIt()
    Local $iFlags = BitOR($SVSFlagsAsync, $SVSFPurgeBeforeSpeak)
    $oSpeech.Resume()
    $oSpeech.Speak("", $iFlags)
EndFunc
; Clean up copied text in place so it is spoken smoothly.
;   $txt - [in/out] text to normalise.
; Steps:
;   1. Re-join words hyphenated across a line break ("exam-<CR><LF>ple" -> "example").
;      At least one line-break character is required after the hyphen, so
;      ordinary in-line hyphens ("well-known", "1990-2000") are left alone.
;   2. Convert U+2019 (right single quotation mark) to an ASCII apostrophe,
;      a correction needed for the "VW Julie" voice.
;   3. Replace every remaining CR / LF with a space.
Func PreprocessText(ByRef $txt)
    $txt = StringRegExpReplace($txt, "(\w)-[\r\n]+(\w)", "\1\2")
    $txt = StringReplace($txt, ChrW(0x2019), "'", 0, 1)
    $txt = StringRegExpReplace($txt, "[\r\n]", " ")
EndFunc
; Guess the language of $txt by inspecting its first 20 characters.
; Returns 1 if any of them is Hiragana/Katakana (U+3040..U+30FF) or a
; CJK Unified Ideograph (U+4E00..U+9FFF); otherwise returns 0.
Func TextLanguage($txt)
    Local $aCodes = StringToASCIIArray(StringLeft($txt, 20))
    Local $iCode
    For $i = 0 To UBound($aCodes) - 1
        $iCode = $aCodes[$i]
        If $iCode >= 0x3040 And $iCode <= 0x30FF Then Return 1 ; Hiragana, Katakana
        If $iCode >= 0x4E00 And $iCode <= 0x9FFF Then Return 1 ; CJK Unified Ideographs
    Next
    Return 0
EndFunc
;===========================================================================================
#cs
in order to trigger "Default Ducking Experience", background sound fade-out/fade-in
waste of time processing nothing just to pretend we are recording something from WAVE_MAPPED_DEFAULT_COMMUNICATION_DEVICE
#ce
; Open the wave-in device and allocate two capture buffers.
; The recorded data is never used; the open recording session exists only to
; trigger the "Default Ducking Experience" described in the comment above.
; Returns 3 if the waveInOpen DllCall itself failed, 4 if waveInOpen reported
; an error; otherwise falls off the end (no explicit return value).
; On failure $WavehdrSize stays 0, which turns _Rec_Start/_Rec_Stop into no-ops.
Func _Rec_Init()
    Local Const $WAVE_MAPPER = -1
    Local Const $WAVE_FORMAT_PCM = 1

    ; Capture format: 8 kHz, 1 channel, 16 bit -> 16 KB per second of audio.
    Local $nChannels = 1, $nSampleRate = 8000, $nBits = 16
    Local $nBlockAlign = $nBits / 8 * $nChannels       ; bytes per sample frame
    Local $nBytesPerSec = $nBlockAlign * $nSampleRate  ; also the size of each buffer

    ; Reset shared state before attempting to open the device.
    $Recording = 0
    $WavehdrSize = 0
    $WaveInHandle = 0
    $DllWinmm = DllOpen("Winmm.dll")

    ; WAVEFORMATEX describing the capture format.
    Local $tFormat = DllStructCreate( _
            "WORD wFormatTag;WORD nChannels;DWORD nSamplesPerSec;DWORD nAvgBytesPerSec;" & _
            "WORD nBlockAlign;WORD wBitsPerSample;WORD cbSize")
    DllStructSetData($tFormat, "wFormatTag", $WAVE_FORMAT_PCM)
    DllStructSetData($tFormat, "nChannels", $nChannels)
    DllStructSetData($tFormat, "nSamplesPerSec", $nSampleRate)
    DllStructSetData($tFormat, "nAvgBytesPerSec", $nBytesPerSec) ; 8000 * 2
    DllStructSetData($tFormat, "nBlockAlign", $nBlockAlign)      ; 16(bit) / 8 * 1(channel)
    DllStructSetData($tFormat, "wBitsPerSample", $nBits)
    DllStructSetData($tFormat, "cbSize", 0)

    ; waveInOpen flag: 0x00010000 CALLBACK_WINDOW || 0x00000010 WAVE_MAPPED_DEFAULT_COMMUNICATION_DEVICE
    ; DO NOT USE CALLBACK_FUNCTION, it's problematic with AutoIt because waveInProc is called from another thread. unstable. crash.
    Local $aOpen = DllCall($DllWinmm, "UINT", "waveInOpen", "ptr*", 0, "UINT", $WAVE_MAPPER, _
            "ptr", DllStructGetPtr($tFormat), "ptr", $HWMain, "ptr", 0, "DWORD", 0x00010010)
    If @error Then Return 3
    If $aOpen[0] <> 0 Then Return 4
    $WaveInHandle = $aOpen[1] ; handle written through the "ptr*" out parameter

    ; WAVEHDR layout shared by both buffer headers.
    Local $sHdrTag = "ptr lpData;DWORD dwBufferLength;DWORD dwBytesRecorded;DWORD_PTR dwUser;" & _
            "DWORD dwFlags;DWORD dwLoops;ptr lpNext;DWORD_PTR reserved"
    Local $sBufTag = "BYTE [" & $nBytesPerSec & "]" ; 1 second of recorded wave data

    ; Double buffering, so the ducking effect is not interrupted while one
    ; buffer is being handed back.
    $WaveBuffer1 = DllStructCreate($sBufTag)
    $WaveBuffer2 = DllStructCreate($sBufTag)

    $WAVEHDR1 = DllStructCreate($sHdrTag)
    DllStructSetData($WAVEHDR1, "lpData", DllStructGetPtr($WaveBuffer1))
    DllStructSetData($WAVEHDR1, "dwBufferLength", $nBytesPerSec)

    $WAVEHDR2 = DllStructCreate($sHdrTag)
    DllStructSetData($WAVEHDR2, "lpData", DllStructGetPtr($WaveBuffer2))
    DllStructSetData($WAVEHDR2, "dwBufferLength", $nBytesPerSec)

    $WavehdrSize = DllStructGetSize($WAVEHDR1) ; sizeof(struct wavehdr)
EndFunc
; Release everything acquired by _Rec_Init(): stop any active recording, close
; the wave-in device, close Winmm.dll and drop the buffer/header structs.
Func _Rec_Uninit()
    ; waveInClose fails while buffers are still queued, so make sure an active
    ; recording (e.g. exiting while speaking) is stopped and its headers are
    ; unprepared first. _Rec_Stop() is a no-op when nothing is recording.
    _Rec_Stop()

    ; If _Rec_Init() failed the handle is 0 and this call simply reports an error.
    DllCall($DllWinmm, "int", "waveInClose", "ptr", $WaveInHandle)
    DllClose($DllWinmm)

    ; Reset shared state so any late _Rec_Start()/_Rec_Stop() call is a no-op
    ; instead of using a closed DLL handle.
    $WaveInHandle = 0
    $WavehdrSize = 0
    $Recording = 0

    ; Free the DllStructs by overwriting the variables.
    $WaveBuffer1 = 0
    $WaveBuffer2 = 0
    $WAVEHDR1 = 0
    $WAVEHDR2 = 0
EndFunc
; Start the dummy recording session: prepare and queue both buffers, then
; start the wave-in device. Does nothing if _Rec_Init() did not complete
; ($WavehdrSize = 0) or a recording is already active.
Func _Rec_Start()
    If $WavehdrSize = 0 Or $Recording = 1 Then Return ; sanity check

    ; waveInPrepareHeader requires dwFlags to be zero. The headers are reused
    ; on every start/stop cycle and may still carry status bits from the
    ; previous cycle, so clear them (and the stale byte count) first.
    DllStructSetData($WAVEHDR1, "dwFlags", 0)
    DllStructSetData($WAVEHDR1, "dwBytesRecorded", 0)
    DllStructSetData($WAVEHDR2, "dwFlags", 0)
    DllStructSetData($WAVEHDR2, "dwBytesRecorded", 0)

    Local $pHdr1 = DllStructGetPtr($WAVEHDR1)
    Local $pHdr2 = DllStructGetPtr($WAVEHDR2)

    DllCall($DllWinmm, "int", "waveInPrepareHeader", "ptr", $WaveInHandle, "ptr", $pHdr1, "UINT", $WavehdrSize)
    DllCall($DllWinmm, "int", "waveInAddBuffer", "ptr", $WaveInHandle, "ptr", $pHdr1, "UINT", $WavehdrSize)
    DllCall($DllWinmm, "int", "waveInPrepareHeader", "ptr", $WaveInHandle, "ptr", $pHdr2, "UINT", $WavehdrSize)
    DllCall($DllWinmm, "int", "waveInAddBuffer", "ptr", $WaveInHandle, "ptr", $pHdr2, "UINT", $WavehdrSize)
    DllCall($DllWinmm, "int", "waveInStart", "ptr", $WaveInHandle)

    ; Set even if a call above failed, so _Rec_Stop() still resets the device
    ; and unprepares the headers.
    $Recording = 1
EndFunc
; Stop the dummy recording session: stop and reset the wave-in device, then
; unprepare both buffer headers. Safe to call repeatedly; it returns at once
; if _Rec_Init() did not complete or nothing is being recorded.
Func _Rec_Stop()
    If $WavehdrSize = 0 Then Return ; never initialised
    If $Recording = 0 Then Return   ; already stopped
    $Recording = 0

    DllCall($DllWinmm, "int", "waveInStop", "ptr", $WaveInHandle)
    DllCall($DllWinmm, "int", "waveInReset", "ptr", $WaveInHandle)

    ; Unprepare the two headers in the same order as they were prepared.
    Local $aHdrPtr[2] = [DllStructGetPtr($WAVEHDR1), DllStructGetPtr($WAVEHDR2)]
    For $i = 0 To 1
        DllCall($DllWinmm, "int", "waveInUnprepareHeader", "ptr", $WaveInHandle, "ptr", $aHdrPtr[$i], "UINT", $WavehdrSize)
    Next
EndFunc
; Window-message handler registered for MM_WIM_OPEN / MM_WIM_CLOSE / MM_WIM_DATA
; on the dummy window. Only MM_WIM_DATA is acted upon: the finished buffer is
; handed straight back to the device, since the recorded data is not needed.
;   $wParam - HWAVEIN of the device
;   $lParam - LPWAVEHDR of the finished buffer
; Returns nothing.
Func MY_WM_WIM($hWnd, $Msg, $wParam, $lParam)
    ; Buffers are also returned with MM_WIM_DATA when recording is stopped or
    ; reset; by then _Rec_Stop() has cleared $Recording and unprepares the
    ; headers, so only re-queue while a recording is actually active.
    If $Msg = $MM_WIM_DATA And $Recording = 1 Then
        DllCall($DllWinmm, "int", "waveInAddBuffer", "ptr", $wParam, "ptr", $lParam, "UINT", $WavehdrSize)
    EndIf
EndFunc
沒有留言:
張貼留言