Automatically convert TTS audio to MP3 on demand (#102814)
* Add ATTR_PREFERRED_FORMAT to TTS for auto-converting audio
* Move conversion into SpeechManager
* Handle None case for expected_extension
* Only use ATTR_AUDIO_OUTPUT
* Prefer MP3 in pipelines
* Automatically convert to mp3 on demand
* Add preferred audio format
* Break out preferred format
* Add ATTR_BLOCKING to allow async fetching
* Make a copy of supported options
* Fix MaryTTS tests
* Update ESPHome to use "wav" instead of "raw"
* Clean up tests, remove blocking
* Clean up rest of TTS tests
* Fix ESPHome tests
* More test coverage
This commit is contained in:
parent
054089291f
commit
ae516ffbb5
19 changed files with 723 additions and 241 deletions
|
@ -4,7 +4,7 @@ import io
|
|||
import logging
|
||||
import wave
|
||||
|
||||
from wyoming.audio import AudioChunk, AudioChunkConverter, AudioStop
|
||||
from wyoming.audio import AudioChunk, AudioStop
|
||||
from wyoming.client import AsyncTcpClient
|
||||
from wyoming.tts import Synthesize, SynthesizeVoice
|
||||
|
||||
|
@ -88,12 +88,16 @@ class WyomingTtsProvider(tts.TextToSpeechEntity):
|
|||
@property
|
||||
def supported_options(self):
|
||||
"""Return list of supported options like voice, emotion."""
|
||||
return [tts.ATTR_AUDIO_OUTPUT, tts.ATTR_VOICE, ATTR_SPEAKER]
|
||||
return [
|
||||
tts.ATTR_AUDIO_OUTPUT,
|
||||
tts.ATTR_VOICE,
|
||||
ATTR_SPEAKER,
|
||||
]
|
||||
|
||||
@property
|
||||
def default_options(self):
|
||||
"""Return a dict include default options."""
|
||||
return {tts.ATTR_AUDIO_OUTPUT: "wav"}
|
||||
return {}
|
||||
|
||||
@callback
|
||||
def async_get_supported_voices(self, language: str) -> list[tts.Voice] | None:
|
||||
|
@ -143,27 +147,4 @@ class WyomingTtsProvider(tts.TextToSpeechEntity):
|
|||
except (OSError, WyomingError):
|
||||
return (None, None)
|
||||
|
||||
if options[tts.ATTR_AUDIO_OUTPUT] == "wav":
|
||||
return ("wav", data)
|
||||
|
||||
# Raw output (convert to 16Khz, 16-bit mono)
|
||||
with io.BytesIO(data) as wav_io:
|
||||
wav_reader: wave.Wave_read = wave.open(wav_io, "rb")
|
||||
raw_data = (
|
||||
AudioChunkConverter(
|
||||
rate=16000,
|
||||
width=2,
|
||||
channels=1,
|
||||
)
|
||||
.convert(
|
||||
AudioChunk(
|
||||
audio=wav_reader.readframes(wav_reader.getnframes()),
|
||||
rate=wav_reader.getframerate(),
|
||||
width=wav_reader.getsampwidth(),
|
||||
channels=wav_reader.getnchannels(),
|
||||
)
|
||||
)
|
||||
.audio
|
||||
)
|
||||
|
||||
return ("raw", raw_data)
|
||||
return ("wav", data)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue