Play pre-recorded message if pipeline is misconfigured (#91985)

* Play pre-recorded message if pipeline is misconfigured * Use voip_device
2023-04-24 21:48:40 -05:00 · 2023-04-24 21:48:40 -05:00 · ebabf504da
commit ebabf504da
parent 5114a1400f
3 changed files with 72 additions and 30 deletions
--- a/homeassistant/components/voip/voip.py
+++ b/homeassistant/components/voip/voip.py
@ -18,6 +18,7 @@ from homeassistant.components.assist_pipeline import (
    Pipeline,
    PipelineEvent,
    PipelineEventType,
+    async_get_pipeline,
    async_pipeline_from_audio_stream,
    select as pipeline_select,
 )
@ -26,19 +27,42 @@ from homeassistant.const import __version__
 from homeassistant.core import Context, HomeAssistant
 from homeassistant.util.ulid import ulid

-from .const import DOMAIN
+from .const import CHANNELS, DOMAIN, RATE, RTP_AUDIO_SETTINGS, WIDTH

 if TYPE_CHECKING:
    from .devices import VoIPDevice, VoIPDevices

-_BUFFERED_CHUNKS_BEFORE_SPEECH = 100  # ~2 seconds
-_TONE_DELAY = 0.2  # seconds before playing tone
-_MESSAGE_DELAY = 1.0  # seconds before playing "not configured" message
-_LOOP_DELAY = 2.0  # seconds before replaying not-configured message
-_RTP_AUDIO_SETTINGS = {"rate": 16000, "width": 2, "channels": 1, "sleep_ratio": 1.01}
 _LOGGER = logging.getLogger(__name__)


+def make_protocol(
+    hass: HomeAssistant, devices: VoIPDevices, call_info: CallInfo
+) -> VoipDatagramProtocol:
+    """Plays a pre-recorded message if pipeline is misconfigured."""
+    voip_device = devices.async_get_or_create(call_info)
+    pipeline_id = pipeline_select.get_chosen_pipeline(
+        hass,
+        DOMAIN,
+        voip_device.voip_id,
+    )
+    pipeline = async_get_pipeline(hass, pipeline_id)
+    if (
+        (pipeline is None)
+        or (pipeline.stt_engine is None)
+        or (pipeline.tts_engine is None)
+    ):
+        # Play pre-recorded message instead of failing
+        return PreRecordMessageProtocol(hass, "problem.pcm")
+
+    # Pipeline is properly configured
+    return PipelineRtpDatagramProtocol(
+        hass,
+        hass.config.language,
+        voip_device,
+        Context(user_id=devices.config_entry.data["user"]),
+    )
+
+
 class HassVoipDatagramProtocol(VoipDatagramProtocol):
    """HA UDP server for Voice over IP (VoIP)."""

@ -51,14 +75,11 @@ class HassVoipDatagramProtocol(VoipDatagramProtocol):
                session_name="voip_hass",
                version=__version__,
            ),
-            valid_protocol_factory=lambda call_info: PipelineRtpDatagramProtocol(
-                hass,
-                hass.config.language,
-                devices.async_get_or_create(call_info),
-                Context(user_id=devices.config_entry.data["user"]),
+            valid_protocol_factory=lambda call_info: make_protocol(
+                hass, devices, call_info
            ),
-            invalid_protocol_factory=lambda call_info: NotConfiguredRtpDatagramProtocol(
-                hass,
+            invalid_protocol_factory=lambda call_info: PreRecordMessageProtocol(
+                hass, "not_configured.pcm"
            ),
        )
        self.hass = hass
@ -81,12 +102,13 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
        context: Context,
        pipeline_timeout: float = 30.0,
        audio_timeout: float = 2.0,
+        buffered_chunks_before_speech: int = 100,
        listening_tone_enabled: bool = True,
        processing_tone_enabled: bool = True,
+        tone_delay: float = 0.2,
    ) -> None:
        """Set up pipeline RTP server."""
-        # STT expects 16Khz mono with 16-bit samples
-        super().__init__(rate=16000, width=2, channels=1)
+        super().__init__(rate=RATE, width=WIDTH, channels=CHANNELS)

        self.hass = hass
        self.language = language
@ -94,8 +116,10 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
        self.pipeline: Pipeline | None = None
        self.pipeline_timeout = pipeline_timeout
        self.audio_timeout = audio_timeout
+        self.buffered_chunks_before_speech = buffered_chunks_before_speech
        self.listening_tone_enabled = listening_tone_enabled
        self.processing_tone_enabled = processing_tone_enabled
+        self.tone_delay = tone_delay

        self._audio_queue: asyncio.Queue[bytes] = asyncio.Queue()
        self._context = context
@ -142,7 +166,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
            # Wait for speech before starting pipeline
            segmenter = VoiceCommandSegmenter()
            chunk_buffer: deque[bytes] = deque(
-                maxlen=_BUFFERED_CHUNKS_BEFORE_SPEECH,
+                maxlen=self.buffered_chunks_before_speech,
            )
            speech_detected = await self._wait_for_speech(
                segmenter,
@ -294,7 +318,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):

            # Assume TTS audio is 16Khz 16-bit mono
            await self.hass.async_add_executor_job(
-                partial(self.send_audio, audio_bytes, **_RTP_AUDIO_SETTINGS)
+                partial(self.send_audio, audio_bytes, **RTP_AUDIO_SETTINGS)
            )
        finally:
            # Signal pipeline to restart
@ -313,8 +337,8 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
            partial(
                self.send_audio,
                self._tone_bytes,
-                silence_before=_TONE_DELAY,
-                **_RTP_AUDIO_SETTINGS,
+                silence_before=self.tone_delay,
+                **RTP_AUDIO_SETTINGS,
            )
        )

@ -331,7 +355,7 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
            partial(
                self.send_audio,
                self._processing_bytes,
-                **_RTP_AUDIO_SETTINGS,
+                **RTP_AUDIO_SETTINGS,
            )
        )

@ -340,13 +364,22 @@ class PipelineRtpDatagramProtocol(RtpDatagramProtocol):
        return (Path(__file__).parent / file_name).read_bytes()


-class NotConfiguredRtpDatagramProtocol(RtpDatagramProtocol):
-    """Plays audio on a loop to inform the user to configure the phone in Home Assistant."""
+class PreRecordMessageProtocol(RtpDatagramProtocol):
+    """Plays a pre-recorded message on a loop."""

-    def __init__(self, hass: HomeAssistant) -> None:
+    def __init__(
+        self,
+        hass: HomeAssistant,
+        file_name: str,
+        message_delay: float = 1.0,
+        loop_delay: float = 2.0,
+    ) -> None:
        """Set up RTP server."""
-        super().__init__(rate=16000, width=2, channels=1)
+        super().__init__(rate=RATE, width=WIDTH, channels=CHANNELS)
        self.hass = hass
+        self.file_name = file_name
+        self.message_delay = message_delay
+        self.loop_delay = loop_delay
        self._audio_task: asyncio.Task | None = None
        self._audio_bytes: bytes | None = None

@ -357,9 +390,8 @@ class NotConfiguredRtpDatagramProtocol(RtpDatagramProtocol):

        if self._audio_bytes is None:
            # 16Khz, 16-bit mono audio message
-            self._audio_bytes = (
-                Path(__file__).parent / "not_configured.pcm"
-            ).read_bytes()
+            file_path = Path(__file__).parent / self.file_name
+            self._audio_bytes = file_path.read_bytes()

        if self._audio_task is None:
            self._audio_task = self.hass.async_create_background_task(
@ -372,12 +404,12 @@ class NotConfiguredRtpDatagramProtocol(RtpDatagramProtocol):
            partial(
                self.send_audio,
                self._audio_bytes,
-                silence_before=_MESSAGE_DELAY,
-                **_RTP_AUDIO_SETTINGS,
+                silence_before=self.message_delay,
+                **RTP_AUDIO_SETTINGS,
            )
        )

-        await asyncio.sleep(_LOOP_DELAY)
+        await asyncio.sleep(self.loop_delay)

        # Allow message to play again
        self._audio_task = None