Skip TTS events entirely with empty text (#105617)

2023-12-12 23:21:16 -06:00 · 2023-12-12 23:21:16 -06:00 · a73e86a741
commit a73e86a741
parent 431a44ab67
3 changed files with 59 additions and 40 deletions
--- a/homeassistant/components/assist_pipeline/pipeline.py
+++ b/homeassistant/components/assist_pipeline/pipeline.py
@ -369,6 +369,7 @@ class PipelineStage(StrEnum):
    STT = "stt"
    INTENT = "intent"
    TTS = "tts"
+    END = "end"


 PIPELINE_STAGE_ORDER = [
@ -1024,35 +1025,32 @@ class PipelineRun:
            )
        )

-        if tts_input := tts_input.strip():
-            try:
-                # Synthesize audio and get URL
-                tts_media_id = tts_generate_media_source_id(
-                    self.hass,
-                    tts_input,
-                    engine=self.tts_engine,
-                    language=self.pipeline.tts_language,
-                    options=self.tts_options,
-                )
-                tts_media = await media_source.async_resolve_media(
-                    self.hass,
-                    tts_media_id,
-                    None,
-                )
-            except Exception as src_error:
-                _LOGGER.exception("Unexpected error during text-to-speech")
-                raise TextToSpeechError(
-                    code="tts-failed",
-                    message="Unexpected error during text-to-speech",
-                ) from src_error
+        try:
+            # Synthesize audio and get URL
+            tts_media_id = tts_generate_media_source_id(
+                self.hass,
+                tts_input,
+                engine=self.tts_engine,
+                language=self.pipeline.tts_language,
+                options=self.tts_options,
+            )
+            tts_media = await media_source.async_resolve_media(
+                self.hass,
+                tts_media_id,
+                None,
+            )
+        except Exception as src_error:
+            _LOGGER.exception("Unexpected error during text-to-speech")
+            raise TextToSpeechError(
+                code="tts-failed",
+                message="Unexpected error during text-to-speech",
+            ) from src_error

-            _LOGGER.debug("TTS result %s", tts_media)
-            tts_output = {
-                "media_id": tts_media_id,
-                **asdict(tts_media),
-            }
-        else:
-            tts_output = {}
+        _LOGGER.debug("TTS result %s", tts_media)
+        tts_output = {
+            "media_id": tts_media_id,
+            **asdict(tts_media),
+        }

        self.process_event(
            PipelineEvent(PipelineEventType.TTS_END, {"tts_output": tts_output})
@ -1345,7 +1343,11 @@ class PipelineInput:
                        self.conversation_id,
                        self.device_id,
                    )
-                    current_stage = PipelineStage.TTS
+                    if tts_input.strip():
+                        current_stage = PipelineStage.TTS
+                    else:
+                        # Response text is empty or whitespace-only: skip the TTS stage
+                        current_stage = PipelineStage.END

                if self.run.end_stage != PipelineStage.INTENT:
                    # text-to-speech
--- a/tests/components/assist_pipeline/snapshots/test_websocket.ambr
+++ b/tests/components/assist_pipeline/snapshots/test_websocket.ambr
@ -662,15 +662,33 @@
 # ---
 # name: test_pipeline_empty_tts_output.1
  dict({
-    'engine': 'test',
-    'language': 'en-US',
-    'tts_input': '',
-    'voice': 'james_earl_jones',
+    'conversation_id': None,
+    'device_id': None,
+    'engine': 'homeassistant',
+    'intent_input': 'never mind',
+    'language': 'en',
  })
 # ---
 # name: test_pipeline_empty_tts_output.2
  dict({
-    'tts_output': dict({
+    'intent_output': dict({
+      'conversation_id': None,
+      'response': dict({
+        'card': dict({
+        }),
+        'data': dict({
+          'failed': list([
+          ]),
+          'success': list([
+          ]),
+          'targets': list([
+          ]),
+        }),
+        'language': 'en',
+        'response_type': 'action_done',
+        'speech': dict({
+        }),
+      }),
    }),
  })
 # ---
--- a/tests/components/assist_pipeline/test_websocket.py
+++ b/tests/components/assist_pipeline/test_websocket.py
@ -2467,10 +2467,10 @@ async def test_pipeline_empty_tts_output(
    await client.send_json_auto_id(
        {
            "type": "assist_pipeline/run",
-            "start_stage": "tts",
+            "start_stage": "intent",
            "end_stage": "tts",
            "input": {
-                "text": "",
+                "text": "never mind",
            },
        }
    )
@ -2486,16 +2486,15 @@ async def test_pipeline_empty_tts_output(
    assert msg["event"]["data"] == snapshot
    events.append(msg["event"])

-    # text-to-speech
+    # intent
    msg = await client.receive_json()
-    assert msg["event"]["type"] == "tts-start"
+    assert msg["event"]["type"] == "intent-start"
    assert msg["event"]["data"] == snapshot
    events.append(msg["event"])

    msg = await client.receive_json()
-    assert msg["event"]["type"] == "tts-end"
+    assert msg["event"]["type"] == "intent-end"
    assert msg["event"]["data"] == snapshot
-    assert not msg["event"]["data"]["tts_output"]
    events.append(msg["event"])

    # run end