diff --git a/homeassistant/components/assist_pipeline/pipeline.py b/homeassistant/components/assist_pipeline/pipeline.py index 3303895eec2..320812b2039 100644 --- a/homeassistant/components/assist_pipeline/pipeline.py +++ b/homeassistant/components/assist_pipeline/pipeline.py @@ -254,6 +254,8 @@ class PipelineEventType(StrEnum): WAKE_WORD_START = "wake_word-start" WAKE_WORD_END = "wake_word-end" STT_START = "stt-start" + STT_VAD_START = "stt-vad-start" + STT_VAD_END = "stt-vad-end" STT_END = "stt-end" INTENT_START = "intent-start" INTENT_END = "intent-end" @@ -612,11 +614,31 @@ class PipelineRun: stream: AsyncIterable[bytes], ) -> AsyncGenerator[bytes, None]: """Stop stream when voice command is finished.""" + sent_vad_start = False + timestamp_ms = 0 async for chunk in stream: if not segmenter.process(chunk): + # Silence detected at the end of voice command + self.process_event( + PipelineEvent( + PipelineEventType.STT_VAD_END, + {"timestamp": timestamp_ms}, + ) + ) break + if segmenter.in_command and (not sent_vad_start): + # Speech detected at start of voice command + self.process_event( + PipelineEvent( + PipelineEventType.STT_VAD_START, + {"timestamp": timestamp_ms}, + ) + ) + sent_vad_start = True + yield chunk + timestamp_ms += (len(chunk) // 2) // 16 # milliseconds @ 16 kHz # Transcribe audio stream result = await self.stt_provider.async_process_audio_stream( diff --git a/tests/components/assist_pipeline/snapshots/test_init.ambr b/tests/components/assist_pipeline/snapshots/test_init.ambr index d0330952f04..58835e37973 100644 --- a/tests/components/assist_pipeline/snapshots/test_init.ambr +++ b/tests/components/assist_pipeline/snapshots/test_init.ambr @@ -311,6 +311,12 @@ }), 'type': , }), + dict({ + 'data': dict({ + 'timestamp': 0, + }), + 'type': , + }), dict({ 'data': dict({ 'stt_output': dict({ diff --git a/tests/components/assist_pipeline/test_init.py b/tests/components/assist_pipeline/test_init.py index 44e448aa785..184f479f830 100644 --- 
a/tests/components/assist_pipeline/test_init.py +++ b/tests/components/assist_pipeline/test_init.py @@ -40,7 +40,7 @@ async def test_pipeline_from_audio_stream_auto( In this test, no pipeline is specified. """ - events = [] + events: list[assist_pipeline.PipelineEvent] = [] async def audio_data(): yield b"part1" @@ -79,7 +79,7 @@ async def test_pipeline_from_audio_stream_legacy( """ client = await hass_ws_client(hass) - events = [] + events: list[assist_pipeline.PipelineEvent] = [] async def audio_data(): yield b"part1" @@ -139,7 +139,7 @@ async def test_pipeline_from_audio_stream_entity( """ client = await hass_ws_client(hass) - events = [] + events: list[assist_pipeline.PipelineEvent] = [] async def audio_data(): yield b"part1" @@ -199,7 +199,7 @@ async def test_pipeline_from_audio_stream_no_stt( """ client = await hass_ws_client(hass) - events = [] + events: list[assist_pipeline.PipelineEvent] = [] async def audio_data(): yield b"part1" @@ -257,7 +257,7 @@ async def test_pipeline_from_audio_stream_unknown_pipeline( In this test, the pipeline does not exist. """ - events = [] + events: list[assist_pipeline.PipelineEvent] = [] async def audio_data(): yield b"part1" @@ -294,7 +294,7 @@ async def test_pipeline_from_audio_stream_wake_word( ) -> None: """Test creating a pipeline from an audio stream with wake word.""" - events = [] + events: list[assist_pipeline.PipelineEvent] = [] # [0, 1, ...] wake_chunk_1 = bytes(it.islice(it.cycle(range(256)), BYTES_ONE_SECOND))