Add speech-to-text cooldown for local wake word (#108806)

* Deconflict based on wake word

* Undo test

* Make wake up key a string, rename error

* Update snapshot

* Change to "wake word phrase" and normalize

* Move normalization into the wake provider

* Working on describe

* Use satellite info to resolve wake word phrase

* Add test for wake word phrase

* Match phrase with model name in wake word provider

* Check model id

* Use one constant wake word cooldown

* Update homeassistant/components/assist_pipeline/error.py

Co-authored-by: Paulus Schoutsen <balloob@gmail.com>

* Fix wake word tests

---------

Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
This commit is contained in:
Michael Hansen 2024-02-26 19:35:19 -06:00 committed by GitHub
parent c38e0d22b8
commit f6622ea8e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 641 additions and 184 deletions

View file

@@ -55,10 +55,11 @@ from .const import (
CONF_DEBUG_RECORDING_DIR,
DATA_CONFIG,
DATA_LAST_WAKE_UP,
DEFAULT_WAKE_WORD_COOLDOWN,
DOMAIN,
WAKE_WORD_COOLDOWN,
)
from .error import (
DuplicateWakeUpDetectedError,
IntentRecognitionError,
PipelineError,
PipelineNotFound,
@@ -453,9 +454,6 @@ class WakeWordSettings:
audio_seconds_to_buffer: float = 0
"""Seconds of audio to buffer before detection and forward to STT."""
cooldown_seconds: float = DEFAULT_WAKE_WORD_COOLDOWN
"""Seconds after a wake word detection where other detections are ignored."""
@dataclass(frozen=True)
class AudioSettings:
@@ -742,16 +740,22 @@ class PipelineRun:
wake_word_output: dict[str, Any] = {}
else:
# Avoid duplicate detections by checking cooldown
wake_up_key = f"{self.wake_word_entity_id}.{result.wake_word_id}"
last_wake_up = self.hass.data[DATA_LAST_WAKE_UP].get(wake_up_key)
last_wake_up = self.hass.data[DATA_LAST_WAKE_UP].get(
result.wake_word_phrase
)
if last_wake_up is not None:
sec_since_last_wake_up = time.monotonic() - last_wake_up
if sec_since_last_wake_up < wake_word_settings.cooldown_seconds:
_LOGGER.debug("Duplicate wake word detection occurred")
raise WakeWordDetectionAborted
if sec_since_last_wake_up < WAKE_WORD_COOLDOWN:
_LOGGER.debug(
"Duplicate wake word detection occurred for %s",
result.wake_word_phrase,
)
raise DuplicateWakeUpDetectedError(result.wake_word_phrase)
# Record last wake up time to block duplicate detections
self.hass.data[DATA_LAST_WAKE_UP][wake_up_key] = time.monotonic()
self.hass.data[DATA_LAST_WAKE_UP][
result.wake_word_phrase
] = time.monotonic()
if result.queued_audio:
# Add audio that was pending at detection.
@@ -1308,6 +1312,9 @@ class PipelineInput:
stt_stream: AsyncIterable[bytes] | None = None
"""Input audio for stt. Required when start_stage = stt."""
wake_word_phrase: str | None = None
"""Optional key used to de-duplicate wake-ups for local wake word detection."""
intent_input: str | None = None
"""Input for conversation agent. Required when start_stage = intent."""
@@ -1352,6 +1359,25 @@ class PipelineInput:
assert self.stt_metadata is not None
assert stt_processed_stream is not None
if self.wake_word_phrase is not None:
# Avoid duplicate wake-ups by checking cooldown
last_wake_up = self.run.hass.data[DATA_LAST_WAKE_UP].get(
self.wake_word_phrase
)
if last_wake_up is not None:
sec_since_last_wake_up = time.monotonic() - last_wake_up
if sec_since_last_wake_up < WAKE_WORD_COOLDOWN:
_LOGGER.debug(
"Speech-to-text cancelled to avoid duplicate wake-up for %s",
self.wake_word_phrase,
)
raise DuplicateWakeUpDetectedError(self.wake_word_phrase)
# Record last wake up time to block duplicate detections
self.run.hass.data[DATA_LAST_WAKE_UP][
self.wake_word_phrase
] = time.monotonic()
stt_input_stream = stt_processed_stream
if stt_audio_buffer: