Add speech-to-text cooldown for local wake word (#108806)
* Deconflict based on wake word
* Undo test
* Make wake up key a string, rename error
* Update snapshot
* Change to "wake word phrase" and normalize
* Move normalization into the wake provider
* Working on describe
* Use satellite info to resolve wake word phrase
* Add test for wake word phrase
* Match phrase with model name in wake word provider
* Check model id
* Use one constant wake word cooldown
* Update homeassistant/components/assist_pipeline/error.py
  Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
* Fix wake word tests

---------

Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
This commit is contained in:
parent
c38e0d22b8
commit
f6622ea8e0
20 changed files with 641 additions and 184 deletions
|
@ -55,10 +55,11 @@ from .const import (
|
|||
CONF_DEBUG_RECORDING_DIR,
|
||||
DATA_CONFIG,
|
||||
DATA_LAST_WAKE_UP,
|
||||
DEFAULT_WAKE_WORD_COOLDOWN,
|
||||
DOMAIN,
|
||||
WAKE_WORD_COOLDOWN,
|
||||
)
|
||||
from .error import (
|
||||
DuplicateWakeUpDetectedError,
|
||||
IntentRecognitionError,
|
||||
PipelineError,
|
||||
PipelineNotFound,
|
||||
|
@ -453,9 +454,6 @@ class WakeWordSettings:
|
|||
audio_seconds_to_buffer: float = 0
|
||||
"""Seconds of audio to buffer before detection and forward to STT."""
|
||||
|
||||
cooldown_seconds: float = DEFAULT_WAKE_WORD_COOLDOWN
|
||||
"""Seconds after a wake word detection where other detections are ignored."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AudioSettings:
|
||||
|
@ -742,16 +740,22 @@ class PipelineRun:
|
|||
wake_word_output: dict[str, Any] = {}
|
||||
else:
|
||||
# Avoid duplicate detections by checking cooldown
|
||||
wake_up_key = f"{self.wake_word_entity_id}.{result.wake_word_id}"
|
||||
last_wake_up = self.hass.data[DATA_LAST_WAKE_UP].get(wake_up_key)
|
||||
last_wake_up = self.hass.data[DATA_LAST_WAKE_UP].get(
|
||||
result.wake_word_phrase
|
||||
)
|
||||
if last_wake_up is not None:
|
||||
sec_since_last_wake_up = time.monotonic() - last_wake_up
|
||||
if sec_since_last_wake_up < wake_word_settings.cooldown_seconds:
|
||||
_LOGGER.debug("Duplicate wake word detection occurred")
|
||||
raise WakeWordDetectionAborted
|
||||
if sec_since_last_wake_up < WAKE_WORD_COOLDOWN:
|
||||
_LOGGER.debug(
|
||||
"Duplicate wake word detection occurred for %s",
|
||||
result.wake_word_phrase,
|
||||
)
|
||||
raise DuplicateWakeUpDetectedError(result.wake_word_phrase)
|
||||
|
||||
# Record last wake up time to block duplicate detections
|
||||
self.hass.data[DATA_LAST_WAKE_UP][wake_up_key] = time.monotonic()
|
||||
self.hass.data[DATA_LAST_WAKE_UP][
|
||||
result.wake_word_phrase
|
||||
] = time.monotonic()
|
||||
|
||||
if result.queued_audio:
|
||||
# Add audio that was pending at detection.
|
||||
|
@ -1308,6 +1312,9 @@ class PipelineInput:
|
|||
stt_stream: AsyncIterable[bytes] | None = None
|
||||
"""Input audio for stt. Required when start_stage = stt."""
|
||||
|
||||
wake_word_phrase: str | None = None
|
||||
"""Optional key used to de-duplicate wake-ups for local wake word detection."""
|
||||
|
||||
intent_input: str | None = None
|
||||
"""Input for conversation agent. Required when start_stage = intent."""
|
||||
|
||||
|
@ -1352,6 +1359,25 @@ class PipelineInput:
|
|||
assert self.stt_metadata is not None
|
||||
assert stt_processed_stream is not None
|
||||
|
||||
if self.wake_word_phrase is not None:
|
||||
# Avoid duplicate wake-ups by checking cooldown
|
||||
last_wake_up = self.run.hass.data[DATA_LAST_WAKE_UP].get(
|
||||
self.wake_word_phrase
|
||||
)
|
||||
if last_wake_up is not None:
|
||||
sec_since_last_wake_up = time.monotonic() - last_wake_up
|
||||
if sec_since_last_wake_up < WAKE_WORD_COOLDOWN:
|
||||
_LOGGER.debug(
|
||||
"Speech-to-text cancelled to avoid duplicate wake-up for %s",
|
||||
self.wake_word_phrase,
|
||||
)
|
||||
raise DuplicateWakeUpDetectedError(self.wake_word_phrase)
|
||||
|
||||
# Record last wake up time to block duplicate detections
|
||||
self.run.hass.data[DATA_LAST_WAKE_UP][
|
||||
self.wake_word_phrase
|
||||
] = time.monotonic()
|
||||
|
||||
stt_input_stream = stt_processed_stream
|
||||
|
||||
if stt_audio_buffer:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue