Deduplicate entity_id in the states table (#89557)

J. Nick Koston 2023-03-12 10:01:58 -10:00 committed by GitHub
parent 8d88b02c2e
commit c41f91be89
33 changed files with 3715 additions and 1018 deletions

View file

@ -10,6 +10,7 @@ from typing import Any
from sqlalchemy.engine import Result
from sqlalchemy.engine.row import Row
from homeassistant.components.recorder import get_instance
from homeassistant.components.recorder.filters import Filters
from homeassistant.components.recorder.models import (
bytes_to_uuid_hex_or_none,
@ -149,16 +150,28 @@ class EventProcessor:
#
return result.yield_per(1024)
stmt = statement_for_request(
start_day,
end_day,
self.event_types,
self.entity_ids,
self.device_ids,
self.filters,
self.context_id,
)
with session_scope(hass=self.hass) as session:
metadata_ids: list[int] | None = None
if self.entity_ids:
instance = get_instance(self.hass)
entity_id_to_metadata_id = instance.states_meta_manager.get_many(
self.entity_ids, session
)
metadata_ids = [
metadata_id
for metadata_id in entity_id_to_metadata_id.values()
if metadata_id is not None
]
stmt = statement_for_request(
start_day,
end_day,
self.event_types,
self.entity_ids,
metadata_ids,
self.device_ids,
self.filters,
self.context_id,
)
return self.humanify(yield_rows(session.execute(stmt)))
def humanify(
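For context on the change above: get_events now resolves the requested entity_ids to states_meta metadata_ids before building the statement, and only ids that actually exist are passed on. A minimal standalone sketch of that lookup, written as a hypothetical helper (the recorder itself uses StatesMetaManager.get_many, which also consults an in-memory cache):

from sqlalchemy import select
from sqlalchemy.orm import Session

from homeassistant.components.recorder.db_schema import StatesMeta


def entity_ids_to_metadata_ids(
    session: Session, entity_ids: list[str]
) -> dict[str, int | None]:
    """Map entity_ids to states_meta.metadata_id, None when not yet known."""
    rows = session.execute(
        select(StatesMeta.entity_id, StatesMeta.metadata_id).where(
            StatesMeta.entity_id.in_(entity_ids)
        )
    ).all()
    found = dict(rows)
    return {entity_id: found.get(entity_id) for entity_id in entity_ids}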

View file

@ -1,6 +1,7 @@
"""Queries for logbook."""
from __future__ import annotations
from collections.abc import Collection
from datetime import datetime as dt
from sqlalchemy.sql.lambdas import StatementLambdaElement
@ -21,6 +22,7 @@ def statement_for_request(
end_day_dt: dt,
event_types: tuple[str, ...],
entity_ids: list[str] | None = None,
states_metadata_ids: Collection[int] | None = None,
device_ids: list[str] | None = None,
filters: Filters | None = None,
context_id: str | None = None,
@ -32,7 +34,9 @@ def statement_for_request(
# No entities: logbook sends everything for the timeframe
# limited by the context_id and the yaml configured filter
if not entity_ids and not device_ids:
states_entity_filter = filters.states_entity_filter() if filters else None
states_entity_filter = (
filters.states_metadata_entity_filter() if filters else None
)
events_entity_filter = filters.events_entity_filter() if filters else None
return all_stmt(
start_day,
@ -56,7 +60,7 @@ def statement_for_request(
start_day,
end_day,
event_types,
entity_ids,
states_metadata_ids or [],
json_quoted_entity_ids,
json_quoted_device_ids,
)
@ -68,7 +72,7 @@ def statement_for_request(
start_day,
end_day,
event_types,
entity_ids,
states_metadata_ids or [],
json_quoted_entity_ids,
)

View file

@ -20,6 +20,7 @@ from homeassistant.components.recorder.db_schema import (
EventTypes,
StateAttributes,
States,
StatesMeta,
)
from homeassistant.components.recorder.filters import like_domain_matchers
from homeassistant.components.recorder.queries import select_event_type_ids
@ -57,7 +58,7 @@ EVENT_COLUMNS = (
STATE_COLUMNS = (
States.state_id.label("state_id"),
States.state.label("state"),
States.entity_id.label("entity_id"),
StatesMeta.entity_id.label("entity_id"),
SHARED_ATTRS_JSON["icon"].as_string().label("icon"),
OLD_FORMAT_ATTRS_JSON["icon"].as_string().label("old_format_icon"),
)
@ -65,7 +66,7 @@ STATE_COLUMNS = (
STATE_CONTEXT_ONLY_COLUMNS = (
States.state_id.label("state_id"),
States.state.label("state"),
States.entity_id.label("entity_id"),
StatesMeta.entity_id.label("entity_id"),
literal(value=None, type_=sqlalchemy.String).label("icon"),
literal(value=None, type_=sqlalchemy.String).label("old_format_icon"),
)
@ -186,6 +187,7 @@ def legacy_select_events_context_id(
.outerjoin(
StateAttributes, (States.attributes_id == StateAttributes.attributes_id)
)
.outerjoin(StatesMeta, (States.metadata_id == StatesMeta.metadata_id))
.outerjoin(EventTypes, (Events.event_type_id == EventTypes.event_type_id))
.where((Events.time_fired_ts > start_day) & (Events.time_fired_ts < end_day))
.where(Events.context_id_bin == context_id_bin)
@ -213,6 +215,7 @@ def apply_states_filters(sel: Select, start_day: float, end_day: float) -> Selec
.outerjoin(
StateAttributes, (States.attributes_id == StateAttributes.attributes_id)
)
.outerjoin(StatesMeta, (States.metadata_id == StatesMeta.metadata_id))
)
@ -249,7 +252,7 @@ def _not_possible_continuous_domain_matcher() -> ColumnElement[bool]:
"""
return sqlalchemy.and_(
*[
~States.entity_id.like(entity_domain)
~StatesMeta.entity_id.like(entity_domain)
for entity_domain in (
*ALWAYS_CONTINUOUS_ENTITY_ID_LIKE,
*CONDITIONALLY_CONTINUOUS_ENTITY_ID_LIKE,
@ -266,7 +269,7 @@ def _conditionally_continuous_domain_matcher() -> ColumnElement[bool]:
"""
return sqlalchemy.or_(
*[
States.entity_id.like(entity_domain)
StatesMeta.entity_id.like(entity_domain)
for entity_domain in CONDITIONALLY_CONTINUOUS_ENTITY_ID_LIKE
],
).self_group()

View file

@ -15,6 +15,7 @@ from homeassistant.components.recorder.db_schema import (
Events,
EventTypes,
States,
StatesMeta,
)
from .common import (
@ -68,6 +69,7 @@ def _apply_devices_context_union(
select_states_context_only()
.select_from(devices_cte)
.outerjoin(States, devices_cte.c.context_id_bin == States.context_id_bin)
.outerjoin(StatesMeta, (States.metadata_id == StatesMeta.metadata_id))
),
)

View file

@ -1,7 +1,7 @@
"""Entities queries for logbook."""
from __future__ import annotations
from collections.abc import Iterable
from collections.abc import Collection, Iterable
import sqlalchemy
from sqlalchemy import lambda_stmt, select, union_all
@ -11,12 +11,13 @@ from sqlalchemy.sql.selectable import CTE, CompoundSelect, Select
from homeassistant.components.recorder.db_schema import (
ENTITY_ID_IN_EVENT,
ENTITY_ID_LAST_UPDATED_INDEX_TS,
METADATA_ID_LAST_UPDATED_INDEX_TS,
OLD_ENTITY_ID_IN_EVENT,
EventData,
Events,
EventTypes,
States,
StatesMeta,
)
from .common import (
@ -35,7 +36,7 @@ def _select_entities_context_ids_sub_query(
start_day: float,
end_day: float,
event_types: tuple[str, ...],
entity_ids: list[str],
states_metadata_ids: Collection[int],
json_quoted_entity_ids: list[str],
) -> Select:
"""Generate a subquery to find context ids for multiple entities."""
@ -47,7 +48,7 @@ def _select_entities_context_ids_sub_query(
.filter(
(States.last_updated_ts > start_day) & (States.last_updated_ts < end_day)
)
.where(States.entity_id.in_(entity_ids)),
.where(States.metadata_id.in_(states_metadata_ids)),
).subquery()
return select(union.c.context_id_bin).group_by(union.c.context_id_bin)
@ -57,7 +58,7 @@ def _apply_entities_context_union(
start_day: float,
end_day: float,
event_types: tuple[str, ...],
entity_ids: list[str],
states_metadata_ids: Collection[int],
json_quoted_entity_ids: list[str],
) -> CompoundSelect:
"""Generate a CTE to find the entity and device context ids and a query to find linked row."""
@ -65,16 +66,16 @@ def _apply_entities_context_union(
start_day,
end_day,
event_types,
entity_ids,
states_metadata_ids,
json_quoted_entity_ids,
).cte()
# We used to optimize this to exclude rows that are already in the union with
# a States.entity_id.not_in(entity_ids) but that made the
# a States.metadata_id.not_in(states_metadata_ids) but that made the
# query much slower on MySQL, and since we already filter them away
# in the python code anyway (they will have context_only
# set on them), the impact is minimal.
return sel.union_all(
states_select_for_entity_ids(start_day, end_day, entity_ids),
states_select_for_entity_ids(start_day, end_day, states_metadata_ids),
apply_events_context_hints(
select_events_context_only()
.select_from(entities_cte)
@ -86,6 +87,7 @@ def _apply_entities_context_union(
select_states_context_only()
.select_from(entities_cte)
.outerjoin(States, entities_cte.c.context_id_bin == States.context_id_bin)
.outerjoin(StatesMeta, (States.metadata_id == StatesMeta.metadata_id))
),
)
@ -94,7 +96,7 @@ def entities_stmt(
start_day: float,
end_day: float,
event_types: tuple[str, ...],
entity_ids: list[str],
states_metadata_ids: Collection[int],
json_quoted_entity_ids: list[str],
) -> StatementLambdaElement:
"""Generate a logbook query for multiple entities."""
@ -106,19 +108,19 @@ def entities_stmt(
start_day,
end_day,
event_types,
entity_ids,
states_metadata_ids,
json_quoted_entity_ids,
).order_by(Events.time_fired_ts)
)
def states_select_for_entity_ids(
start_day: float, end_day: float, entity_ids: list[str]
start_day: float, end_day: float, states_metadata_ids: Collection[int]
) -> Select:
"""Generate a select for states from the States table for specific entities."""
return apply_states_filters(
apply_entities_hints(select_states()), start_day, end_day
).where(States.entity_id.in_(entity_ids))
).where(States.metadata_id.in_(states_metadata_ids))
def apply_event_entity_id_matchers(
@ -140,9 +142,11 @@ def apply_event_entity_id_matchers(
def apply_entities_hints(sel: Select) -> Select:
"""Force mysql to use the right index on large selects."""
return sel.with_hint(
States, f"FORCE INDEX ({ENTITY_ID_LAST_UPDATED_INDEX_TS})", dialect_name="mysql"
States,
f"FORCE INDEX ({METADATA_ID_LAST_UPDATED_INDEX_TS})",
dialect_name="mysql",
).with_hint(
States,
f"FORCE INDEX ({ENTITY_ID_LAST_UPDATED_INDEX_TS})",
f"FORCE INDEX ({METADATA_ID_LAST_UPDATED_INDEX_TS})",
dialect_name="mariadb",
)
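A note on the index-hint change above: with_hint only takes effect when the statement is compiled for the named dialect, so the MySQL and MariaDB hints can sit on the same Select. A generic sketch (not the logbook query itself) that compiles to roughly SELECT ... FROM states FORCE INDEX (ix_states_metadata_id_last_updated_ts) ... on MySQL:

from sqlalchemy import select

from homeassistant.components.recorder.db_schema import (
    METADATA_ID_LAST_UPDATED_INDEX_TS,
    States,
)

# Illustrative only; the real query selects many more columns and filters.
stmt = (
    select(States.state_id)
    .with_hint(
        States,
        f"FORCE INDEX ({METADATA_ID_LAST_UPDATED_INDEX_TS})",
        dialect_name="mysql",
    )
    .where(States.metadata_id == 42)  # 42 is a placeholder metadata_id
    .order_by(States.last_updated_ts)
)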

View file

@ -1,7 +1,7 @@
"""Entities and Devices queries for logbook."""
from __future__ import annotations
from collections.abc import Iterable
from collections.abc import Collection, Iterable
from sqlalchemy import lambda_stmt, select, union_all
from sqlalchemy.sql.elements import ColumnElement
@ -13,6 +13,7 @@ from homeassistant.components.recorder.db_schema import (
Events,
EventTypes,
States,
StatesMeta,
)
from .common import (
@ -35,7 +36,7 @@ def _select_entities_device_id_context_ids_sub_query(
start_day: float,
end_day: float,
event_types: tuple[str, ...],
entity_ids: list[str],
states_metadata_ids: Collection[int],
json_quoted_entity_ids: list[str],
json_quoted_device_ids: list[str],
) -> Select:
@ -50,7 +51,7 @@ def _select_entities_device_id_context_ids_sub_query(
.filter(
(States.last_updated_ts > start_day) & (States.last_updated_ts < end_day)
)
.where(States.entity_id.in_(entity_ids)),
.where(States.metadata_id.in_(states_metadata_ids)),
).subquery()
return select(union.c.context_id_bin).group_by(union.c.context_id_bin)
@ -60,7 +61,7 @@ def _apply_entities_devices_context_union(
start_day: float,
end_day: float,
event_types: tuple[str, ...],
entity_ids: list[str],
states_metadata_ids: Collection[int],
json_quoted_entity_ids: list[str],
json_quoted_device_ids: list[str],
) -> CompoundSelect:
@ -68,17 +69,17 @@ def _apply_entities_devices_context_union(
start_day,
end_day,
event_types,
entity_ids,
states_metadata_ids,
json_quoted_entity_ids,
json_quoted_device_ids,
).cte()
# We used to optimize this to exclude rows that are already in the union with
# a States.entity_id.not_in(entity_ids) but that made the
# a States.metadata_id.not_in(states_metadata_ids) but that made the
# query much slower on MySQL, and since we already filter them away
# in the python code anyway (they will have context_only
# set on them), the impact is minimal.
return sel.union_all(
states_select_for_entity_ids(start_day, end_day, entity_ids),
states_select_for_entity_ids(start_day, end_day, states_metadata_ids),
apply_events_context_hints(
select_events_context_only()
.select_from(devices_entities_cte)
@ -94,6 +95,7 @@ def _apply_entities_devices_context_union(
.outerjoin(
States, devices_entities_cte.c.context_id_bin == States.context_id_bin
)
.outerjoin(StatesMeta, (States.metadata_id == StatesMeta.metadata_id))
),
)
@ -102,7 +104,7 @@ def entities_devices_stmt(
start_day: float,
end_day: float,
event_types: tuple[str, ...],
entity_ids: list[str],
states_metadata_ids: Collection[int],
json_quoted_entity_ids: list[str],
json_quoted_device_ids: list[str],
) -> StatementLambdaElement:
@ -117,7 +119,7 @@ def entities_devices_stmt(
start_day,
end_day,
event_types,
entity_ids,
states_metadata_ids,
json_quoted_entity_ids,
json_quoted_device_ids,
).order_by(Events.time_fired_ts)

View file

@ -64,6 +64,7 @@ from .db_schema import (
EventTypes,
StateAttributes,
States,
StatesMeta,
Statistics,
StatisticsRuns,
StatisticsShortTerm,
@ -82,10 +83,14 @@ from .queries import (
find_shared_data_id,
get_shared_attributes,
get_shared_event_datas,
has_entity_ids_to_migrate,
has_event_type_to_migrate,
has_events_context_ids_to_migrate,
has_states_context_ids_to_migrate,
)
from .run_history import RunHistory
from .table_managers.event_types import EventTypeManager
from .table_managers.states_meta import StatesMetaManager
from .tasks import (
AdjustLRUSizeTask,
AdjustStatisticsTask,
@ -94,6 +99,7 @@ from .tasks import (
CommitTask,
ContextIDMigrationTask,
DatabaseLockTask,
EntityIDMigrationTask,
EventTask,
EventTypeIDMigrationTask,
ImportStatisticsTask,
@ -215,6 +221,7 @@ class Recorder(threading.Thread):
self._state_attributes_ids: LRU = LRU(STATE_ATTRIBUTES_ID_CACHE_SIZE)
self._event_data_ids: LRU = LRU(EVENT_DATA_ID_CACHE_SIZE)
self.event_type_manager = EventTypeManager()
self.states_meta_manager = StatesMetaManager()
self._pending_state_attributes: dict[str, StateAttributes] = {}
self._pending_event_data: dict[str, EventData] = {}
self._pending_expunge: list[States] = []
@ -652,7 +659,7 @@ class Recorder(threading.Thread):
# If the migration is live or the schema is valid, we need to
# wait for startup to complete. If it's not live, we need to continue
# on.
self.hass.add_job(self.async_set_db_ready)
self._activate_and_set_db_ready()
# We wait to start a live migration until startup has finished
# since it can be cpu intensive and we do not want it to compete
@ -663,7 +670,7 @@ class Recorder(threading.Thread):
# Make sure we cleanly close the run if
# we restart before startup finishes
self._shutdown()
self.hass.add_job(self.async_set_db_ready)
self._activate_and_set_db_ready()
return
if not schema_status.valid:
@ -681,11 +688,11 @@ class Recorder(threading.Thread):
"Database Migration Failed",
"recorder_database_migration",
)
self.hass.add_job(self.async_set_db_ready)
self._activate_and_set_db_ready()
self._shutdown()
return
self.hass.add_job(self.async_set_db_ready)
self._activate_and_set_db_ready()
# Catch up with missed statistics
with session_scope(session=self.get_session()) as session:
@ -694,26 +701,44 @@ class Recorder(threading.Thread):
_LOGGER.debug("Recorder processing the queue")
self._adjust_lru_size()
self.hass.add_job(self._async_set_recorder_ready_migration_done)
self._activate_table_managers_or_migrate()
self._run_event_loop()
self._shutdown()
def _activate_table_managers_or_migrate(self) -> None:
"""Activate the table managers or schedule migrations."""
# Currently we always check if context ids need to be migrated
# since there are multiple tables. This could be optimized
# to check both the states and events table to see if there
# are any missing and avoid inserting the task but it currently
# is not needed since there is no dependent code branching
# on the result of the migration.
self.queue_task(ContextIDMigrationTask())
def _activate_and_set_db_ready(self) -> None:
"""Activate the table managers or schedule migrations and mark the db as ready."""
with session_scope(session=self.get_session()) as session:
if session.execute(has_event_type_to_migrate()).scalar():
if (
self.schema_version < 36
or session.execute(has_events_context_ids_to_migrate()).scalar()
or session.execute(has_states_context_ids_to_migrate()).scalar()
):
self.queue_task(ContextIDMigrationTask())
if (
self.schema_version < 37
or session.execute(has_event_type_to_migrate()).scalar()
):
self.queue_task(EventTypeIDMigrationTask())
else:
_LOGGER.debug("Activating event type manager as all data is migrated")
_LOGGER.debug("Activating event_types manager as all data is migrated")
self.event_type_manager.active = True
if (
self.schema_version < 38
or session.execute(has_entity_ids_to_migrate()).scalar()
):
self.queue_task(EntityIDMigrationTask())
else:
_LOGGER.debug("Activating states_meta manager as all data is migrated")
self.states_meta_manager.active = True
# We must only set the db ready after we have set the table managers
# to active if there is no data to migrate.
#
# This ensures that the history queries will use the new tables
# and not the old ones as soon as the API is available.
self.hass.add_job(self.async_set_db_ready)
def _run_event_loop(self) -> None:
"""Run the event loop for the recorder."""
# Use a session for the event read loop
@ -750,6 +775,7 @@ class Recorder(threading.Thread):
self._pre_process_state_change_events(state_change_events)
self._pre_process_non_state_change_events(non_state_change_events)
self.event_type_manager.load(non_state_change_events, self.event_session)
self.states_meta_manager.load(state_change_events, self.event_session)
def _pre_process_state_change_events(self, events: list[Event]) -> None:
"""Load startup state attributes from the database.
@ -1033,13 +1059,26 @@ class Recorder(threading.Thread):
def _process_state_changed_event_into_session(self, event: Event) -> None:
"""Process a state_changed event into the session."""
assert self.event_session is not None
dbstate = States.from_event(event)
if not (
if (entity_id := dbstate.entity_id) is None or not (
shared_attrs_bytes := self._serialize_state_attributes_from_event(event)
):
return
assert self.event_session is not None
event_session = self.event_session
# Map the entity_id to the StatesMeta table
states_meta_manager = self.states_meta_manager
if pending_states_meta := states_meta_manager.get_pending(entity_id):
dbstate.states_meta_rel = pending_states_meta
elif metadata_id := states_meta_manager.get(entity_id, event_session):
dbstate.metadata_id = metadata_id
else:
states_meta = StatesMeta(entity_id=entity_id)
states_meta_manager.add_pending(states_meta)
event_session.add(states_meta)
dbstate.states_meta_rel = states_meta
shared_attrs = shared_attrs_bytes.decode("utf-8")
dbstate.attributes = None
# Matching attributes found in the pending commit
@ -1063,16 +1102,20 @@ class Recorder(threading.Thread):
self._pending_state_attributes[shared_attrs] = dbstate_attributes
self.event_session.add(dbstate_attributes)
if old_state := self._old_states.pop(dbstate.entity_id, None):
if old_state := self._old_states.pop(entity_id, None):
if old_state.state_id:
dbstate.old_state_id = old_state.state_id
else:
dbstate.old_state = old_state
if event.data.get("new_state"):
self._old_states[dbstate.entity_id] = dbstate
self._old_states[entity_id] = dbstate
self._pending_expunge.append(dbstate)
else:
dbstate.state = None
if states_meta_manager.active:
dbstate.entity_id = None
self.event_session.add(dbstate)
def _handle_database_error(self, err: Exception) -> bool:
@ -1138,6 +1181,7 @@ class Recorder(threading.Thread):
self._event_data_ids[event_data.shared_data] = event_data.data_id
self._pending_event_data = {}
self.event_type_manager.post_commit_pending()
self.states_meta_manager.post_commit_pending()
# Expire is an expensive operation (frequently more expensive
# than the flush and commit itself) so we only
@ -1165,6 +1209,7 @@ class Recorder(threading.Thread):
self._pending_state_attributes.clear()
self._pending_event_data.clear()
self.event_type_manager.reset()
self.states_meta_manager.reset()
if not self.event_session:
return
@ -1199,6 +1244,14 @@ class Recorder(threading.Thread):
"""Migrate event type ids if needed."""
return migration.migrate_event_type_ids(self)
def _migrate_entity_ids(self) -> bool:
"""Migrate entity_ids if needed."""
return migration.migrate_entity_ids(self)
def _post_migrate_entity_ids(self) -> bool:
"""Post migrate entity_ids if needed."""
return migration.post_migrate_entity_ids(self)
def _send_keep_alive(self) -> None:
"""Send a keep alive to keep the db connection open."""
assert self.event_session is not None
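The new mapping logic in _process_state_changed_event_into_session resolves a state's metadata_id in three tiers: a pending (not yet committed) StatesMeta object, a known metadata_id from the cache or the database, or a brand-new StatesMeta row added to the session. A condensed sketch of that tiered lookup, assuming the StatesMetaManager interface used above (get_pending, get, add_pending):

from sqlalchemy.orm import Session

from homeassistant.components.recorder.db_schema import States, StatesMeta
from homeassistant.components.recorder.table_managers.states_meta import (
    StatesMetaManager,
)


def link_state_to_metadata(
    dbstate: States,
    entity_id: str,
    manager: StatesMetaManager,
    session: Session,
) -> None:
    """Attach the correct states_meta reference to a new States row."""
    if pending := manager.get_pending(entity_id):
        # Another state for this entity is already waiting to be committed.
        dbstate.states_meta_rel = pending
    elif metadata_id := manager.get(entity_id, session):
        # The entity already has a committed metadata_id.
        dbstate.metadata_id = metadata_id
    else:
        # First state ever recorded for this entity: create the metadata row.
        states_meta = StatesMeta(entity_id=entity_id)
        manager.add_pending(states_meta)
        session.add(states_meta)
        dbstate.states_meta_rel = states_meta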

View file

@ -68,7 +68,7 @@ class Base(DeclarativeBase):
"""Base class for tables."""
SCHEMA_VERSION = 37
SCHEMA_VERSION = 38
_LOGGER = logging.getLogger(__name__)
@ -77,6 +77,7 @@ TABLE_EVENT_DATA = "event_data"
TABLE_EVENT_TYPES = "event_types"
TABLE_STATES = "states"
TABLE_STATE_ATTRIBUTES = "state_attributes"
TABLE_STATES_META = "states_meta"
TABLE_RECORDER_RUNS = "recorder_runs"
TABLE_SCHEMA_CHANGES = "schema_changes"
TABLE_STATISTICS = "statistics"
@ -97,6 +98,7 @@ ALL_TABLES = [
TABLE_EVENT_TYPES,
TABLE_RECORDER_RUNS,
TABLE_SCHEMA_CHANGES,
TABLE_STATES_META,
TABLE_STATISTICS,
TABLE_STATISTICS_META,
TABLE_STATISTICS_RUNS,
@ -111,7 +113,7 @@ TABLES_TO_CHECK = [
]
LAST_UPDATED_INDEX_TS = "ix_states_last_updated_ts"
ENTITY_ID_LAST_UPDATED_INDEX_TS = "ix_states_entity_id_last_updated_ts"
METADATA_ID_LAST_UPDATED_INDEX_TS = "ix_states_metadata_id_last_updated_ts"
EVENTS_CONTEXT_ID_BIN_INDEX = "ix_events_context_id_bin"
STATES_CONTEXT_ID_BIN_INDEX = "ix_states_context_id_bin"
CONTEXT_ID_BIN_MAX_LENGTH = 16
@ -363,7 +365,7 @@ class States(Base):
__table_args__ = (
# Used for fetching the state of entities at a specific time
# (get_states in history.py)
Index(ENTITY_ID_LAST_UPDATED_INDEX_TS, "entity_id", "last_updated_ts"),
Index(METADATA_ID_LAST_UPDATED_INDEX_TS, "metadata_id", "last_updated_ts"),
Index(
STATES_CONTEXT_ID_BIN_INDEX,
"context_id_bin",
@ -374,7 +376,9 @@ class States(Base):
)
__tablename__ = TABLE_STATES
state_id: Mapped[int] = mapped_column(Integer, Identity(), primary_key=True)
entity_id: Mapped[str | None] = mapped_column(String(MAX_LENGTH_STATE_ENTITY_ID))
entity_id: Mapped[str | None] = mapped_column(
String(MAX_LENGTH_STATE_ENTITY_ID)
) # no longer used for new rows
state: Mapped[str | None] = mapped_column(String(MAX_LENGTH_STATE_STATE))
attributes: Mapped[str | None] = mapped_column(
Text().with_variant(mysql.LONGTEXT, "mysql", "mariadb")
@ -421,6 +425,10 @@ class States(Base):
context_parent_id_bin: Mapped[bytes | None] = mapped_column(
LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH)
)
metadata_id: Mapped[int | None] = mapped_column(
Integer, ForeignKey("states_meta.metadata_id"), index=True
)
states_meta_rel: Mapped[StatesMeta | None] = relationship("StatesMeta")
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
@ -583,6 +591,23 @@ class StateAttributes(Base):
return {}
class StatesMeta(Base):
"""Metadata for states."""
__table_args__ = (_DEFAULT_TABLE_ARGS,)
__tablename__ = TABLE_STATES_META
metadata_id: Mapped[int] = mapped_column(Integer, Identity(), primary_key=True)
entity_id: Mapped[str | None] = mapped_column(String(MAX_LENGTH_STATE_ENTITY_ID))
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
"<recorder.StatesMeta("
f"id={self.metadata_id}, entity_id='{self.entity_id}'"
")>"
)
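With schema version 38 a States row references StatesMeta through metadata_id instead of carrying the entity_id string itself. A minimal read-side sketch that re-attaches the entity_id via a join, using only the columns defined above:

from sqlalchemy import select

from homeassistant.components.recorder.db_schema import States, StatesMeta

# Join states to states_meta so every row still exposes an entity_id string.
stmt = (
    select(StatesMeta.entity_id, States.state, States.last_updated_ts)
    .join_from(States, StatesMeta, States.metadata_id == StatesMeta.metadata_id)
    .where(StatesMeta.entity_id == "sensor.example")  # placeholder entity
    .order_by(States.last_updated_ts)
)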
class StatisticsBase:
"""Statistics base class."""

View file

@ -2,7 +2,6 @@
from __future__ import annotations
from collections.abc import Callable, Collection, Iterable
import json
from typing import Any
from sqlalchemy import Column, Text, cast, not_, or_
@ -10,13 +9,14 @@ from sqlalchemy.sql.elements import ColumnElement
from homeassistant.const import CONF_DOMAINS, CONF_ENTITIES, CONF_EXCLUDE, CONF_INCLUDE
from homeassistant.helpers.entityfilter import CONF_ENTITY_GLOBS
from homeassistant.helpers.json import json_dumps
from homeassistant.helpers.typing import ConfigType
from .db_schema import ENTITY_ID_IN_EVENT, OLD_ENTITY_ID_IN_EVENT, States
from .db_schema import ENTITY_ID_IN_EVENT, OLD_ENTITY_ID_IN_EVENT, States, StatesMeta
DOMAIN = "history"
HISTORY_FILTERS = "history_filters"
JSON_NULL = json.dumps(None)
JSON_NULL = json_dumps(None)
GLOB_TO_SQL_CHARS = {
ord("*"): "%",
@ -194,7 +194,10 @@ class Filters:
return i_entities
def states_entity_filter(self) -> ColumnElement | None:
"""Generate the entity filter query."""
"""Generate the States.entity_id filter query.
This is no longer used except by the legacy queries.
"""
def _encoder(data: Any) -> Any:
"""Nothing to encode for states since there is no json."""
@ -203,9 +206,19 @@ class Filters:
# The type annotation should be improved so the type ignore can be removed
return self._generate_filter_for_columns((States.entity_id,), _encoder) # type: ignore[arg-type]
def states_metadata_entity_filter(self) -> ColumnElement | None:
"""Generate the StatesMeta.entity_id filter query."""
def _encoder(data: Any) -> Any:
"""Nothing to encode for states since there is no json."""
return data
# The type annotation should be improved so the type ignore can be removed
return self._generate_filter_for_columns((StatesMeta.entity_id,), _encoder) # type: ignore[arg-type]
def events_entity_filter(self) -> ColumnElement:
"""Generate the entity filter query."""
_encoder = json.dumps
_encoder = json_dumps
return or_(
# sqlalchemy's SQLite json implementation always
# wraps everything with JSON_QUOTE so it resolves to 'null'

View file

@ -1,13 +1,23 @@
"""Provide pre-made queries on top of the recorder component."""
from __future__ import annotations
from collections.abc import MutableMapping
from datetime import datetime
from typing import Any
from sqlalchemy.orm.session import Session
from homeassistant.core import HomeAssistant, State
from ... import recorder
from ..filters import Filters
from .const import NEED_ATTRIBUTE_DOMAINS, SIGNIFICANT_DOMAINS
from .legacy import (
get_full_significant_states_with_session,
get_last_state_changes,
get_significant_states,
get_significant_states_with_session,
state_changes_during_period,
)
from .modern import (
get_full_significant_states_with_session as _modern_get_full_significant_states_with_session,
get_last_state_changes as _modern_get_last_state_changes,
get_significant_states as _modern_get_significant_states,
get_significant_states_with_session as _modern_get_significant_states_with_session,
state_changes_during_period as _modern_state_changes_during_period,
)
# These are the APIs of this package
@ -20,3 +30,154 @@ __all__ = [
"get_significant_states_with_session",
"state_changes_during_period",
]
def get_full_significant_states_with_session(
hass: HomeAssistant,
session: Session,
start_time: datetime,
end_time: datetime | None = None,
entity_ids: list[str] | None = None,
filters: Filters | None = None,
include_start_time_state: bool = True,
significant_changes_only: bool = True,
no_attributes: bool = False,
) -> MutableMapping[str, list[State]]:
"""Return a dict of significant states during a time period."""
if not recorder.get_instance(hass).states_meta_manager.active:
from .legacy import ( # pylint: disable=import-outside-toplevel
get_full_significant_states_with_session as _legacy_get_full_significant_states_with_session,
)
_target = _legacy_get_full_significant_states_with_session
else:
_target = _modern_get_full_significant_states_with_session
return _target(
hass,
session,
start_time,
end_time,
entity_ids,
filters,
include_start_time_state,
significant_changes_only,
no_attributes,
)
def get_last_state_changes(
hass: HomeAssistant, number_of_states: int, entity_id: str
) -> MutableMapping[str, list[State]]:
"""Return the last number_of_states."""
if not recorder.get_instance(hass).states_meta_manager.active:
from .legacy import ( # pylint: disable=import-outside-toplevel
get_last_state_changes as _legacy_get_last_state_changes,
)
_target = _legacy_get_last_state_changes
else:
_target = _modern_get_last_state_changes
return _target(hass, number_of_states, entity_id)
def get_significant_states(
hass: HomeAssistant,
start_time: datetime,
end_time: datetime | None = None,
entity_ids: list[str] | None = None,
filters: Filters | None = None,
include_start_time_state: bool = True,
significant_changes_only: bool = True,
minimal_response: bool = False,
no_attributes: bool = False,
compressed_state_format: bool = False,
) -> MutableMapping[str, list[State | dict[str, Any]]]:
"""Return a dict of significant states during a time period."""
if not recorder.get_instance(hass).states_meta_manager.active:
from .legacy import ( # pylint: disable=import-outside-toplevel
get_significant_states as _legacy_get_significant_states,
)
_target = _legacy_get_significant_states
else:
_target = _modern_get_significant_states
return _target(
hass,
start_time,
end_time,
entity_ids,
filters,
include_start_time_state,
significant_changes_only,
minimal_response,
no_attributes,
compressed_state_format,
)
def get_significant_states_with_session(
hass: HomeAssistant,
session: Session,
start_time: datetime,
end_time: datetime | None = None,
entity_ids: list[str] | None = None,
filters: Filters | None = None,
include_start_time_state: bool = True,
significant_changes_only: bool = True,
minimal_response: bool = False,
no_attributes: bool = False,
compressed_state_format: bool = False,
) -> MutableMapping[str, list[State | dict[str, Any]]]:
"""Return a dict of significant states during a time period."""
if not recorder.get_instance(hass).states_meta_manager.active:
from .legacy import ( # pylint: disable=import-outside-toplevel
get_significant_states_with_session as _legacy_get_significant_states_with_session,
)
_target = _legacy_get_significant_states_with_session
else:
_target = _modern_get_significant_states_with_session
return _target(
hass,
session,
start_time,
end_time,
entity_ids,
filters,
include_start_time_state,
significant_changes_only,
minimal_response,
no_attributes,
compressed_state_format,
)
def state_changes_during_period(
hass: HomeAssistant,
start_time: datetime,
end_time: datetime | None = None,
entity_id: str | None = None,
no_attributes: bool = False,
descending: bool = False,
limit: int | None = None,
include_start_time_state: bool = True,
) -> MutableMapping[str, list[State]]:
"""Return a list of states that changed during a time period."""
if not recorder.get_instance(hass).states_meta_manager.active:
from .legacy import ( # pylint: disable=import-outside-toplevel
state_changes_during_period as _legacy_state_changes_during_period,
)
_target = _legacy_state_changes_during_period
else:
_target = _modern_state_changes_during_period
return _target(
hass,
start_time,
end_time,
entity_id,
no_attributes,
descending,
limit,
include_start_time_state,
)
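Callers of the history package are unchanged; each wrapper above dispatches to .legacy or .modern depending on whether the StatesMeta migration has finished (states_meta_manager.active). A minimal usage sketch, assuming hass is a running HomeAssistant instance with the recorder set up:

from datetime import timedelta

import homeassistant.util.dt as dt_util
from homeassistant.components.recorder import history

# The package picks the legacy or modern implementation internally.
states = history.get_significant_states(
    hass,
    dt_util.utcnow() - timedelta(hours=1),
    entity_ids=["sensor.example"],  # placeholder entity_id
)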

View file

@ -0,0 +1,783 @@
"""Provide pre-made queries on top of the recorder component."""
from __future__ import annotations
from collections import defaultdict
from collections.abc import Callable, Iterable, Iterator, MutableMapping
from datetime import datetime
from itertools import groupby
import logging
from operator import itemgetter
from typing import Any, cast
from sqlalchemy import Column, and_, func, lambda_stmt, or_, select
from sqlalchemy.engine.row import Row
from sqlalchemy.orm.properties import MappedColumn
from sqlalchemy.orm.query import Query
from sqlalchemy.orm.session import Session
from sqlalchemy.sql.expression import literal
from sqlalchemy.sql.lambdas import StatementLambdaElement
from homeassistant.const import COMPRESSED_STATE_LAST_UPDATED, COMPRESSED_STATE_STATE
from homeassistant.core import HomeAssistant, State, split_entity_id
import homeassistant.util.dt as dt_util
from ... import recorder
from ..db_schema import RecorderRuns, StateAttributes, States, StatesMeta
from ..filters import Filters
from ..models import (
LazyState,
process_timestamp,
process_timestamp_to_utc_isoformat,
row_to_compressed_state,
)
from ..util import execute_stmt_lambda_element, session_scope
from .const import (
IGNORE_DOMAINS_ENTITY_ID_LIKE,
LAST_CHANGED_KEY,
NEED_ATTRIBUTE_DOMAINS,
SIGNIFICANT_DOMAINS,
SIGNIFICANT_DOMAINS_ENTITY_ID_LIKE,
STATE_KEY,
)
_LOGGER = logging.getLogger(__name__)
_BASE_STATES = (
States.metadata_id,
States.state,
States.last_changed_ts,
States.last_updated_ts,
)
_BASE_STATES_NO_LAST_CHANGED = ( # type: ignore[var-annotated]
States.metadata_id,
States.state,
literal(value=None).label("last_changed_ts"),
States.last_updated_ts,
)
_QUERY_STATE_NO_ATTR = (*_BASE_STATES,)
_QUERY_STATE_NO_ATTR_NO_LAST_CHANGED = (*_BASE_STATES_NO_LAST_CHANGED,)
_QUERY_STATES = (
*_BASE_STATES,
# Remove States.attributes once all attributes are in StateAttributes.shared_attrs
States.attributes,
StateAttributes.shared_attrs,
)
_QUERY_STATES_NO_LAST_CHANGED = (
*_BASE_STATES_NO_LAST_CHANGED,
# Remove States.attributes once all attributes are in StateAttributes.shared_attrs
States.attributes,
StateAttributes.shared_attrs,
)
_FIELD_MAP = {
cast(MappedColumn, field).name: idx
for idx, field in enumerate(_QUERY_STATE_NO_ATTR)
}
def _lambda_stmt_and_join_attributes(
no_attributes: bool, include_last_changed: bool = True
) -> tuple[StatementLambdaElement, bool]:
"""Return the lambda_stmt and if StateAttributes should be joined.
Because these are lambda_stmt the values inside the lambdas need
to be explicitly written out to avoid caching the wrong values.
"""
# If no_attributes was requested we do the query
# without the attributes fields and do not join the
# state_attributes table
if no_attributes:
if include_last_changed:
return (
lambda_stmt(lambda: select(*_QUERY_STATE_NO_ATTR)),
False,
)
return (
lambda_stmt(lambda: select(*_QUERY_STATE_NO_ATTR_NO_LAST_CHANGED)),
False,
)
if include_last_changed:
return lambda_stmt(lambda: select(*_QUERY_STATES)), True
return lambda_stmt(lambda: select(*_QUERY_STATES_NO_LAST_CHANGED)), True
def get_significant_states(
hass: HomeAssistant,
start_time: datetime,
end_time: datetime | None = None,
entity_ids: list[str] | None = None,
filters: Filters | None = None,
include_start_time_state: bool = True,
significant_changes_only: bool = True,
minimal_response: bool = False,
no_attributes: bool = False,
compressed_state_format: bool = False,
) -> MutableMapping[str, list[State | dict[str, Any]]]:
"""Wrap get_significant_states_with_session with an sql session."""
with session_scope(hass=hass) as session:
return get_significant_states_with_session(
hass,
session,
start_time,
end_time,
entity_ids,
filters,
include_start_time_state,
significant_changes_only,
minimal_response,
no_attributes,
compressed_state_format,
)
def _ignore_domains_filter(query: Query) -> Query:
"""Add a filter to ignore domains we do not fetch history for."""
return query.filter(
and_(
*[
~StatesMeta.entity_id.like(entity_domain)
for entity_domain in IGNORE_DOMAINS_ENTITY_ID_LIKE
]
)
)
def _significant_states_stmt(
start_time: datetime,
end_time: datetime | None,
entity_ids: list[str] | None,
metadata_ids: list[int] | None,
filters: Filters | None,
significant_changes_only: bool,
no_attributes: bool,
) -> StatementLambdaElement:
"""Query the database for significant state changes."""
stmt, join_attributes = _lambda_stmt_and_join_attributes(
no_attributes, include_last_changed=not significant_changes_only
)
join_states_meta = False
if (
entity_ids
and len(entity_ids) == 1
and significant_changes_only
and split_entity_id(entity_ids[0])[0] not in SIGNIFICANT_DOMAINS
):
stmt += lambda q: q.filter(
(States.last_changed_ts == States.last_updated_ts)
| States.last_changed_ts.is_(None)
)
elif significant_changes_only:
stmt += lambda q: q.filter(
or_(
*[
StatesMeta.entity_id.like(entity_domain)
for entity_domain in SIGNIFICANT_DOMAINS_ENTITY_ID_LIKE
],
(
(States.last_changed_ts == States.last_updated_ts)
| States.last_changed_ts.is_(None)
),
)
)
join_states_meta = True
if metadata_ids:
stmt += lambda q: q.filter(
# https://github.com/python/mypy/issues/2608
States.metadata_id.in_(metadata_ids) # type:ignore[arg-type]
)
else:
stmt += _ignore_domains_filter
if filters and filters.has_config:
entity_filter = filters.states_metadata_entity_filter()
stmt = stmt.add_criteria(
lambda q: q.filter(entity_filter), track_on=[filters]
)
join_states_meta = True
start_time_ts = start_time.timestamp()
stmt += lambda q: q.filter(States.last_updated_ts > start_time_ts)
if end_time:
end_time_ts = end_time.timestamp()
stmt += lambda q: q.filter(States.last_updated_ts < end_time_ts)
if join_states_meta:
stmt += lambda q: q.outerjoin(
StatesMeta, States.metadata_id == StatesMeta.metadata_id
)
if join_attributes:
stmt += lambda q: q.outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
)
stmt += lambda q: q.order_by(States.metadata_id, States.last_updated_ts)
return stmt
def get_significant_states_with_session(
hass: HomeAssistant,
session: Session,
start_time: datetime,
end_time: datetime | None = None,
entity_ids: list[str] | None = None,
filters: Filters | None = None,
include_start_time_state: bool = True,
significant_changes_only: bool = True,
minimal_response: bool = False,
no_attributes: bool = False,
compressed_state_format: bool = False,
) -> MutableMapping[str, list[State | dict[str, Any]]]:
"""Return states changes during UTC period start_time - end_time.
entity_ids is an optional iterable of entities to include in the results.
filters is an optional SQLAlchemy filter which will be applied to the database
queries unless entity_ids is given, in which case it is ignored.
Significant states are all states where there is a state change,
as well as all states from certain domains (for instance
thermostat so that we get current temperature in our graphs).
"""
metadata_ids: list[int] | None = None
entity_id_to_metadata_id: dict[str, int | None] | None = None
if entity_ids:
instance = recorder.get_instance(hass)
entity_id_to_metadata_id = instance.states_meta_manager.get_many(
entity_ids, session
)
metadata_ids = [
metadata_id
for metadata_id in entity_id_to_metadata_id.values()
if metadata_id is not None
]
stmt = _significant_states_stmt(
start_time,
end_time,
entity_ids,
metadata_ids,
filters,
significant_changes_only,
no_attributes,
)
states = execute_stmt_lambda_element(
session, stmt, None if entity_ids else start_time, end_time
)
return _sorted_states_to_dict(
hass,
session,
states,
start_time,
entity_ids,
entity_id_to_metadata_id,
filters,
include_start_time_state,
minimal_response,
no_attributes,
compressed_state_format,
)
def get_full_significant_states_with_session(
hass: HomeAssistant,
session: Session,
start_time: datetime,
end_time: datetime | None = None,
entity_ids: list[str] | None = None,
filters: Filters | None = None,
include_start_time_state: bool = True,
significant_changes_only: bool = True,
no_attributes: bool = False,
) -> MutableMapping[str, list[State]]:
"""Variant of get_significant_states_with_session.
The difference from get_significant_states_with_session is that it does not
return minimal responses.
"""
return cast(
MutableMapping[str, list[State]],
get_significant_states_with_session(
hass=hass,
session=session,
start_time=start_time,
end_time=end_time,
entity_ids=entity_ids,
filters=filters,
include_start_time_state=include_start_time_state,
significant_changes_only=significant_changes_only,
minimal_response=False,
no_attributes=no_attributes,
),
)
def _state_changed_during_period_stmt(
start_time: datetime,
end_time: datetime | None,
metadata_id: int | None,
no_attributes: bool,
descending: bool,
limit: int | None,
) -> StatementLambdaElement:
stmt, join_attributes = _lambda_stmt_and_join_attributes(
no_attributes, include_last_changed=False
)
start_time_ts = start_time.timestamp()
stmt += lambda q: q.filter(
(
(States.last_changed_ts == States.last_updated_ts)
| States.last_changed_ts.is_(None)
)
& (States.last_updated_ts > start_time_ts)
)
if end_time:
end_time_ts = end_time.timestamp()
stmt += lambda q: q.filter(States.last_updated_ts < end_time_ts)
if metadata_id:
stmt += lambda q: q.filter(States.metadata_id == metadata_id)
if join_attributes:
stmt += lambda q: q.outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
)
if descending:
stmt += lambda q: q.order_by(States.metadata_id, States.last_updated_ts.desc())
else:
stmt += lambda q: q.order_by(States.metadata_id, States.last_updated_ts)
if limit:
stmt += lambda q: q.limit(limit)
return stmt
def state_changes_during_period(
hass: HomeAssistant,
start_time: datetime,
end_time: datetime | None = None,
entity_id: str | None = None,
no_attributes: bool = False,
descending: bool = False,
limit: int | None = None,
include_start_time_state: bool = True,
) -> MutableMapping[str, list[State]]:
"""Return states changes during UTC period start_time - end_time."""
entity_id = entity_id.lower() if entity_id is not None else None
entity_ids = [entity_id] if entity_id is not None else None
with session_scope(hass=hass) as session:
metadata_id: int | None = None
entity_id_to_metadata_id = None
if entity_id:
instance = recorder.get_instance(hass)
metadata_id = instance.states_meta_manager.get(entity_id, session)
entity_id_to_metadata_id = {entity_id: metadata_id}
stmt = _state_changed_during_period_stmt(
start_time,
end_time,
metadata_id,
no_attributes,
descending,
limit,
)
states = execute_stmt_lambda_element(
session, stmt, None if entity_id else start_time, end_time
)
return cast(
MutableMapping[str, list[State]],
_sorted_states_to_dict(
hass,
session,
states,
start_time,
entity_ids,
entity_id_to_metadata_id,
include_start_time_state=include_start_time_state,
),
)
def _get_last_state_changes_stmt(
number_of_states: int, metadata_id: int
) -> StatementLambdaElement:
stmt, join_attributes = _lambda_stmt_and_join_attributes(
False, include_last_changed=False
)
stmt += lambda q: q.where(
States.state_id
== (
select(States.state_id)
.filter(States.metadata_id == metadata_id)
.order_by(States.last_updated_ts.desc())
.limit(number_of_states)
.subquery()
).c.state_id
)
if join_attributes:
stmt += lambda q: q.outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
)
stmt += lambda q: q.order_by(States.state_id.desc())
return stmt
def get_last_state_changes(
hass: HomeAssistant, number_of_states: int, entity_id: str
) -> MutableMapping[str, list[State]]:
"""Return the last number_of_states."""
entity_id_lower = entity_id.lower()
entity_ids = [entity_id_lower]
with session_scope(hass=hass) as session:
instance = recorder.get_instance(hass)
if not (metadata_id := instance.states_meta_manager.get(entity_id, session)):
return {}
entity_id_to_metadata_id: dict[str, int | None] = {entity_id_lower: metadata_id}
stmt = _get_last_state_changes_stmt(number_of_states, metadata_id)
states = list(execute_stmt_lambda_element(session, stmt))
return cast(
MutableMapping[str, list[State]],
_sorted_states_to_dict(
hass,
session,
reversed(states),
dt_util.utcnow(),
entity_ids,
entity_id_to_metadata_id,
include_start_time_state=False,
),
)
def _get_states_for_entities_stmt(
run_start: datetime,
utc_point_in_time: datetime,
metadata_ids: list[int],
no_attributes: bool,
) -> StatementLambdaElement:
"""Baked query to get states for specific entities."""
stmt, join_attributes = _lambda_stmt_and_join_attributes(
no_attributes, include_last_changed=True
)
# We got an include-list of entities, accelerate the query by filtering already
# in the inner query.
run_start_ts = process_timestamp(run_start).timestamp()
utc_point_in_time_ts = dt_util.utc_to_timestamp(utc_point_in_time)
stmt += lambda q: q.join(
(
most_recent_states_for_entities_by_date := (
select(
States.metadata_id.label("max_metadata_id"),
# https://github.com/sqlalchemy/sqlalchemy/issues/9189
# pylint: disable-next=not-callable
func.max(States.last_updated_ts).label("max_last_updated"),
)
.filter(
(States.last_updated_ts >= run_start_ts)
& (States.last_updated_ts < utc_point_in_time_ts)
)
.filter(States.metadata_id.in_(metadata_ids))
.group_by(States.metadata_id)
.subquery()
)
),
and_(
States.metadata_id
== most_recent_states_for_entities_by_date.c.max_metadata_id,
States.last_updated_ts
== most_recent_states_for_entities_by_date.c.max_last_updated,
),
)
if join_attributes:
stmt += lambda q: q.outerjoin(
StateAttributes, (States.attributes_id == StateAttributes.attributes_id)
)
return stmt
def _get_states_for_all_stmt(
run_start: datetime,
utc_point_in_time: datetime,
filters: Filters | None,
no_attributes: bool,
) -> StatementLambdaElement:
"""Baked query to get states for all entities."""
stmt, join_attributes = _lambda_stmt_and_join_attributes(
no_attributes, include_last_changed=True
)
# We did not get an include-list of entities, query all states in the inner
# query, then filter out unwanted domains as well as applying the custom filter.
# This filtering can't be done in the inner query because the domain column is
# not indexed and we can't control what's in the custom filter.
run_start_ts = process_timestamp(run_start).timestamp()
utc_point_in_time_ts = dt_util.utc_to_timestamp(utc_point_in_time)
stmt += lambda q: q.join(
(
most_recent_states_by_date := (
select(
States.metadata_id.label("max_metadata_id"),
# https://github.com/sqlalchemy/sqlalchemy/issues/9189
# pylint: disable-next=not-callable
func.max(States.last_updated_ts).label("max_last_updated"),
)
.filter(
(States.last_updated_ts >= run_start_ts)
& (States.last_updated_ts < utc_point_in_time_ts)
)
.group_by(States.metadata_id)
.subquery()
)
),
and_(
States.metadata_id == most_recent_states_by_date.c.max_metadata_id,
States.last_updated_ts == most_recent_states_by_date.c.max_last_updated,
),
)
stmt += _ignore_domains_filter
if filters and filters.has_config:
entity_filter = filters.states_metadata_entity_filter()
stmt = stmt.add_criteria(lambda q: q.filter(entity_filter), track_on=[filters])
if join_attributes:
stmt += lambda q: q.outerjoin(
StateAttributes, (States.attributes_id == StateAttributes.attributes_id)
)
stmt += lambda q: q.outerjoin(
StatesMeta, States.metadata_id == StatesMeta.metadata_id
)
return stmt
def _get_rows_with_session(
hass: HomeAssistant,
session: Session,
utc_point_in_time: datetime,
entity_ids: list[str] | None = None,
entity_id_to_metadata_id: dict[str, int | None] | None = None,
run: RecorderRuns | None = None,
filters: Filters | None = None,
no_attributes: bool = False,
) -> Iterable[Row]:
"""Return the states at a specific point in time."""
if entity_ids and len(entity_ids) == 1:
if not entity_id_to_metadata_id or not (
metadata_id := entity_id_to_metadata_id.get(entity_ids[0])
):
return []
return execute_stmt_lambda_element(
session,
_get_single_entity_states_stmt(
utc_point_in_time, metadata_id, no_attributes
),
)
if run is None:
run = recorder.get_instance(hass).run_history.get(utc_point_in_time)
if run is None or process_timestamp(run.start) > utc_point_in_time:
# History did not run before utc_point_in_time
return []
# We have more than one entity to look at so we need to do a query on states
# since the last recorder run started.
if entity_ids:
if not entity_id_to_metadata_id:
return []
metadata_ids = [
metadata_id
for metadata_id in entity_id_to_metadata_id.values()
if metadata_id is not None
]
if not metadata_ids:
return []
stmt = _get_states_for_entities_stmt(
run.start, utc_point_in_time, metadata_ids, no_attributes
)
else:
stmt = _get_states_for_all_stmt(
run.start, utc_point_in_time, filters, no_attributes
)
return execute_stmt_lambda_element(session, stmt)
def _get_single_entity_states_stmt(
utc_point_in_time: datetime,
metadata_id: int,
no_attributes: bool = False,
) -> StatementLambdaElement:
# Use an entirely different (and extremely fast) query if we only
# have a single entity id
stmt, join_attributes = _lambda_stmt_and_join_attributes(
no_attributes, include_last_changed=True
)
utc_point_in_time_ts = dt_util.utc_to_timestamp(utc_point_in_time)
stmt += (
lambda q: q.filter(
States.last_updated_ts < utc_point_in_time_ts,
States.metadata_id == metadata_id,
)
.order_by(States.last_updated_ts.desc())
.limit(1)
)
if join_attributes:
stmt += lambda q: q.outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
)
return stmt
def _sorted_states_to_dict(
hass: HomeAssistant,
session: Session,
states: Iterable[Row],
start_time: datetime,
entity_ids: list[str] | None,
entity_id_to_metadata_id: dict[str, int | None] | None,
filters: Filters | None = None,
include_start_time_state: bool = True,
minimal_response: bool = False,
no_attributes: bool = False,
compressed_state_format: bool = False,
) -> MutableMapping[str, list[State | dict[str, Any]]]:
"""Convert SQL results into JSON friendly data structure.
This takes our state list and turns it into a JSON friendly data
structure {'entity_id': [list of states], 'entity_id2': [list of states]}
States must be sorted by entity_id and last_updated
We also need to go back and create a synthetic zero data point for
each list of states, otherwise our graphs won't start on the Y
axis correctly.
"""
field_map = _FIELD_MAP
state_class: Callable[
[Row, dict[str, dict[str, Any]], datetime | None], State | dict[str, Any]
]
if compressed_state_format:
state_class = row_to_compressed_state
attr_time = COMPRESSED_STATE_LAST_UPDATED
attr_state = COMPRESSED_STATE_STATE
else:
state_class = LazyState
attr_time = LAST_CHANGED_KEY
attr_state = STATE_KEY
result: dict[str, list[State | dict[str, Any]]] = defaultdict(list)
metadata_id_to_entity_id: dict[int, str] = {}
metadata_id_idx = field_map["metadata_id"]
# Set all entity IDs to empty lists in result set to maintain the order
if entity_ids is not None:
for ent_id in entity_ids:
result[ent_id] = []
if entity_id_to_metadata_id:
metadata_id_to_entity_id = {
v: k for k, v in entity_id_to_metadata_id.items() if v is not None
}
else:
metadata_id_to_entity_id = recorder.get_instance(
hass
).states_meta_manager.get_metadata_id_to_entity_id(session)
# Get the states at the start time
initial_states: dict[int, Row] = {}
if include_start_time_state:
initial_states = {
row[metadata_id_idx]: row
for row in _get_rows_with_session(
hass,
session,
start_time,
entity_ids,
entity_id_to_metadata_id,
filters=filters,
no_attributes=no_attributes,
)
}
if entity_ids and len(entity_ids) == 1:
if not entity_id_to_metadata_id or not (
metadata_id := entity_id_to_metadata_id.get(entity_ids[0])
):
return {}
states_iter: Iterable[tuple[int, Iterator[Row]]] = (
(metadata_id, iter(states)),
)
else:
key_func = itemgetter(metadata_id_idx)
states_iter = groupby(states, key_func)
# Append all changes to it
for metadata_id, group in states_iter:
attr_cache: dict[str, dict[str, Any]] = {}
prev_state: Column | str
if not (entity_id := metadata_id_to_entity_id.get(metadata_id)):
continue
ent_results = result[entity_id]
if row := initial_states.pop(metadata_id, None):
prev_state = row.state
ent_results.append(state_class(row, attr_cache, start_time, entity_id=entity_id)) # type: ignore[call-arg]
if (
not minimal_response
or split_entity_id(entity_id)[0] in NEED_ATTRIBUTE_DOMAINS
):
ent_results.extend(
state_class(db_state, attr_cache, None, entity_id=entity_id) # type: ignore[call-arg]
for db_state in group
)
continue
# With minimal response we only provide a native
# State for the first and last response. All the states
# in-between only provide the "state" and the
# "last_changed".
if not ent_results:
if (first_state := next(group, None)) is None:
continue
prev_state = first_state.state
ent_results.append(
state_class(first_state, attr_cache, None, entity_id=entity_id) # type: ignore[call-arg]
)
state_idx = field_map["state"]
#
# minimal_response only makes sense with last_updated == last_changed
#
# We use last_updated for last_changed since it's the same
#
# With minimal response we do not care about attribute
# changes so we can filter out duplicate states
last_updated_ts_idx = field_map["last_updated_ts"]
if compressed_state_format:
for row in group:
if (state := row[state_idx]) != prev_state:
ent_results.append(
{
attr_state: state,
attr_time: row[last_updated_ts_idx],
}
)
prev_state = state
for row in group:
if (state := row[state_idx]) != prev_state:
ent_results.append(
{
attr_state: state,
attr_time: process_timestamp_to_utc_isoformat(
dt_util.utc_from_timestamp(row[last_updated_ts_idx])
),
}
)
prev_state = state
# If there are no states beyond the initial state,
# the state was never popped from initial_states
for metadata_id, row in initial_states.items():
if entity_id := metadata_id_to_entity_id.get(metadata_id):
result[entity_id].append(
state_class(row, {}, start_time, entity_id=entity_id) # type: ignore[call-arg]
)
# Filter out the empty lists if some states had 0 results.
return {key: val for key, val in result.items() if val}
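A note on the lambda_stmt pattern used throughout this new module: values such as timestamps are computed into plain locals before being captured by the lambdas, so SQLAlchemy tracks them as bound parameters while caching the statement structure (see the docstring of _lambda_stmt_and_join_attributes above). A small generic sketch of the pattern, not the history query itself:

from sqlalchemy import lambda_stmt, select
from sqlalchemy.sql.lambdas import StatementLambdaElement

from homeassistant.components.recorder.db_schema import States


def states_after(start_time_ts: float) -> StatementLambdaElement:
    """Build a cached lambda statement with start_time_ts as a bound value."""
    # The float is computed outside the lambda and only referenced inside it,
    # so the cached statement is reused and only the parameter value changes.
    stmt = lambda_stmt(lambda: select(States.metadata_id, States.state))
    stmt += lambda q: q.filter(States.last_updated_ts > start_time_ts)
    return stmt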

View file

@ -38,6 +38,7 @@ from .db_schema import (
EventTypes,
SchemaChanges,
States,
StatesMeta,
Statistics,
StatisticsMeta,
StatisticsRuns,
@ -45,6 +46,8 @@ from .db_schema import (
)
from .models import process_timestamp
from .queries import (
batch_cleanup_entity_ids,
find_entity_ids_to_migrate,
find_event_type_to_migrate,
find_events_context_ids_to_migrate,
find_states_context_ids_to_migrate,
@ -68,6 +71,8 @@ if TYPE_CHECKING:
LIVE_MIGRATION_MIN_SCHEMA_VERSION = 0
_EMPTY_CONTEXT_ID = b"\x00" * 16
_EMPTY_ENTITY_ID = "missing.entity_id"
_EMPTY_EVENT_TYPE = "missing_event_type"
_LOGGER = logging.getLogger(__name__)
@ -985,6 +990,10 @@ def _apply_update( # noqa: C901
_create_index(session_maker, "events", "ix_events_event_type_id")
_drop_index(session_maker, "events", "ix_events_event_type_time_fired_ts")
_create_index(session_maker, "events", "ix_events_event_type_id_time_fired_ts")
elif new_version == 38:
_add_columns(session_maker, "states", [f"metadata_id {big_int}"])
_create_index(session_maker, "states", "ix_states_metadata_id")
_create_index(session_maker, "states", "ix_states_metadata_id_last_updated_ts")
else:
raise ValueError(f"No schema migration defined for version {new_version}")
@ -1305,7 +1314,10 @@ def migrate_event_type_ids(instance: Recorder) -> bool:
event_types = {event_type for _, event_type in events}
event_type_to_id = event_type_manager.get_many(event_types, session)
if missing_event_types := {
event_type
# We should never see None for the event_type in the events table
# but we need to be defensive so we don't fail the migration
# because of a bad event
_EMPTY_EVENT_TYPE if event_type is None else event_type
for event_type, event_id in event_type_to_id.items()
if event_id is None
}:
@ -1318,7 +1330,9 @@ def migrate_event_type_ids(instance: Recorder) -> bool:
for db_event_type in missing_db_event_types:
# We cannot add the assigned ids to the event_type_manager
# because the commit could get rolled back
assert db_event_type.event_type is not None
assert (
db_event_type.event_type is not None
), "event_type should never be None"
event_type_to_id[
db_event_type.event_type
] = db_event_type.event_type_id
@ -1346,6 +1360,89 @@ def migrate_event_type_ids(instance: Recorder) -> bool:
return is_done
def migrate_entity_ids(instance: Recorder) -> bool:
"""Migrate entity_ids to states_meta.
We do this in two steps because we need the history queries to work
while we are migrating.
1. Link the states to the states_meta table
2. Remove the entity_id column from the states table (in post_migrate_entity_ids)
"""
_LOGGER.debug("Migrating entity_ids")
states_meta_manager = instance.states_meta_manager
with session_scope(session=instance.get_session()) as session:
if states := session.execute(find_entity_ids_to_migrate()).all():
entity_ids = {entity_id for _, entity_id in states}
entity_id_to_metadata_id = states_meta_manager.get_many(entity_ids, session)
if missing_entity_ids := {
# We should never see None for the entity_id in the states table
# but we need to be defensive so we don't fail the migration
# because of a bad state
_EMPTY_ENTITY_ID if entity_id is None else entity_id
for entity_id, metadata_id in entity_id_to_metadata_id.items()
if metadata_id is None
}:
missing_states_metadata = [
StatesMeta(entity_id=entity_id) for entity_id in missing_entity_ids
]
session.add_all(missing_states_metadata)
session.flush() # Assign ids
for db_states_metadata in missing_states_metadata:
# We cannot add the assigned ids to the states_meta_manager
# because the commit could get rolled back
assert (
db_states_metadata.entity_id is not None
), "entity_id should never be None"
entity_id_to_metadata_id[
db_states_metadata.entity_id
] = db_states_metadata.metadata_id
session.execute(
update(States),
[
{
"state_id": state_id,
# We cannot set "entity_id": None yet since
# the history queries still need to work while the
# migration is in progress and we will do this in
# post_migrate_entity_ids
"metadata_id": entity_id_to_metadata_id[entity_id],
}
for state_id, entity_id in states
],
)
# If there is more work to do return False
# so that we can be called again
is_done = not states
_LOGGER.debug("Migrating entity_ids done=%s", is_done)
return is_done
def post_migrate_entity_ids(instance: Recorder) -> bool:
"""Remove old entity_id strings from states.
We cannot do this in migrate_entity_ids since the history queries
still need to work while the migration is in progress.
"""
session_maker = instance.get_session
_LOGGER.debug("Cleanup legacy entity_ids")
with session_scope(session=session_maker()) as session:
cursor_result = session.connection().execute(batch_cleanup_entity_ids())
is_done = not cursor_result or cursor_result.rowcount == 0
# If there is more work to do return False
# so that we can be called again
if is_done:
# Drop the old indexes since they are no longer needed
_drop_index(session_maker, "states", "ix_states_entity_id_last_updated_ts")
_LOGGER.debug("Cleanup legacy entity_ids done=%s", is_done)
return is_done
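Hedged sketch, not from the commit: the cleanup above decides it is finished when the batched UPDATE reports a rowcount of zero. The same pattern against a throwaway in-memory SQLite database (table layout and names are illustrative only):
from sqlalchemy import create_engine, text

engine = create_engine("sqlite://")
with engine.begin() as conn:
    conn.execute(text("CREATE TABLE states (state_id INTEGER PRIMARY KEY, entity_id TEXT)"))
    conn.execute(text("INSERT INTO states (entity_id) VALUES ('light.kitchen'), ('sensor.power')"))

def cleanup_batch(batch_size: int = 1) -> bool:
    """Null out one batch of legacy entity_id values; True once nothing was left to touch."""
    with engine.begin() as conn:
        result = conn.execute(
            text(
                "UPDATE states SET entity_id = NULL WHERE state_id IN "
                "(SELECT state_id FROM states WHERE entity_id IS NOT NULL LIMIT :n)"
            ),
            {"n": batch_size},
        )
        return result.rowcount == 0

while not cleanup_batch():
    pass  # re-run until a batch modifies zero rows, mirroring the is_done check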
def _initialize_database(session: Session) -> bool:
"""Initialize a new database.
View file
@ -41,10 +41,11 @@ class LazyState(State):
row: Row,
attr_cache: dict[str, dict[str, Any]],
start_time: datetime | None,
entity_id: str | None = None,
) -> None:
"""Init the lazy state."""
self._row = row
self.entity_id: str = self._row.entity_id
self.entity_id = entity_id or self._row.entity_id
self.state = self._row.state or ""
self._attributes: dict[str, Any] | None = None
self._last_updated_ts: float | None = self._row.last_updated_ts or (
@ -127,6 +128,7 @@ def row_to_compressed_state(
row: Row,
attr_cache: dict[str, dict[str, Any]],
start_time: datetime | None,
entity_id: str | None = None,
) -> dict[str, Any]:
"""Convert a database row to a compressed state schema 31 and later."""
comp_state = {
View file
@ -17,11 +17,13 @@ def decode_attributes_from_row(
row: Row, attr_cache: dict[str, dict[str, Any]]
) -> dict[str, Any]:
"""Decode attributes from a database row."""
source: str = row.shared_attrs or row.attributes
if (attributes := attr_cache.get(source)) is not None:
return attributes
source: str | None = getattr(row, "shared_attrs", None) or getattr(
row, "attributes", None
)
if not source or source == EMPTY_JSON_OBJECT:
return {}
if (attributes := attr_cache.get(source)) is not None:
return attributes
try:
attr_cache[source] = attributes = json_loads_object(source)
except ValueError:
View file
@ -15,7 +15,7 @@ from homeassistant.const import EVENT_STATE_CHANGED
import homeassistant.util.dt as dt_util
from .const import SQLITE_MAX_BIND_VARS
from .db_schema import Events, StateAttributes, States
from .db_schema import Events, StateAttributes, States, StatesMeta
from .models import DatabaseEngine
from .queries import (
attributes_ids_exist_in_states,
@ -27,10 +27,12 @@ from .queries import (
delete_event_types_rows,
delete_recorder_runs_rows,
delete_states_attributes_rows,
delete_states_meta_rows,
delete_states_rows,
delete_statistics_runs_rows,
delete_statistics_short_term_rows,
disconnect_states_rows,
find_entity_ids_to_purge,
find_event_types_to_purge,
find_events_to_purge,
find_latest_statistics_runs_run_id,
@ -116,6 +118,9 @@ def purge_old_data(
if instance.event_type_manager.active:
_purge_old_event_types(instance, session)
if instance.states_meta_manager.active:
_purge_old_entity_ids(instance, session)
_purge_old_recorder_runs(instance, session, purge_before)
if repack:
repack_database(instance)
@ -590,6 +595,25 @@ def _purge_old_event_types(instance: Recorder, session: Session) -> None:
instance.event_type_manager.evict_purged(purge_event_types)
def _purge_old_entity_ids(instance: Recorder, session: Session) -> None:
"""Purge all old entity_ids."""
# entity_ids are small, so there is no need to run the purge in batches
purge_entity_ids = set()
states_metadata_ids = set()
for metadata_id, entity_id in session.execute(find_entity_ids_to_purge()):
purge_entity_ids.add(entity_id)
states_metadata_ids.add(metadata_id)
if not states_metadata_ids:
return
deleted_rows = session.execute(delete_states_meta_rows(states_metadata_ids))
_LOGGER.debug("Deleted %s states meta", deleted_rows)
# Evict any entries in the states_meta cache referring to a purged entity_id
instance.states_meta_manager.evict_purged(purge_entity_ids)
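Illustrative aside (hypothetical data, a plain dict instead of the LRU): the purge walks one query result but needs two different keys from it, the integer metadata_ids for the DELETE statement and the string entity_ids for the cache eviction, which is why both sets are collected above.
rows = [(1, "sensor.gone"), (7, "light.removed")]  # (metadata_id, entity_id) pairs to purge
cache = {"sensor.gone": 1, "light.kitchen": 3}     # stand-in for the manager's id map

metadata_ids = {metadata_id for metadata_id, _ in rows}   # feeds delete_states_meta_rows
entity_ids = {entity_id for _, entity_id in rows}         # feeds evict_purged
for entity_id in entity_ids:
    cache.pop(entity_id, None)
assert cache == {"light.kitchen": 3}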
def _purge_filtered_data(instance: Recorder, session: Session) -> bool:
"""Remove filtered states and events that shouldn't be in the database."""
_LOGGER.debug("Cleanup filtered data")
@ -597,13 +621,18 @@ def _purge_filtered_data(instance: Recorder, session: Session) -> bool:
assert database_engine is not None
# Check if excluded entity_ids are in database
excluded_entity_ids: list[str] = [
entity_id
for (entity_id,) in session.query(distinct(States.entity_id)).all()
if not instance.entity_filter(entity_id)
entity_filter = instance.entity_filter
excluded_metadata_ids: list[int] = [
metadata_id
for (metadata_id, entity_id) in session.query(
StatesMeta.metadata_id, StatesMeta.entity_id
).all()
if not entity_filter(entity_id)
]
if len(excluded_entity_ids) > 0:
_purge_filtered_states(instance, session, excluded_entity_ids, database_engine)
if len(excluded_metadata_ids) > 0:
_purge_filtered_states(
instance, session, excluded_metadata_ids, database_engine
)
return False
# Check if excluded event_types are in database
@ -622,7 +651,7 @@ def _purge_filtered_data(instance: Recorder, session: Session) -> bool:
def _purge_filtered_states(
instance: Recorder,
session: Session,
excluded_entity_ids: list[str],
excluded_metadata_ids: list[str],
database_engine: DatabaseEngine,
) -> None:
"""Remove filtered states and linked events."""
@ -632,7 +661,7 @@ def _purge_filtered_states(
state_ids, attributes_ids, event_ids = zip(
*(
session.query(States.state_id, States.attributes_id, States.event_id)
.filter(States.entity_id.in_(excluded_entity_ids))
.filter(States.metadata_id.in_(excluded_metadata_ids))
.limit(SQLITE_MAX_BIND_VARS)
.all()
)
@ -687,17 +716,19 @@ def purge_entity_data(instance: Recorder, entity_filter: Callable[[str], bool])
database_engine = instance.database_engine
assert database_engine is not None
with session_scope(session=instance.get_session()) as session:
selected_entity_ids: list[str] = [
entity_id
for (entity_id,) in session.query(distinct(States.entity_id)).all()
selected_metadata_ids: list[int] = [
metadata_id
for (metadata_id, entity_id) in session.query(
StatesMeta.metadata_id, StatesMeta.entity_id
).all()
if entity_filter(entity_id)
]
_LOGGER.debug("Purging entity data for %s", selected_entity_ids)
if len(selected_entity_ids) > 0:
_LOGGER.debug("Purging entity data for %s", selected_metadata_ids)
if len(selected_metadata_ids) > 0:
# Purge a max of SQLITE_MAX_BIND_VARS, based on the oldest states
# or events record.
_purge_filtered_states(
instance, session, selected_entity_ids, database_engine
instance, session, selected_metadata_ids, database_engine
)
_LOGGER.debug("Purging entity data hasn't fully completed yet")
return False
View file
@ -16,6 +16,7 @@ from .db_schema import (
RecorderRuns,
StateAttributes,
States,
StatesMeta,
StatisticsRuns,
StatisticsShortTerm,
)
@ -59,6 +60,20 @@ def find_event_type_ids(event_types: Iterable[str]) -> StatementLambdaElement:
)
def find_all_states_metadata_ids() -> StatementLambdaElement:
"""Find all metadata_ids and entity_ids."""
return lambda_stmt(lambda: select(StatesMeta.metadata_id, StatesMeta.entity_id))
def find_states_metadata_ids(entity_ids: Iterable[str]) -> StatementLambdaElement:
"""Find metadata_ids by entity_ids."""
return lambda_stmt(
lambda: select(StatesMeta.metadata_id, StatesMeta.entity_id).filter(
StatesMeta.entity_id.in_(entity_ids)
)
)
def find_shared_attributes_id(
data_hash: int, shared_attrs: str
) -> StatementLambdaElement:
@ -716,6 +731,54 @@ def find_event_type_to_migrate() -> StatementLambdaElement:
)
def find_entity_ids_to_migrate() -> StatementLambdaElement:
"""Find entity_id to migrate."""
return lambda_stmt(
lambda: select(
States.state_id,
States.entity_id,
)
.filter(States.metadata_id.is_(None))
.limit(SQLITE_MAX_BIND_VARS)
)
def batch_cleanup_entity_ids() -> StatementLambdaElement:
"""Find entity_id to cleanup."""
# Self join because This version of MariaDB doesn't yet support 'LIMIT & IN/ALL/ANY/SOME subquery'
return lambda_stmt(
lambda: update(States)
.where(
States.state_id.in_(
select(States.state_id).join(
states_with_entity_ids := select(
States.state_id.label("state_id_with_entity_id")
)
.filter(States.entity_id.is_not(None))
.limit(5000)
.subquery(),
States.state_id == states_with_entity_ids.c.state_id_with_entity_id,
)
)
)
.values(entity_id=None)
)
def has_events_context_ids_to_migrate() -> StatementLambdaElement:
"""Check if there are events context ids to migrate."""
return lambda_stmt(
lambda: select(Events.event_id).filter(Events.context_id_bin.is_(None)).limit(1)
)
def has_states_context_ids_to_migrate() -> StatementLambdaElement:
"""Check if there are states context ids to migrate."""
return lambda_stmt(
lambda: select(States.state_id).filter(States.context_id_bin.is_(None)).limit(1)
)
def has_event_type_to_migrate() -> StatementLambdaElement:
"""Check if there are event_types to migrate."""
return lambda_stmt(
@ -723,6 +786,13 @@ def has_event_type_to_migrate() -> StatementLambdaElement:
)
def has_entity_ids_to_migrate() -> StatementLambdaElement:
"""Check if there are entity_id to migrate."""
return lambda_stmt(
lambda: select(States.state_id).filter(States.metadata_id.is_(None)).limit(1)
)
def find_states_context_ids_to_migrate() -> StatementLambdaElement:
"""Find events context_ids to migrate."""
return lambda_stmt(
@ -754,6 +824,23 @@ def find_event_types_to_purge() -> StatementLambdaElement:
)
def find_entity_ids_to_purge() -> StatementLambdaElement:
"""Find entity_ids to purge."""
return lambda_stmt(
lambda: select(StatesMeta.metadata_id, StatesMeta.entity_id).where(
StatesMeta.metadata_id.not_in(
select(StatesMeta.metadata_id).join(
used_states_metadata_id := select(
distinct(States.metadata_id).label("used_states_metadata_id")
).subquery(),
StatesMeta.metadata_id
== used_states_metadata_id.c.used_states_metadata_id,
)
)
)
)
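The purge query above is essentially an anti join: keep only the StatesMeta rows whose metadata_id does not appear among the distinct metadata_id values still used in states. A tiny set-based analogy with made-up data:
all_metadata = {1: "sensor.old", 2: "light.kitchen", 3: "switch.gone"}  # states_meta rows
used_metadata_ids = {2}  # distinct metadata_id values still referenced by states
purgeable = {mid: eid for mid, eid in all_metadata.items() if mid not in used_metadata_ids}
assert purgeable == {1: "sensor.old", 3: "switch.gone"}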
def delete_event_types_rows(event_type_ids: Iterable[int]) -> StatementLambdaElement:
"""Delete EventTypes rows."""
return lambda_stmt(
@ -761,3 +848,12 @@ def delete_event_types_rows(event_type_ids: Iterable[int]) -> StatementLambdaEle
.where(EventTypes.event_type_id.in_(event_type_ids))
.execution_options(synchronize_session=False)
)
def delete_states_meta_rows(metadata_ids: Iterable[int]) -> StatementLambdaElement:
"""Delete StatesMeta rows."""
return lambda_stmt(
lambda: delete(StatesMeta)
.where(StatesMeta.metadata_id.in_(metadata_ids))
.execution_options(synchronize_session=False)
)
View file
@ -0,0 +1,94 @@
"""Support managing StatesMeta."""
from __future__ import annotations
from collections.abc import Iterable
from typing import cast
from lru import LRU # pylint: disable=no-name-in-module
from sqlalchemy.orm.session import Session
from homeassistant.core import Event
from ..db_schema import StatesMeta
from ..queries import find_all_states_metadata_ids, find_states_metadata_ids
CACHE_SIZE = 8192
class StatesMetaManager:
"""Manage the StatesMeta table."""
def __init__(self) -> None:
"""Initialize the states meta manager."""
self._id_map: dict[str, int] = LRU(CACHE_SIZE)
self._pending: dict[str, StatesMeta] = {}
self.active = False
def load(self, events: list[Event], session: Session) -> None:
"""Load the entity_id to metadata_id mapping into memory."""
self.get_many(
(
event.data["new_state"].entity_id
for event in events
if event.data.get("new_state") is not None
),
session,
)
def get(self, entity_id: str, session: Session) -> int | None:
"""Resolve entity_id to the metadata_id."""
return self.get_many((entity_id,), session)[entity_id]
def get_metadata_id_to_entity_id(self, session: Session) -> dict[int, str]:
"""Resolve all entity_ids to metadata_ids."""
with session.no_autoflush:
return dict(tuple(session.execute(find_all_states_metadata_ids()))) # type: ignore[arg-type]
def get_many(
self, entity_ids: Iterable[str], session: Session
) -> dict[str, int | None]:
"""Resolve entity_id to metadata_id."""
results: dict[str, int | None] = {}
missing: list[str] = []
for entity_id in entity_ids:
if (metadata_id := self._id_map.get(entity_id)) is None:
missing.append(entity_id)
results[entity_id] = metadata_id
if not missing:
return results
with session.no_autoflush:
for metadata_id, entity_id in session.execute(
find_states_metadata_ids(missing)
):
results[entity_id] = self._id_map[entity_id] = cast(int, metadata_id)
return results
def get_pending(self, entity_id: str) -> StatesMeta | None:
"""Get pending StatesMeta that have not be assigned ids yet."""
return self._pending.get(entity_id)
def add_pending(self, db_states_meta: StatesMeta) -> None:
"""Add a pending StatesMeta that will be committed at the next interval."""
assert db_states_meta.entity_id is not None
entity_id: str = db_states_meta.entity_id
self._pending[entity_id] = db_states_meta
def post_commit_pending(self) -> None:
"""Call after commit to load the metadata_ids of the new StatesMeta into the LRU."""
for entity_id, db_states_meta in self._pending.items():
self._id_map[entity_id] = db_states_meta.metadata_id
self._pending.clear()
def reset(self) -> None:
"""Reset the states meta manager after the database has been reset or changed."""
self._id_map.clear()
self._pending.clear()
def evict_purged(self, entity_ids: Iterable[str]) -> None:
"""Evict purged event_types from the cache when they are no longer used."""
for entity_id in entity_ids:
self._id_map.pop(entity_id, None)
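A hedged sketch of the lookup shape get_many implements, with plain dictionaries standing in for the LRU and the database session (fake_db_lookup and the ids are made up): answer from the cache first, query only the misses, and leave unknown entity_ids mapped to None.
cache: dict[str, int] = {"light.kitchen": 11}

def fake_db_lookup(entity_ids: list[str]) -> dict[str, int]:
    rows = {"sensor.power": 42}  # pretend only this entity_id exists in states_meta
    return {eid: rows[eid] for eid in entity_ids if eid in rows}

def get_many_sketch(entity_ids):
    results: dict[str, int | None] = {}
    missing: list[str] = []
    for entity_id in entity_ids:
        if (metadata_id := cache.get(entity_id)) is None:
            missing.append(entity_id)
        results[entity_id] = metadata_id
    if missing:
        for entity_id, metadata_id in fake_db_lookup(missing).items():
            results[entity_id] = cache[entity_id] = metadata_id
    return results

print(get_many_sketch(["light.kitchen", "sensor.power", "sensor.unknown"]))
# {'light.kitchen': 11, 'sensor.power': 42, 'sensor.unknown': None}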
View file
@ -372,3 +372,39 @@ class EventTypeIDMigrationTask(RecorderTask):
if not instance._migrate_event_type_ids(): # pylint: disable=[protected-access]
# Schedule a new migration task if this one didn't finish
instance.queue_task(EventTypeIDMigrationTask())
@dataclass
class EntityIDMigrationTask(RecorderTask):
"""An object to insert into the recorder queue to migrate entity_ids to StatesMeta."""
commit_before = True
# We have to commit before to make sure there are
# no new pending states_meta about to be added to
# the db since this happens live
def run(self, instance: Recorder) -> None:
"""Run entity_id migration task."""
if not instance._migrate_entity_ids(): # pylint: disable=[protected-access]
# Schedule a new migration task if this one didn't finish
instance.queue_task(EntityIDMigrationTask())
else:
# The migration has finished, now we start the post migration
# to remove the old entity_id data from the states table.
# At this point we can also start using the StatesMeta table,
# so we set active to True.
instance.states_meta_manager.active = True
instance.queue_task(EntityIDPostMigrationTask())
@dataclass
class EntityIDPostMigrationTask(RecorderTask):
"""An object to insert into the recorder queue to cleanup after entity_ids migration."""
def run(self, instance: Recorder) -> None:
"""Run entity_id post migration task."""
if (
not instance._post_migrate_entity_ids() # pylint: disable=[protected-access]
):
# Schedule a new migration task if this one didn't finish
instance.queue_task(EntityIDPostMigrationTask())
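Sketch only (a plain deque instead of the recorder queue, stand-in callables): both tasks above follow a self-requeueing pattern, and the entity_id task chains the post-migration cleanup once a run reports that it finished.
from collections import deque

queue: deque = deque()
remaining_batches = 3  # pretend three chunks of work are left

def migrate_chunk() -> bool:
    global remaining_batches
    remaining_batches -= 1
    return remaining_batches <= 0  # True once nothing is left

def run_migration_task() -> None:
    if not migrate_chunk():
        queue.append(run_migration_task)  # not done: requeue the same task
    else:
        queue.append(run_post_migration)  # done: chain the cleanup task

def run_post_migration() -> None:
    pass  # stand-in for the legacy entity_id cleanup

queue.append(run_migration_task)
while queue:
    queue.popleft()()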
File diff suppressed because it is too large
View file
@ -62,6 +62,7 @@ DB_TIMEZONE = "+00:00"
TABLE_EVENTS = "events"
TABLE_STATES = "states"
TABLE_STATES_META = "states_meta"
TABLE_RECORDER_RUNS = "recorder_runs"
TABLE_SCHEMA_CHANGES = "schema_changes"
TABLE_STATISTICS = "statistics"
@ -73,6 +74,7 @@ TABLE_EVENT_TYPES = "event_types"
ALL_TABLES = [
TABLE_STATES,
TABLE_STATES_META,
TABLE_EVENTS,
TABLE_EVENT_TYPES,
TABLE_RECORDER_RUNS,
@ -266,6 +268,10 @@ class States(Base): # type: ignore
context_parent_id_bin = Column(
LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH)
) # *** Not originally in v23, only added for recorder to startup ok
metadata_id = Column(
Integer, ForeignKey("states_meta.metadata_id"), index=True
) # *** Not originally in v23, only added for recorder to startup ok
states_meta_rel = relationship("StatesMeta")
event = relationship("Events", uselist=False)
old_state = relationship("States", remote_side=[state_id])
@ -326,6 +332,27 @@ class States(Base): # type: ignore
return None
# *** Not originally in v23, only added for recorder to startup ok
# This is not being tested by the v23 statistics migration tests
class StatesMeta(Base): # type: ignore[misc,valid-type]
"""Metadata for states."""
__table_args__ = (
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_STATES_META
metadata_id = Column(Integer, Identity(), primary_key=True)
entity_id = Column(String(MAX_LENGTH_STATE_ENTITY_ID))
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
"<recorder.StatesMeta("
f"id={self.metadata_id}, entity_id='{self.entity_id}'"
")>"
)
class StatisticResult(TypedDict):
"""Statistic result data class.
View file
@ -8,6 +8,7 @@ from __future__ import annotations
from datetime import datetime, timedelta
import json
import logging
import time
from typing import Any, TypedDict, cast, overload
from fnvhash import fnv1a_32
@ -57,6 +58,7 @@ TABLE_EVENTS = "events"
TABLE_EVENT_DATA = "event_data"
TABLE_EVENT_TYPES = "event_types"
TABLE_STATES = "states"
TABLE_STATES_META = "states_meta"
TABLE_STATE_ATTRIBUTES = "state_attributes"
TABLE_RECORDER_RUNS = "recorder_runs"
TABLE_SCHEMA_CHANGES = "schema_changes"
@ -132,7 +134,7 @@ class Events(Base): # type: ignore[misc,valid-type]
time_fired = Column(DATETIME_TYPE, index=True)
time_fired_ts = Column(
TIMESTAMP_TYPE, index=True
) # *** Not originally in v30, only added for recorder to startup ok
) # *** Not originally in v28, only added for recorder to startup ok
context_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID), index=True)
context_user_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID))
context_parent_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID))
@ -275,7 +277,13 @@ class States(Base): # type: ignore[misc,valid-type]
Integer, ForeignKey("events.event_id", ondelete="CASCADE"), index=True
)
last_changed = Column(DATETIME_TYPE, default=dt_util.utcnow)
last_changed_ts = Column(
TIMESTAMP_TYPE
) # *** Not originally in v30, only added for recorder to startup ok
last_updated = Column(DATETIME_TYPE, default=dt_util.utcnow, index=True)
last_updated_ts = Column(
TIMESTAMP_TYPE, default=time.time, index=True
) # *** Not originally in v30, only added for recorder to startup ok
old_state_id = Column(Integer, ForeignKey("states.state_id"), index=True)
attributes_id = Column(
Integer, ForeignKey("state_attributes.attributes_id"), index=True
@ -284,6 +292,10 @@ class States(Base): # type: ignore[misc,valid-type]
context_user_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID))
context_parent_id = Column(String(MAX_LENGTH_EVENT_CONTEXT_ID))
origin_idx = Column(SmallInteger) # 0 is local, 1 is remote
metadata_id = Column(
Integer, ForeignKey("states_meta.metadata_id"), index=True
) # *** Not originally in v28, only added for recorder to startup ok
states_meta_rel = relationship("StatesMeta")
old_state = relationship("States", remote_side=[state_id])
state_attributes = relationship("StateAttributes")
@ -412,6 +424,27 @@ class StateAttributes(Base): # type: ignore[misc,valid-type]
return {}
# *** Not originally in v23, only added for recorder to startup ok
# This is not being tested by the v23 statistics migration tests
class StatesMeta(Base): # type: ignore[misc,valid-type]
"""Metadata for states."""
__table_args__ = (
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_STATES_META
metadata_id = Column(Integer, Identity(), primary_key=True)
entity_id = Column(String(MAX_LENGTH_STATE_ENTITY_ID))
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
"<recorder.StatesMeta("
f"id={self.metadata_id}, entity_id='{self.entity_id}'"
")>"
)
class StatisticResult(TypedDict):
"""Statistic result data class.
View file
@ -8,6 +8,7 @@ from __future__ import annotations
from collections.abc import Callable
from datetime import datetime, timedelta
import logging
import time
from typing import Any, TypedDict, cast, overload
import ciso8601
@ -67,6 +68,7 @@ TABLE_EVENT_DATA = "event_data"
TABLE_EVENT_TYPES = "event_types"
TABLE_STATES = "states"
TABLE_STATE_ATTRIBUTES = "state_attributes"
TABLE_STATES_META = "states_meta"
TABLE_RECORDER_RUNS = "recorder_runs"
TABLE_SCHEMA_CHANGES = "schema_changes"
TABLE_STATISTICS = "statistics"
@ -77,6 +79,7 @@ TABLE_STATISTICS_SHORT_TERM = "statistics_short_term"
ALL_TABLES = [
TABLE_STATES,
TABLE_STATE_ATTRIBUTES,
TABLE_STATES_META,
TABLE_EVENTS,
TABLE_EVENT_DATA,
TABLE_EVENT_TYPES,
@ -370,7 +373,13 @@ class States(Base): # type: ignore[misc,valid-type]
Integer, ForeignKey("events.event_id", ondelete="CASCADE"), index=True
)
last_changed = Column(DATETIME_TYPE)
last_changed_ts = Column(
TIMESTAMP_TYPE
) # *** Not originally in v30, only added for recorder to startup ok
last_updated = Column(DATETIME_TYPE, default=dt_util.utcnow, index=True)
last_updated_ts = Column(
TIMESTAMP_TYPE, default=time.time, index=True
) # *** Not originally in v30, only added for recorder to startup ok
old_state_id = Column(Integer, ForeignKey("states.state_id"), index=True)
attributes_id = Column(
Integer, ForeignKey("state_attributes.attributes_id"), index=True
@ -388,6 +397,10 @@ class States(Base): # type: ignore[misc,valid-type]
context_parent_id_bin = Column(
LargeBinary(CONTEXT_ID_BIN_MAX_LENGTH)
) # *** Not originally in v30, only added for recorder to startup ok
metadata_id = Column(
Integer, ForeignKey("states_meta.metadata_id"), index=True
) # *** Not originally in v30, only added for recorder to startup ok
states_meta_rel = relationship("StatesMeta")
old_state = relationship("States", remote_side=[state_id])
state_attributes = relationship("StateAttributes")
@ -525,6 +538,27 @@ class StateAttributes(Base): # type: ignore[misc,valid-type]
return {}
# *** Not originally in v30, only added for recorder to startup ok
# This is not being tested by the v30 statistics migration tests
class StatesMeta(Base): # type: ignore[misc,valid-type]
"""Metadata for states."""
__table_args__ = (
{"mysql_default_charset": "utf8mb4", "mysql_collate": "utf8mb4_unicode_ci"},
)
__tablename__ = TABLE_STATES_META
metadata_id = Column(Integer, Identity(), primary_key=True)
entity_id = Column(String(MAX_LENGTH_STATE_ENTITY_ID))
def __repr__(self) -> str:
"""Return string representation of instance for debugging."""
return (
"<recorder.StatesMeta("
f"id={self.metadata_id}, entity_id='{self.entity_id}'"
")>"
)
class StatisticsBase:
"""Statistics base class."""
View file
@ -5,7 +5,7 @@ from sqlalchemy import select
from sqlalchemy.engine.row import Row
from homeassistant.components.recorder import Recorder, get_instance
from homeassistant.components.recorder.db_schema import EventData, Events, States
from homeassistant.components.recorder.db_schema import EventData, Events, StatesMeta
from homeassistant.components.recorder.filters import (
Filters,
extract_include_exclude_filter_conf,
@ -39,8 +39,8 @@ async def _async_get_states_and_events_with_filter(
def _get_states_with_session():
with session_scope(hass=hass) as session:
return session.execute(
select(States.entity_id).filter(
sqlalchemy_filter.states_entity_filter()
select(StatesMeta.entity_id).filter(
sqlalchemy_filter.states_metadata_entity_filter()
)
).all()
View file
@ -0,0 +1,670 @@
"""The tests for the recorder filter matching the EntityFilter component."""
import json
from unittest.mock import patch
import pytest
from sqlalchemy import select
from sqlalchemy.engine.row import Row
from homeassistant.components.recorder import Recorder, get_instance
from homeassistant.components.recorder.db_schema import EventData, Events, States
from homeassistant.components.recorder.filters import (
Filters,
extract_include_exclude_filter_conf,
sqlalchemy_filter_from_include_exclude_conf,
)
from homeassistant.components.recorder.util import session_scope
from homeassistant.const import ATTR_ENTITY_ID, STATE_ON
from homeassistant.core import HomeAssistant
from homeassistant.helpers.entityfilter import (
CONF_DOMAINS,
CONF_ENTITIES,
CONF_ENTITY_GLOBS,
CONF_EXCLUDE,
CONF_INCLUDE,
convert_include_exclude_filter,
)
from .common import async_wait_recording_done
@pytest.fixture(name="legacy_recorder_mock")
async def legacy_recorder_mock_fixture(recorder_mock):
"""Fixture for legacy recorder mock."""
with patch.object(recorder_mock.states_meta_manager, "active", False):
yield recorder_mock
async def _async_get_states_and_events_with_filter(
hass: HomeAssistant, sqlalchemy_filter: Filters, entity_ids: set[str]
) -> tuple[list[Row], list[Row]]:
"""Get states from the database based on a filter."""
for entity_id in entity_ids:
hass.states.async_set(entity_id, STATE_ON)
hass.bus.async_fire("any", {ATTR_ENTITY_ID: entity_id})
await async_wait_recording_done(hass)
def _get_states_with_session():
with session_scope(hass=hass) as session:
return session.execute(
select(States.entity_id).filter(
sqlalchemy_filter.states_entity_filter()
)
).all()
filtered_states_entity_ids = {
row[0]
for row in await get_instance(hass).async_add_executor_job(
_get_states_with_session
)
}
def _get_events_with_session():
with session_scope(hass=hass) as session:
return session.execute(
select(EventData.shared_data)
.outerjoin(Events, EventData.data_id == Events.data_id)
.filter(sqlalchemy_filter.events_entity_filter())
).all()
filtered_events_entity_ids = set()
for row in await get_instance(hass).async_add_executor_job(
_get_events_with_session
):
event_data = json.loads(row[0])
if ATTR_ENTITY_ID not in event_data:
continue
filtered_events_entity_ids.add(json.loads(row[0])[ATTR_ENTITY_ID])
return filtered_states_entity_ids, filtered_events_entity_ids
async def test_included_and_excluded_simple_case_no_domains(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test filters with included and excluded without domains."""
filter_accept = {"sensor.kitchen4", "switch.kitchen"}
filter_reject = {
"light.any",
"switch.other",
"cover.any",
"sensor.weather5",
"light.kitchen",
}
conf = {
CONF_INCLUDE: {
CONF_ENTITY_GLOBS: ["sensor.kitchen*"],
CONF_ENTITIES: ["switch.kitchen"],
},
CONF_EXCLUDE: {
CONF_ENTITY_GLOBS: ["sensor.weather*"],
CONF_ENTITIES: ["light.kitchen"],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
assert not entity_filter.explicitly_included("light.any")
assert not entity_filter.explicitly_included("switch.other")
assert entity_filter.explicitly_included("sensor.kitchen4")
assert entity_filter.explicitly_included("switch.kitchen")
assert not entity_filter.explicitly_excluded("light.any")
assert not entity_filter.explicitly_excluded("switch.other")
assert entity_filter.explicitly_excluded("sensor.weather5")
assert entity_filter.explicitly_excluded("light.kitchen")
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_included_and_excluded_simple_case_no_globs(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test filters with included and excluded without globs."""
filter_accept = {"switch.bla", "sensor.blu", "sensor.keep"}
filter_reject = {"sensor.bli"}
conf = {
CONF_INCLUDE: {
CONF_DOMAINS: ["sensor", "homeassistant"],
CONF_ENTITIES: ["switch.bla"],
},
CONF_EXCLUDE: {
CONF_DOMAINS: ["switch"],
CONF_ENTITIES: ["sensor.bli"],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_included_and_excluded_simple_case_without_underscores(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test filters with included and excluded without underscores."""
filter_accept = {"light.any", "sensor.kitchen4", "switch.kitchen"}
filter_reject = {"switch.other", "cover.any", "sensor.weather5", "light.kitchen"}
conf = {
CONF_INCLUDE: {
CONF_DOMAINS: ["light"],
CONF_ENTITY_GLOBS: ["sensor.kitchen*"],
CONF_ENTITIES: ["switch.kitchen"],
},
CONF_EXCLUDE: {
CONF_DOMAINS: ["cover"],
CONF_ENTITY_GLOBS: ["sensor.weather*"],
CONF_ENTITIES: ["light.kitchen"],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
assert not entity_filter.explicitly_included("light.any")
assert not entity_filter.explicitly_included("switch.other")
assert entity_filter.explicitly_included("sensor.kitchen4")
assert entity_filter.explicitly_included("switch.kitchen")
assert not entity_filter.explicitly_excluded("light.any")
assert not entity_filter.explicitly_excluded("switch.other")
assert entity_filter.explicitly_excluded("sensor.weather5")
assert entity_filter.explicitly_excluded("light.kitchen")
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_included_and_excluded_simple_case_with_underscores(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test filters with included and excluded with underscores."""
filter_accept = {"light.any", "sensor.kitchen_4", "switch.kitchen"}
filter_reject = {"switch.other", "cover.any", "sensor.weather_5", "light.kitchen"}
conf = {
CONF_INCLUDE: {
CONF_DOMAINS: ["light"],
CONF_ENTITY_GLOBS: ["sensor.kitchen_*"],
CONF_ENTITIES: ["switch.kitchen"],
},
CONF_EXCLUDE: {
CONF_DOMAINS: ["cover"],
CONF_ENTITY_GLOBS: ["sensor.weather_*"],
CONF_ENTITIES: ["light.kitchen"],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
assert not entity_filter.explicitly_included("light.any")
assert not entity_filter.explicitly_included("switch.other")
assert entity_filter.explicitly_included("sensor.kitchen_4")
assert entity_filter.explicitly_included("switch.kitchen")
assert not entity_filter.explicitly_excluded("light.any")
assert not entity_filter.explicitly_excluded("switch.other")
assert entity_filter.explicitly_excluded("sensor.weather_5")
assert entity_filter.explicitly_excluded("light.kitchen")
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_included_and_excluded_complex_case(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test filters with included and excluded with a complex filter."""
filter_accept = {"light.any", "sensor.kitchen_4", "switch.kitchen"}
filter_reject = {
"camera.one",
"notify.any",
"automation.update_readme",
"automation.update_utilities_cost",
"binary_sensor.iss",
}
conf = {
CONF_INCLUDE: {
CONF_ENTITIES: ["group.trackers"],
},
CONF_EXCLUDE: {
CONF_ENTITIES: [
"automation.update_readme",
"automation.update_utilities_cost",
"binary_sensor.iss",
],
CONF_DOMAINS: [
"camera",
"group",
"media_player",
"notify",
"scene",
"sun",
"zone",
],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_included_entities_and_excluded_domain(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test filters with included entities and excluded domain."""
filter_accept = {
"media_player.test",
"media_player.test3",
"thermostat.test",
"zone.home",
"script.can_cancel_this_one",
}
filter_reject = {
"thermostat.test2",
}
conf = {
CONF_INCLUDE: {
CONF_ENTITIES: ["media_player.test", "thermostat.test"],
},
CONF_EXCLUDE: {
CONF_DOMAINS: ["thermostat"],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_same_domain_included_excluded(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test filters with the same domain included and excluded."""
filter_accept = {
"media_player.test",
"media_player.test3",
}
filter_reject = {
"thermostat.test2",
"thermostat.test",
"zone.home",
"script.can_cancel_this_one",
}
conf = {
CONF_INCLUDE: {
CONF_DOMAINS: ["media_player"],
},
CONF_EXCLUDE: {
CONF_DOMAINS: ["media_player"],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_same_entity_included_excluded(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test filters with the same entity included and excluded."""
filter_accept = {
"media_player.test",
}
filter_reject = {
"media_player.test3",
"thermostat.test2",
"thermostat.test",
"zone.home",
"script.can_cancel_this_one",
}
conf = {
CONF_INCLUDE: {
CONF_ENTITIES: ["media_player.test"],
},
CONF_EXCLUDE: {
CONF_ENTITIES: ["media_player.test"],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_same_entity_included_excluded_include_domain_wins(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test filters with domain and entities and the include domain wins."""
filter_accept = {
"media_player.test2",
"media_player.test3",
"thermostat.test",
}
filter_reject = {
"thermostat.test2",
"zone.home",
"script.can_cancel_this_one",
}
conf = {
CONF_INCLUDE: {
CONF_DOMAINS: ["media_player"],
CONF_ENTITIES: ["thermostat.test"],
},
CONF_EXCLUDE: {
CONF_DOMAINS: ["thermostat"],
CONF_ENTITIES: ["media_player.test"],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_specificly_included_entity_always_wins(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test specificlly included entity always wins."""
filter_accept = {
"media_player.test2",
"media_player.test3",
"thermostat.test",
"binary_sensor.specific_include",
}
filter_reject = {
"binary_sensor.test2",
"binary_sensor.home",
"binary_sensor.can_cancel_this_one",
}
conf = {
CONF_INCLUDE: {
CONF_ENTITIES: ["binary_sensor.specific_include"],
},
CONF_EXCLUDE: {
CONF_DOMAINS: ["binary_sensor"],
CONF_ENTITY_GLOBS: ["binary_sensor.*"],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
async def test_specificly_included_entity_always_wins_over_glob(
legacy_recorder_mock: Recorder, hass: HomeAssistant
) -> None:
"""Test specificlly included entity always wins over a glob."""
filter_accept = {
"sensor.apc900va_status",
"sensor.apc900va_battery_charge",
"sensor.apc900va_battery_runtime",
"sensor.apc900va_load",
"sensor.energy_x",
}
filter_reject = {
"sensor.apc900va_not_included",
}
conf = {
CONF_EXCLUDE: {
CONF_DOMAINS: [
"updater",
"camera",
"group",
"media_player",
"script",
"sun",
"automation",
"zone",
"weblink",
"scene",
"calendar",
"weather",
"remote",
"notify",
"switch",
"shell_command",
"media_player",
],
CONF_ENTITY_GLOBS: ["sensor.apc900va_*"],
},
CONF_INCLUDE: {
CONF_DOMAINS: [
"binary_sensor",
"climate",
"device_tracker",
"input_boolean",
"sensor",
],
CONF_ENTITY_GLOBS: ["sensor.energy_*"],
CONF_ENTITIES: [
"sensor.apc900va_status",
"sensor.apc900va_battery_charge",
"sensor.apc900va_battery_runtime",
"sensor.apc900va_load",
],
},
}
extracted_filter = extract_include_exclude_filter_conf(conf)
entity_filter = convert_include_exclude_filter(extracted_filter)
sqlalchemy_filter = sqlalchemy_filter_from_include_exclude_conf(extracted_filter)
assert sqlalchemy_filter is not None
for entity_id in filter_accept:
assert entity_filter(entity_id) is True
for entity_id in filter_reject:
assert entity_filter(entity_id) is False
(
filtered_states_entity_ids,
filtered_events_entity_ids,
) = await _async_get_states_and_events_with_filter(
hass, sqlalchemy_filter, filter_accept | filter_reject
)
assert filtered_states_entity_ids == filter_accept
assert not filtered_states_entity_ids.intersection(filter_reject)
assert filtered_events_entity_ids == filter_accept
assert not filtered_events_entity_ids.intersection(filter_reject)
View file
@ -19,6 +19,7 @@ from homeassistant.components.recorder.db_schema import (
RecorderRuns,
StateAttributes,
States,
StatesMeta,
)
from homeassistant.components.recorder.history import legacy
from homeassistant.components.recorder.models import LazyState, process_timestamp
@ -802,34 +803,15 @@ async def test_state_changes_during_period_query_during_migration_to_schema_25(
instance = await async_setup_recorder_instance(hass, {})
start = dt_util.utcnow()
point = start + timedelta(seconds=1)
end = point + timedelta(seconds=1)
entity_id = "light.test"
await recorder.get_instance(hass).async_add_executor_job(
_add_db_entries, hass, point, [entity_id]
)
with patch.object(instance.states_meta_manager, "active", False):
start = dt_util.utcnow()
point = start + timedelta(seconds=1)
end = point + timedelta(seconds=1)
entity_id = "light.test"
await recorder.get_instance(hass).async_add_executor_job(
_add_db_entries, hass, point, [entity_id]
)
no_attributes = True
hist = history.state_changes_during_period(
hass, start, end, entity_id, no_attributes, include_start_time_state=False
)
state = hist[entity_id][0]
assert state.attributes == {}
no_attributes = False
hist = history.state_changes_during_period(
hass, start, end, entity_id, no_attributes, include_start_time_state=False
)
state = hist[entity_id][0]
assert state.attributes == {"name": "the shared light"}
with instance.engine.connect() as conn:
conn.execute(text("update states set attributes_id=NULL;"))
conn.execute(text("drop table state_attributes;"))
conn.commit()
with patch.object(instance, "schema_version", 24):
no_attributes = True
hist = history.state_changes_during_period(
hass, start, end, entity_id, no_attributes, include_start_time_state=False
@ -842,7 +824,37 @@ async def test_state_changes_during_period_query_during_migration_to_schema_25(
hass, start, end, entity_id, no_attributes, include_start_time_state=False
)
state = hist[entity_id][0]
assert state.attributes == {"name": "the light"}
assert state.attributes == {"name": "the shared light"}
with instance.engine.connect() as conn:
conn.execute(text("update states set attributes_id=NULL;"))
conn.execute(text("drop table state_attributes;"))
conn.commit()
with patch.object(instance, "schema_version", 24):
no_attributes = True
hist = history.state_changes_during_period(
hass,
start,
end,
entity_id,
no_attributes,
include_start_time_state=False,
)
state = hist[entity_id][0]
assert state.attributes == {}
no_attributes = False
hist = history.state_changes_during_period(
hass,
start,
end,
entity_id,
no_attributes,
include_start_time_state=False,
)
state = hist[entity_id][0]
assert state.attributes == {"name": "the light"}
async def test_get_states_query_during_migration_to_schema_25(
@ -993,7 +1005,14 @@ async def test_get_full_significant_states_handles_empty_last_changed(
state_attributes.attributes_id: state_attributes
for state_attributes in session.query(StateAttributes)
}
metadata_id_to_entity_id = {
states_meta.metadata_id: states_meta
for states_meta in session.query(StatesMeta)
}
for db_state in session.query(States):
db_state.entity_id = metadata_id_to_entity_id[
db_state.metadata_id
].entity_id
state = db_state.to_native()
state.attributes = db_state_attributes[
db_state.attributes_id
View file
@ -65,7 +65,9 @@ def db_schema_30():
with patch.object(recorder, "db_schema", old_db_schema), patch.object(
recorder.migration, "SCHEMA_VERSION", old_db_schema.SCHEMA_VERSION
), patch.object(core, "EventTypes", old_db_schema.EventTypes), patch.object(
), patch.object(core, "StatesMeta", old_db_schema.StatesMeta), patch.object(
core, "EventTypes", old_db_schema.EventTypes
), patch.object(
core, "EventData", old_db_schema.EventData
), patch.object(
core, "States", old_db_schema.States
@ -86,7 +88,10 @@ def test_get_full_significant_states_with_session_entity_no_matches(
hass = hass_recorder()
now = dt_util.utcnow()
time_before_recorder_ran = now - timedelta(days=1000)
with session_scope(hass=hass) as session:
instance = recorder.get_instance(hass)
with session_scope(hass=hass) as session, patch.object(
instance.states_meta_manager, "active", False
):
assert (
history.get_full_significant_states_with_session(
hass, session, time_before_recorder_ran, now, entity_ids=["demo.id"]
@ -112,7 +117,10 @@ def test_significant_states_with_session_entity_minimal_response_no_matches(
hass = hass_recorder()
now = dt_util.utcnow()
time_before_recorder_ran = now - timedelta(days=1000)
with session_scope(hass=hass) as session:
instance = recorder.get_instance(hass)
with session_scope(hass=hass) as session, patch.object(
instance.states_meta_manager, "active", False
):
assert (
history.get_significant_states_with_session(
hass,
@ -152,44 +160,46 @@ def test_state_changes_during_period(
"""Test state change during period."""
hass = hass_recorder()
entity_id = "media_player.test"
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
def set_state(state):
"""Set the state."""
hass.states.set(entity_id, state, attributes)
wait_recording_done(hass)
return hass.states.get(entity_id)
def set_state(state):
"""Set the state."""
hass.states.set(entity_id, state, attributes)
wait_recording_done(hass)
return hass.states.get(entity_id)
start = dt_util.utcnow()
point = start + timedelta(seconds=1)
end = point + timedelta(seconds=1)
start = dt_util.utcnow()
point = start + timedelta(seconds=1)
end = point + timedelta(seconds=1)
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("idle")
set_state("YouTube")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("idle")
set_state("YouTube")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point
):
states = [
set_state("idle"),
set_state("Netflix"),
set_state("Plex"),
set_state("YouTube"),
]
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point
):
states = [
set_state("idle"),
set_state("Netflix"),
set_state("Plex"),
set_state("YouTube"),
]
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=end
):
set_state("Netflix")
set_state("Plex")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=end
):
set_state("Netflix")
set_state("Plex")
hist = history.state_changes_during_period(
hass, start, end, entity_id, no_attributes, limit=limit
)
hist = history.state_changes_during_period(
hass, start, end, entity_id, no_attributes, limit=limit
)
assert_multiple_states_equal_without_context(states[:limit], hist[entity_id])
assert_multiple_states_equal_without_context(states[:limit], hist[entity_id])
def test_state_changes_during_period_descending(
@ -198,96 +208,100 @@ def test_state_changes_during_period_descending(
"""Test state change during period descending."""
hass = hass_recorder()
entity_id = "media_player.test"
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
def set_state(state):
"""Set the state."""
hass.states.set(entity_id, state, {"any": 1})
wait_recording_done(hass)
return hass.states.get(entity_id)
def set_state(state):
"""Set the state."""
hass.states.set(entity_id, state, {"any": 1})
wait_recording_done(hass)
return hass.states.get(entity_id)
start = dt_util.utcnow()
point = start + timedelta(seconds=1)
point2 = start + timedelta(seconds=1, microseconds=2)
point3 = start + timedelta(seconds=1, microseconds=3)
point4 = start + timedelta(seconds=1, microseconds=4)
end = point + timedelta(seconds=1)
start = dt_util.utcnow()
point = start + timedelta(seconds=1)
point2 = start + timedelta(seconds=1, microseconds=2)
point3 = start + timedelta(seconds=1, microseconds=3)
point4 = start + timedelta(seconds=1, microseconds=4)
end = point + timedelta(seconds=1)
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("idle")
set_state("YouTube")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("idle")
set_state("YouTube")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point
):
states = [set_state("idle")]
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point2
):
states.append(set_state("Netflix"))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point3
):
states.append(set_state("Plex"))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point4
):
states.append(set_state("YouTube"))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point
):
states = [set_state("idle")]
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point2
):
states.append(set_state("Netflix"))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point3
):
states.append(set_state("Plex"))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point4
):
states.append(set_state("YouTube"))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=end
):
set_state("Netflix")
set_state("Plex")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=end
):
set_state("Netflix")
set_state("Plex")
hist = history.state_changes_during_period(
hass, start, end, entity_id, no_attributes=False, descending=False
)
assert_multiple_states_equal_without_context(states, hist[entity_id])
hist = history.state_changes_during_period(
hass, start, end, entity_id, no_attributes=False, descending=False
)
assert_multiple_states_equal_without_context(states, hist[entity_id])
hist = history.state_changes_during_period(
hass, start, end, entity_id, no_attributes=False, descending=True
)
assert_multiple_states_equal_without_context(
states, list(reversed(list(hist[entity_id])))
)
hist = history.state_changes_during_period(
hass, start, end, entity_id, no_attributes=False, descending=True
)
assert_multiple_states_equal_without_context(
states, list(reversed(list(hist[entity_id])))
)
def test_get_last_state_changes(hass_recorder: Callable[..., HomeAssistant]) -> None:
"""Test number of state changes."""
hass = hass_recorder()
entity_id = "sensor.test"
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
def set_state(state):
"""Set the state."""
hass.states.set(entity_id, state)
wait_recording_done(hass)
return hass.states.get(entity_id)
def set_state(state):
"""Set the state."""
hass.states.set(entity_id, state)
wait_recording_done(hass)
return hass.states.get(entity_id)
start = dt_util.utcnow() - timedelta(minutes=2)
point = start + timedelta(minutes=1)
point2 = point + timedelta(minutes=1, seconds=1)
start = dt_util.utcnow() - timedelta(minutes=2)
point = start + timedelta(minutes=1)
point2 = point + timedelta(minutes=1, seconds=1)
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("1")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("1")
states = []
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point
):
states.append(set_state("2"))
states = []
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point
):
states.append(set_state("2"))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point2
):
states.append(set_state("3"))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point2
):
states.append(set_state("3"))
hist = history.get_last_state_changes(hass, 2, entity_id)
hist = history.get_last_state_changes(hass, 2, entity_id)
assert_multiple_states_equal_without_context(states, hist[entity_id])
assert_multiple_states_equal_without_context(states, hist[entity_id])
def test_ensure_state_can_be_copied(
@ -300,30 +314,36 @@ def test_ensure_state_can_be_copied(
"""
hass = hass_recorder()
entity_id = "sensor.test"
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
def set_state(state):
"""Set the state."""
hass.states.set(entity_id, state)
wait_recording_done(hass)
return hass.states.get(entity_id)
def set_state(state):
"""Set the state."""
hass.states.set(entity_id, state)
wait_recording_done(hass)
return hass.states.get(entity_id)
start = dt_util.utcnow() - timedelta(minutes=2)
point = start + timedelta(minutes=1)
start = dt_util.utcnow() - timedelta(minutes=2)
point = start + timedelta(minutes=1)
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("1")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("1")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point
):
set_state("2")
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=point
):
set_state("2")
hist = history.get_last_state_changes(hass, 2, entity_id)
hist = history.get_last_state_changes(hass, 2, entity_id)
assert_states_equal_without_context(copy(hist[entity_id][0]), hist[entity_id][0])
assert_states_equal_without_context(copy(hist[entity_id][1]), hist[entity_id][1])
assert_states_equal_without_context(
copy(hist[entity_id][0]), hist[entity_id][0]
)
assert_states_equal_without_context(
copy(hist[entity_id][1]), hist[entity_id][1]
)
def test_get_significant_states(hass_recorder: Callable[..., HomeAssistant]) -> None:
@ -334,9 +354,11 @@ def test_get_significant_states(hass_recorder: Callable[..., HomeAssistant]) ->
media player (attribute changes are not significant and not returned).
"""
hass = hass_recorder()
zero, four, states = record_states(hass)
hist = history.get_significant_states(hass, zero, four)
assert_dict_of_states_equal_without_context_and_last_changed(states, hist)
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
zero, four, states = record_states(hass)
hist = history.get_significant_states(hass, zero, four)
assert_dict_of_states_equal_without_context_and_last_changed(states, hist)
def test_get_significant_states_minimal_response(
@ -351,57 +373,59 @@ def test_get_significant_states_minimal_response(
media player (attribute changes are not significant and not returned).
"""
hass = hass_recorder()
zero, four, states = record_states(hass)
hist = history.get_significant_states(hass, zero, four, minimal_response=True)
entites_with_reducable_states = [
"media_player.test",
"media_player.test3",
]
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
zero, four, states = record_states(hass)
hist = history.get_significant_states(hass, zero, four, minimal_response=True)
entites_with_reducable_states = [
"media_player.test",
"media_player.test3",
]
# All states for media_player.test state are reduced
# down to last_changed and state when minimal_response
# is set except for the first state.
# is set. We use JSONEncoder to make sure that the
# pre-encoded last_changed is always the same as what
# will happen with encoding a native state
for entity_id in entites_with_reducable_states:
entity_states = states[entity_id]
for state_idx in range(1, len(entity_states)):
input_state = entity_states[state_idx]
orig_last_changed = orig_last_changed = json.dumps(
process_timestamp(input_state.last_changed),
cls=JSONEncoder,
).replace('"', "")
orig_state = input_state.state
entity_states[state_idx] = {
"last_changed": orig_last_changed,
"state": orig_state,
}
# All states for media_player.test are reduced down to just
# last_changed and state when minimal_response is set, except
# for the first state. We use JSONEncoder to make sure that the
# pre-encoded last_changed is always the same as what encoding
# a native state would produce.
for entity_id in entites_with_reducable_states:
entity_states = states[entity_id]
for state_idx in range(1, len(entity_states)):
input_state = entity_states[state_idx]
orig_last_changed = json.dumps(
process_timestamp(input_state.last_changed),
cls=JSONEncoder,
).replace('"', "")
orig_state = input_state.state
entity_states[state_idx] = {
"last_changed": orig_last_changed,
"state": orig_state,
}
assert len(hist) == len(states)
assert_states_equal_without_context(
states["media_player.test"][0], hist["media_player.test"][0]
)
assert states["media_player.test"][1] == hist["media_player.test"][1]
assert states["media_player.test"][2] == hist["media_player.test"][2]
assert len(hist) == len(states)
assert_states_equal_without_context(
states["media_player.test"][0], hist["media_player.test"][0]
)
assert states["media_player.test"][1] == hist["media_player.test"][1]
assert states["media_player.test"][2] == hist["media_player.test"][2]
assert_multiple_states_equal_without_context(
states["media_player.test2"], hist["media_player.test2"]
)
assert_states_equal_without_context(
states["media_player.test3"][0], hist["media_player.test3"][0]
)
assert states["media_player.test3"][1] == hist["media_player.test3"][1]
assert_multiple_states_equal_without_context(
states["media_player.test2"], hist["media_player.test2"]
)
assert_states_equal_without_context(
states["media_player.test3"][0], hist["media_player.test3"][0]
)
assert states["media_player.test3"][1] == hist["media_player.test3"][1]
assert_multiple_states_equal_without_context(
states["script.can_cancel_this_one"], hist["script.can_cancel_this_one"]
)
assert_multiple_states_equal_without_context_and_last_changed(
states["thermostat.test"], hist["thermostat.test"]
)
assert_multiple_states_equal_without_context_and_last_changed(
states["thermostat.test2"], hist["thermostat.test2"]
)
assert_multiple_states_equal_without_context(
states["script.can_cancel_this_one"], hist["script.can_cancel_this_one"]
)
assert_multiple_states_equal_without_context_and_last_changed(
states["thermostat.test"], hist["thermostat.test"]
)
assert_multiple_states_equal_without_context_and_last_changed(
states["thermostat.test2"], hist["thermostat.test2"]
)
def test_get_significant_states_with_initial(
@ -414,25 +438,30 @@ def test_get_significant_states_with_initial(
media player (attribute changes are not significant and not returned).
"""
hass = hass_recorder()
zero, four, states = record_states(hass)
one = zero + timedelta(seconds=1)
one_with_microsecond = zero + timedelta(seconds=1, microseconds=1)
one_and_half = zero + timedelta(seconds=1.5)
for entity_id in states:
if entity_id == "media_player.test":
states[entity_id] = states[entity_id][1:]
for state in states[entity_id]:
if state.last_changed == one or state.last_changed == one_with_microsecond:
state.last_changed = one_and_half
state.last_updated = one_and_half
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
zero, four, states = record_states(hass)
one = zero + timedelta(seconds=1)
one_with_microsecond = zero + timedelta(seconds=1, microseconds=1)
one_and_half = zero + timedelta(seconds=1.5)
for entity_id in states:
if entity_id == "media_player.test":
states[entity_id] = states[entity_id][1:]
for state in states[entity_id]:
if (
state.last_changed == one
or state.last_changed == one_with_microsecond
):
state.last_changed = one_and_half
state.last_updated = one_and_half
hist = history.get_significant_states(
hass,
one_and_half,
four,
include_start_time_state=True,
)
assert_dict_of_states_equal_without_context_and_last_changed(states, hist)
hist = history.get_significant_states(
hass,
one_and_half,
four,
include_start_time_state=True,
)
assert_dict_of_states_equal_without_context_and_last_changed(states, hist)
def test_get_significant_states_without_initial(
@ -445,27 +474,29 @@ def test_get_significant_states_without_initial(
media player (attribute changes are not significant and not returned).
"""
hass = hass_recorder()
zero, four, states = record_states(hass)
one = zero + timedelta(seconds=1)
one_with_microsecond = zero + timedelta(seconds=1, microseconds=1)
one_and_half = zero + timedelta(seconds=1.5)
for entity_id in states:
states[entity_id] = list(
filter(
lambda s: s.last_changed != one
and s.last_changed != one_with_microsecond,
states[entity_id],
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
zero, four, states = record_states(hass)
one = zero + timedelta(seconds=1)
one_with_microsecond = zero + timedelta(seconds=1, microseconds=1)
one_and_half = zero + timedelta(seconds=1.5)
for entity_id in states:
states[entity_id] = list(
filter(
lambda s: s.last_changed != one
and s.last_changed != one_with_microsecond,
states[entity_id],
)
)
)
del states["media_player.test2"]
del states["media_player.test2"]
hist = history.get_significant_states(
hass,
one_and_half,
four,
include_start_time_state=False,
)
assert_dict_of_states_equal_without_context_and_last_changed(states, hist)
hist = history.get_significant_states(
hass,
one_and_half,
four,
include_start_time_state=False,
)
assert_dict_of_states_equal_without_context_and_last_changed(states, hist)
def test_get_significant_states_entity_id(
@ -473,15 +504,17 @@ def test_get_significant_states_entity_id(
) -> None:
"""Test that only significant states are returned for one entity."""
hass = hass_recorder()
zero, four, states = record_states(hass)
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
zero, four, states = record_states(hass)
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
hist = history.get_significant_states(hass, zero, four, ["media_player.test"])
assert_dict_of_states_equal_without_context_and_last_changed(states, hist)
hist = history.get_significant_states(hass, zero, four, ["media_player.test"])
assert_dict_of_states_equal_without_context_and_last_changed(states, hist)
def test_get_significant_states_multiple_entity_ids(
@ -489,24 +522,26 @@ def test_get_significant_states_multiple_entity_ids(
) -> None:
"""Test that only significant states are returned for one entity."""
hass = hass_recorder()
zero, four, states = record_states(hass)
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
zero, four, states = record_states(hass)
del states["media_player.test2"]
del states["media_player.test3"]
del states["thermostat.test2"]
del states["script.can_cancel_this_one"]
hist = history.get_significant_states(
hass,
zero,
four,
["media_player.test", "thermostat.test"],
)
assert_multiple_states_equal_without_context_and_last_changed(
states["media_player.test"], hist["media_player.test"]
)
assert_multiple_states_equal_without_context_and_last_changed(
states["thermostat.test"], hist["thermostat.test"]
)
hist = history.get_significant_states(
hass,
zero,
four,
["media_player.test", "thermostat.test"],
)
assert_multiple_states_equal_without_context_and_last_changed(
states["media_player.test"], hist["media_player.test"]
)
assert_multiple_states_equal_without_context_and_last_changed(
states["thermostat.test"], hist["thermostat.test"]
)
def test_get_significant_states_are_ordered(
@ -518,13 +553,16 @@ def test_get_significant_states_are_ordered(
in the same order.
"""
hass = hass_recorder()
zero, four, _states = record_states(hass)
entity_ids = ["media_player.test", "media_player.test2"]
hist = history.get_significant_states(hass, zero, four, entity_ids)
assert list(hist.keys()) == entity_ids
entity_ids = ["media_player.test2", "media_player.test"]
hist = history.get_significant_states(hass, zero, four, entity_ids)
assert list(hist.keys()) == entity_ids
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
zero, four, _states = record_states(hass)
entity_ids = ["media_player.test", "media_player.test2"]
hist = history.get_significant_states(hass, zero, four, entity_ids)
assert list(hist.keys()) == entity_ids
entity_ids = ["media_player.test2", "media_player.test"]
hist = history.get_significant_states(hass, zero, four, entity_ids)
assert list(hist.keys()) == entity_ids
def test_get_significant_states_only(
@ -533,64 +571,70 @@ def test_get_significant_states_only(
"""Test significant states when significant_states_only is set."""
hass = hass_recorder()
entity_id = "sensor.test"
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
def set_state(state, **kwargs):
"""Set the state."""
hass.states.set(entity_id, state, **kwargs)
wait_recording_done(hass)
return hass.states.get(entity_id)
def set_state(state, **kwargs):
"""Set the state."""
hass.states.set(entity_id, state, **kwargs)
wait_recording_done(hass)
return hass.states.get(entity_id)
start = dt_util.utcnow() - timedelta(minutes=4)
points = []
for i in range(1, 4):
points.append(start + timedelta(minutes=i))
start = dt_util.utcnow() - timedelta(minutes=4)
points = []
for i in range(1, 4):
points.append(start + timedelta(minutes=i))
states = []
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("123", attributes={"attribute": 10.64})
states = []
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow", return_value=start
):
set_state("123", attributes={"attribute": 10.64})
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow",
return_value=points[0],
):
# Attributes are different, state not
states.append(set_state("123", attributes={"attribute": 21.42}))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow",
return_value=points[0],
):
# Attributes are different, state not
states.append(set_state("123", attributes={"attribute": 21.42}))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow",
return_value=points[1],
):
# state is different, attributes not
states.append(set_state("32", attributes={"attribute": 21.42}))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow",
return_value=points[1],
):
# state is different, attributes not
states.append(set_state("32", attributes={"attribute": 21.42}))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow",
return_value=points[2],
):
# everything is different
states.append(set_state("412", attributes={"attribute": 54.23}))
with patch(
"homeassistant.components.recorder.core.dt_util.utcnow",
return_value=points[2],
):
# everything is different
states.append(set_state("412", attributes={"attribute": 54.23}))
hist = history.get_significant_states(hass, start, significant_changes_only=True)
hist = history.get_significant_states(
hass, start, significant_changes_only=True
)
assert len(hist[entity_id]) == 2
assert not any(
state.last_updated == states[0].last_updated for state in hist[entity_id]
)
assert any(
state.last_updated == states[1].last_updated for state in hist[entity_id]
)
assert any(
state.last_updated == states[2].last_updated for state in hist[entity_id]
)
assert len(hist[entity_id]) == 2
assert not any(
state.last_updated == states[0].last_updated for state in hist[entity_id]
)
assert any(
state.last_updated == states[1].last_updated for state in hist[entity_id]
)
assert any(
state.last_updated == states[2].last_updated for state in hist[entity_id]
)
hist = history.get_significant_states(hass, start, significant_changes_only=False)
hist = history.get_significant_states(
hass, start, significant_changes_only=False
)
assert len(hist[entity_id]) == 3
assert_multiple_states_equal_without_context_and_last_changed(
states, hist[entity_id]
)
assert len(hist[entity_id]) == 3
assert_multiple_states_equal_without_context_and_last_changed(
states, hist[entity_id]
)
def record_states(hass) -> tuple[datetime, datetime, dict[str, list[State]]]:
@ -687,23 +731,25 @@ def test_state_changes_during_period_multiple_entities_single_test(
generate incorrect results.
"""
hass = hass_recorder()
start = dt_util.utcnow()
test_entites = {f"sensor.{i}": str(i) for i in range(30)}
for entity_id, value in test_entites.items():
hass.states.set(entity_id, value)
instance = recorder.get_instance(hass)
with patch.object(instance.states_meta_manager, "active", False):
start = dt_util.utcnow()
test_entites = {f"sensor.{i}": str(i) for i in range(30)}
for entity_id, value in test_entites.items():
hass.states.set(entity_id, value)
wait_recording_done(hass)
end = dt_util.utcnow()
wait_recording_done(hass)
end = dt_util.utcnow()
hist = history.state_changes_during_period(hass, start, end, None)
for entity_id, value in test_entites.items():
assert hist[entity_id][0].state == value
hist = history.state_changes_during_period(hass, start, end, None)
for entity_id, value in test_entites.items():
assert hist[entity_id][0].state == value
for entity_id, value in test_entites.items():
hist = history.state_changes_during_period(hass, start, end, entity_id)
assert len(hist) == 1
assert hist[entity_id][0].state == value
for entity_id, value in test_entites.items():
hist = history.state_changes_during_period(hass, start, end, entity_id)
assert len(hist) == 1
assert hist[entity_id][0].state == value
hist = history.state_changes_during_period(hass, start, end, None)
for entity_id, value in test_entites.items():
assert hist[entity_id][0].state == value
hist = history.state_changes_during_period(hass, start, end, None)
for entity_id, value in test_entites.items():
assert hist[entity_id][0].state == value

View file

@ -43,6 +43,7 @@ from homeassistant.components.recorder.db_schema import (
RecorderRuns,
StateAttributes,
States,
StatesMeta,
StatisticsRuns,
)
from homeassistant.components.recorder.models import process_timestamp
@ -235,11 +236,14 @@ async def test_saving_state(recorder_mock: Recorder, hass: HomeAssistant) -> Non
with session_scope(hass=hass) as session:
db_states = []
for db_state, db_state_attributes in session.query(
States, StateAttributes
).outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
for db_state, db_state_attributes, states_meta in (
session.query(States, StateAttributes, StatesMeta)
.outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
):
db_state.entity_id = states_meta.entity_id
db_states.append(db_state)
state = db_state.to_native()
state.attributes = db_state_attributes.to_native()
@ -273,11 +277,14 @@ async def test_saving_state_with_nul(
with session_scope(hass=hass) as session:
db_states = []
for db_state, db_state_attributes in session.query(
States, StateAttributes
).outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
for db_state, db_state_attributes, states_meta in (
session.query(States, StateAttributes, StatesMeta)
.outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
):
db_state.entity_id = states_meta.entity_id
db_states.append(db_state)
state = db_state.to_native()
state.attributes = db_state_attributes.to_native()
@ -542,11 +549,16 @@ def _add_entities(hass, entity_ids):
with session_scope(hass=hass) as session:
states = []
for state, state_attributes in session.query(States, StateAttributes).outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
for db_state, db_state_attributes, states_meta in (
session.query(States, StateAttributes, StatesMeta)
.outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
):
native_state = state.to_native()
native_state.attributes = state_attributes.to_native()
db_state.entity_id = states_meta.entity_id
native_state = db_state.to_native()
native_state.attributes = db_state_attributes.to_native()
states.append(native_state)
return states
@ -761,7 +773,11 @@ def test_saving_state_and_removing_entity(
wait_recording_done(hass)
with session_scope(hass=hass) as session:
states = list(session.query(States))
states = list(
session.query(StatesMeta.entity_id, States.state)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
.order_by(States.last_updated_ts)
)
assert len(states) == 3
assert states[0].entity_id == entity_id
assert states[0].state == STATE_LOCKED
@ -784,11 +800,16 @@ def test_saving_state_with_oversized_attributes(
states = []
with session_scope(hass=hass) as session:
for state, state_attributes in session.query(States, StateAttributes).outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
for db_state, db_state_attributes, states_meta in (
session.query(States, StateAttributes, StatesMeta)
.outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
):
native_state = state.to_native()
native_state.attributes = state_attributes.to_native()
db_state.entity_id = states_meta.entity_id
native_state = db_state.to_native()
native_state.attributes = db_state_attributes.to_native()
states.append(native_state)
assert "switch.too_big" in caplog.text
@ -1267,26 +1288,31 @@ def test_saving_sets_old_state(hass_recorder: Callable[..., HomeAssistant]) -> N
"""Test saving sets old state."""
hass = hass_recorder()
hass.states.set("test.one", "on", {})
hass.states.set("test.two", "on", {})
hass.states.set("test.one", "s1", {})
hass.states.set("test.two", "s2", {})
wait_recording_done(hass)
hass.states.set("test.one", "off", {})
hass.states.set("test.two", "off", {})
hass.states.set("test.one", "s3", {})
hass.states.set("test.two", "s4", {})
wait_recording_done(hass)
with session_scope(hass=hass) as session:
states = list(session.query(States))
states = list(
session.query(
StatesMeta.entity_id, States.state_id, States.old_state_id, States.state
).outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
)
assert len(states) == 4
states_by_state = {state.state: state for state in states}
assert states[0].entity_id == "test.one"
assert states[1].entity_id == "test.two"
assert states[2].entity_id == "test.one"
assert states[3].entity_id == "test.two"
assert states_by_state["s1"].entity_id == "test.one"
assert states_by_state["s2"].entity_id == "test.two"
assert states_by_state["s3"].entity_id == "test.one"
assert states_by_state["s4"].entity_id == "test.two"
assert states[0].old_state_id is None
assert states[1].old_state_id is None
assert states[2].old_state_id == states[0].state_id
assert states[3].old_state_id == states[1].state_id
assert states_by_state["s1"].old_state_id is None
assert states_by_state["s2"].old_state_id is None
assert states_by_state["s3"].old_state_id == states_by_state["s1"].state_id
assert states_by_state["s4"].old_state_id == states_by_state["s2"].state_id
def test_saving_state_with_serializable_data(
@ -1296,21 +1322,25 @@ def test_saving_state_with_serializable_data(
hass = hass_recorder()
hass.bus.fire("bad_event", {"fail": CannotSerializeMe()})
hass.states.set("test.one", "on", {"fail": CannotSerializeMe()})
hass.states.set("test.one", "s1", {"fail": CannotSerializeMe()})
wait_recording_done(hass)
hass.states.set("test.two", "on", {})
hass.states.set("test.two", "s2", {})
wait_recording_done(hass)
hass.states.set("test.two", "off", {})
hass.states.set("test.two", "s3", {})
wait_recording_done(hass)
with session_scope(hass=hass) as session:
states = list(session.query(States))
states = list(
session.query(
StatesMeta.entity_id, States.state_id, States.old_state_id, States.state
).outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
)
assert len(states) == 2
assert states[0].entity_id == "test.two"
assert states[1].entity_id == "test.two"
assert states[0].old_state_id is None
assert states[1].old_state_id == states[0].state_id
states_by_state = {state.state: state for state in states}
assert states_by_state["s2"].entity_id == "test.two"
assert states_by_state["s3"].entity_id == "test.two"
assert states_by_state["s2"].old_state_id is None
assert states_by_state["s3"].old_state_id == states_by_state["s2"].state_id
assert "State is not JSON serializable" in caplog.text
@ -1442,6 +1472,7 @@ def test_service_disable_states_not_recording(
db_states = list(session.query(States))
assert len(db_states) == 1
assert db_states[0].event_id is None
db_states[0].entity_id = "test.two"
assert (
db_states[0].to_native().as_dict()
== _state_with_context(hass, "test.two").as_dict()
@ -1554,6 +1585,7 @@ async def test_database_corruption_while_running(
with session_scope(hass=hass) as session:
db_states = list(session.query(States))
assert len(db_states) == 1
db_states[0].entity_id = "test.two"
assert db_states[0].event_id is None
return db_states[0].to_native()
@ -1868,9 +1900,7 @@ def test_deduplication_state_attributes_inside_commit_interval(
with session_scope(hass=hass) as session:
states = list(
session.query(States)
.filter(States.entity_id == entity_id)
.outerjoin(
session.query(States).outerjoin(
StateAttributes, (States.attributes_id == StateAttributes.attributes_id)
)
)
@ -1895,7 +1925,7 @@ async def test_async_block_till_done(
def _fetch_states():
with session_scope(hass=hass) as session:
return list(session.query(States).filter(States.entity_id == entity_id))
return list(session.query(States))
await async_block_recorder(hass, 0.1)
await instance.async_block_till_done()
@ -2098,11 +2128,14 @@ async def test_excluding_attributes_by_integration(
with session_scope(hass=hass) as session:
db_states = []
for db_state, db_state_attributes in session.query(
States, StateAttributes
).outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
for db_state, db_state_attributes, states_meta in (
session.query(States, StateAttributes, StatesMeta)
.outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
):
db_state.entity_id = states_meta.entity_id
db_states.append(db_state)
state = db_state.to_native()
state.attributes = db_state_attributes.to_native()

View file

@ -28,10 +28,13 @@ from homeassistant.components.recorder.db_schema import (
EventTypes,
RecorderRuns,
States,
StatesMeta,
)
from homeassistant.components.recorder.queries import select_event_type_ids
from homeassistant.components.recorder.tasks import (
ContextIDMigrationTask,
EntityIDMigrationTask,
EntityIDPostMigrationTask,
EventTypeIDMigrationTask,
)
from homeassistant.components.recorder.util import session_scope
@ -54,10 +57,13 @@ ORIG_TZ = dt_util.DEFAULT_TIME_ZONE
def _get_native_states(hass, entity_id):
with session_scope(hass=hass) as session:
return [
state.to_native()
for state in session.query(States).filter(States.entity_id == entity_id)
]
instance = recorder.get_instance(hass)
metadata_id = instance.states_meta_manager.get(entity_id, session)
states = []
for dbstate in session.query(States).filter(States.metadata_id == metadata_id):
dbstate.entity_id = entity_id
states.append(dbstate.to_native())
return states
async def test_schema_update_calls(recorder_db_url: str, hass: HomeAssistant) -> None:
@ -764,3 +770,121 @@ async def test_migrate_event_type_ids(
events_by_type = await instance.async_add_executor_job(_fetch_migrated_events)
assert len(events_by_type["event_type_one"]) == 2
assert len(events_by_type["event_type_two"]) == 1
@pytest.mark.parametrize("enable_migrate_entity_ids", [True])
async def test_migrate_entity_ids(
async_setup_recorder_instance: RecorderInstanceGenerator, hass: HomeAssistant
) -> None:
"""Test we can migrate entity_ids to the StatesMeta table."""
instance = await async_setup_recorder_instance(hass)
await async_wait_recording_done(hass)
def _insert_events():
with session_scope(hass=hass) as session:
session.add_all(
(
States(
entity_id="sensor.one",
state="one_1",
last_updated_ts=1.452529,
),
States(
entity_id="sensor.two",
state="two_2",
last_updated_ts=2.252529,
),
States(
entity_id="sensor.two",
state="two_1",
last_updated_ts=3.152529,
),
)
)
await instance.async_add_executor_job(_insert_events)
await async_wait_recording_done(hass)
# This is a threadsafe way to add a task to the recorder
instance.queue_task(EntityIDMigrationTask())
await async_recorder_block_till_done(hass)
def _fetch_migrated_states():
with session_scope(hass=hass) as session:
states = (
session.query(
States.state,
States.metadata_id,
States.last_updated_ts,
StatesMeta.entity_id,
)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
.all()
)
assert len(states) == 3
result = {}
for state in states:
result.setdefault(state.entity_id, []).append(
{
"state_id": state.entity_id,
"last_updated_ts": state.last_updated_ts,
"state": state.state,
}
)
return result
states_by_entity_id = await instance.async_add_executor_job(_fetch_migrated_states)
assert len(states_by_entity_id["sensor.two"]) == 2
assert len(states_by_entity_id["sensor.one"]) == 1
@pytest.mark.parametrize("enable_migrate_entity_ids", [True])
async def test_post_migrate_entity_ids(
async_setup_recorder_instance: RecorderInstanceGenerator, hass: HomeAssistant
) -> None:
"""Test we can migrate entity_ids to the StatesMeta table."""
instance = await async_setup_recorder_instance(hass)
await async_wait_recording_done(hass)
def _insert_events():
with session_scope(hass=hass) as session:
session.add_all(
(
States(
entity_id="sensor.one",
state="one_1",
last_updated_ts=1.452529,
),
States(
entity_id="sensor.two",
state="two_2",
last_updated_ts=2.252529,
),
States(
entity_id="sensor.two",
state="two_1",
last_updated_ts=3.152529,
),
)
)
await instance.async_add_executor_job(_insert_events)
await async_wait_recording_done(hass)
# This is a threadsafe way to add a task to the recorder
instance.queue_task(EntityIDPostMigrationTask())
await async_recorder_block_till_done(hass)
def _fetch_migrated_states():
with session_scope(hass=hass) as session:
states = session.query(
States.state,
States.entity_id,
).all()
assert len(states) == 3
return {state.state: state.entity_id for state in states}
states_by_state = await instance.async_add_executor_job(_fetch_migrated_states)
assert states_by_state["one_1"] is None
assert states_by_state["two_2"] is None
assert states_by_state["two_1"] is None

View file

@ -9,6 +9,7 @@ from sqlalchemy.exc import DatabaseError, OperationalError
from sqlalchemy.orm.session import Session
from homeassistant.components import recorder
from homeassistant.components.recorder import Recorder
from homeassistant.components.recorder.const import (
SQLITE_MAX_BIND_VARS,
SupportedDialect,
@ -20,6 +21,7 @@ from homeassistant.components.recorder.db_schema import (
RecorderRuns,
StateAttributes,
States,
StatesMeta,
StatisticsRuns,
StatisticsShortTerm,
)
@ -670,6 +672,31 @@ async def test_purge_cutoff_date(
assert state_attributes.count() == 0
def _convert_pending_states_to_meta(instance: Recorder, session: Session) -> None:
"""Convert pending states to use states_metadata."""
entity_ids: set[str] = set()
states: set[States] = set()
states_meta_objects: dict[str, StatesMeta] = {}
for object in session:
if isinstance(object, States):
entity_ids.add(object.entity_id)
states.add(object)
entity_id_to_metadata_ids = instance.states_meta_manager.get_many(
entity_ids, session
)
for state in states:
entity_id = state.entity_id
state.entity_id = None
if metadata_id := entity_id_to_metadata_ids.get(entity_id):
state.metadata_id = metadata_id
continue
if entity_id not in states_meta_objects:
states_meta_objects[entity_id] = StatesMeta(entity_id=entity_id)
state.states_meta_rel = states_meta_objects[entity_id]
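A brief usage sketch (not part of this commit; the _add_db_entries name, the sensor.excluded row, and the captured instance variable are assumptions mirroring the test helpers in this file): the helper is meant to be called inside the same session_scope that just added pending States rows, so their entity_id strings are rewritten to metadata_id references before the session commits.
def _add_db_entries(hass: HomeAssistant) -> None:
    """Add a pending States row and convert it to the StatesMeta layout."""
    with session_scope(hass=hass) as session:
        session.add(
            States(
                entity_id="sensor.excluded",
                state="purgeme",
                last_updated_ts=dt_util.utc_to_timestamp(dt_util.utcnow()),
            )
        )
        # Rewrite the still-pending States rows in place: entity_id is
        # cleared and either an existing metadata_id or a new StatesMeta
        # row is attached. `instance` is the Recorder captured from the
        # enclosing test.
        _convert_pending_states_to_meta(instance, session)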
@pytest.mark.parametrize("use_sqlite", (True, False), indirect=True)
async def test_purge_filtered_states(
async_setup_recorder_instance: RecorderInstanceGenerator,
@ -762,6 +789,7 @@ async def test_purge_filtered_states(
time_fired_ts=dt_util.utc_to_timestamp(timestamp),
)
)
_convert_pending_states_to_meta(instance, session)
service_data = {"keep_days": 10}
_add_db_entries(hass)
@ -815,8 +843,10 @@ async def test_purge_filtered_states(
events_keep = session.query(Events).filter(Events.event_type == "EVENT_KEEP")
assert events_keep.count() == 1
states_sensor_excluded = session.query(States).filter(
States.entity_id == "sensor.excluded"
states_sensor_excluded = (
session.query(States)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
.filter(StatesMeta.entity_id == "sensor.excluded")
)
assert states_sensor_excluded.count() == 0
@ -880,6 +910,7 @@ async def test_purge_filtered_states_to_empty(
timestamp,
event_id * days,
)
_convert_pending_states_to_meta(instance, session)
service_data = {"keep_days": 10}
_add_db_entries(hass)
@ -955,6 +986,7 @@ async def test_purge_without_state_attributes_filtered_states_to_empty(
time_fired_ts=dt_util.utc_to_timestamp(timestamp),
)
)
_convert_pending_states_to_meta(instance, session)
service_data = {"keep_days": 10}
_add_db_entries(hass)
@ -1179,7 +1211,7 @@ async def test_purge_entities(
async_setup_recorder_instance: RecorderInstanceGenerator, hass: HomeAssistant
) -> None:
"""Test purging of specific entities."""
await async_setup_recorder_instance(hass)
instance = await async_setup_recorder_instance(hass)
async def _purge_entities(hass, entity_ids, domains, entity_globs):
service_data = {
@ -1227,6 +1259,7 @@ async def test_purge_entities(
timestamp,
event_id * days,
)
_convert_pending_states_to_meta(instance, session)
def _add_keep_records(hass: HomeAssistant) -> None:
with session_scope(hass=hass) as session:
@ -1240,6 +1273,7 @@ async def test_purge_entities(
timestamp,
event_id,
)
_convert_pending_states_to_meta(instance, session)
_add_purge_records(hass)
_add_keep_records(hass)
@ -1255,8 +1289,10 @@ async def test_purge_entities(
states = session.query(States)
assert states.count() == 10
states_sensor_kept = session.query(States).filter(
States.entity_id == "sensor.keep"
states_sensor_kept = (
session.query(States)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
.filter(StatesMeta.entity_id == "sensor.keep")
)
assert states_sensor_kept.count() == 10
@ -1285,8 +1321,10 @@ async def test_purge_entities(
states = session.query(States)
assert states.count() == 10
states_sensor_kept = session.query(States).filter(
States.entity_id == "sensor.keep"
states_sensor_kept = (
session.query(States)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
.filter(StatesMeta.entity_id == "sensor.keep")
)
assert states_sensor_kept.count() == 10
@ -1796,3 +1834,103 @@ async def test_purge_old_events_purges_the_event_type_ids(
assert finished
assert events.count() == 0
assert event_types.count() == 0
async def test_purge_old_states_purges_the_state_metadata_ids(
async_setup_recorder_instance: RecorderInstanceGenerator, hass: HomeAssistant
) -> None:
"""Test deleting old states purges state metadata_ids."""
instance = await async_setup_recorder_instance(hass)
assert instance.states_meta_manager.active is True
utcnow = dt_util.utcnow()
five_days_ago = utcnow - timedelta(days=5)
eleven_days_ago = utcnow - timedelta(days=11)
far_past = utcnow - timedelta(days=1000)
await hass.async_block_till_done()
await async_wait_recording_done(hass)
def _insert_states():
with session_scope(hass=hass) as session:
states_meta_sensor_one = StatesMeta(entity_id="sensor.one")
states_meta_sensor_two = StatesMeta(entity_id="sensor.two")
states_meta_sensor_three = StatesMeta(entity_id="sensor.three")
states_meta_sensor_unused = StatesMeta(entity_id="sensor.unused")
session.add_all(
(
states_meta_sensor_one,
states_meta_sensor_two,
states_meta_sensor_three,
states_meta_sensor_unused,
)
)
session.flush()
for _ in range(5):
for event_id in range(6):
if event_id < 2:
timestamp = eleven_days_ago
metadata_id = states_meta_sensor_one.metadata_id
elif event_id < 4:
timestamp = five_days_ago
metadata_id = states_meta_sensor_two.metadata_id
else:
timestamp = utcnow
metadata_id = states_meta_sensor_three.metadata_id
session.add(
States(
metadata_id=metadata_id,
state="any",
last_updated_ts=dt_util.utc_to_timestamp(timestamp),
)
)
return instance.states_meta_manager.get_many(
["sensor.one", "sensor.two", "sensor.three", "sensor.unused"],
session,
)
entity_id_to_metadata_id = await instance.async_add_executor_job(_insert_states)
test_metadata_ids = entity_id_to_metadata_id.values()
with session_scope(hass=hass) as session:
states = session.query(States).where(States.metadata_id.in_(test_metadata_ids))
states_meta = session.query(StatesMeta).where(
StatesMeta.metadata_id.in_(test_metadata_ids)
)
assert states.count() == 30
assert states_meta.count() == 4
# run purge_old_data()
finished = purge_old_data(
instance,
far_past,
repack=False,
)
assert finished
assert states.count() == 30
# We should remove the unused entity_id
assert states_meta.count() == 3
assert "sensor.unused" not in instance.event_type_manager._id_map
# we should only have 10 states left since
# only one event type was recorded now
finished = purge_old_data(
instance,
utcnow,
repack=False,
)
assert finished
assert states.count() == 10
assert states_meta.count() == 1
# Purge everything
finished = purge_old_data(
instance,
utcnow + timedelta(seconds=1),
repack=False,
)
assert finished
assert states.count() == 0
assert states_meta.count() == 0

View file

@ -18,7 +18,7 @@ from homeassistant.components import recorder
from homeassistant.components.recorder import util
from homeassistant.components.recorder.const import DOMAIN, SQLITE_URL_PREFIX
from homeassistant.components.recorder.db_schema import RecorderRuns
from homeassistant.components.recorder.history.legacy import (
from homeassistant.components.recorder.history.modern import (
_get_single_entity_states_stmt,
)
from homeassistant.components.recorder.models import (
@ -908,26 +908,25 @@ def test_execute_stmt_lambda_element(
with session_scope(hass=hass) as session:
# No time window, we always get a list
stmt = _get_single_entity_states_stmt(
instance.schema_version, dt_util.utcnow(), "sensor.on", False
)
metadata_id = instance.states_meta_manager.get("sensor.on", session)
stmt = _get_single_entity_states_stmt(dt_util.utcnow(), metadata_id, False)
rows = util.execute_stmt_lambda_element(session, stmt)
assert isinstance(rows, list)
assert rows[0].state == new_state.state
assert rows[0].entity_id == new_state.entity_id
assert rows[0].metadata_id == metadata_id
# Time window >= 2 days, we get a ChunkedIteratorResult
rows = util.execute_stmt_lambda_element(session, stmt, now, one_week_from_now)
assert isinstance(rows, ChunkedIteratorResult)
row = next(rows)
assert row.state == new_state.state
assert row.entity_id == new_state.entity_id
assert row.metadata_id == metadata_id
# Time window < 2 days, we get a list
rows = util.execute_stmt_lambda_element(session, stmt, now, tomorrow)
assert isinstance(rows, list)
assert rows[0].state == new_state.state
assert rows[0].entity_id == new_state.entity_id
assert rows[0].metadata_id == metadata_id
with patch.object(session, "execute", MockExecutor):
rows = util.execute_stmt_lambda_element(session, stmt, now, tomorrow)

View file

@ -1,5 +1,6 @@
"""The tests for recorder platform migrating data from v30."""
# pylint: disable=invalid-name
import asyncio
from datetime import timedelta
import importlib
import sys
@ -15,12 +16,12 @@ from homeassistant.components.recorder.queries import select_event_type_ids
from homeassistant.components.recorder.util import session_scope
from homeassistant.core import EVENT_STATE_CHANGED, Event, EventOrigin, State
from homeassistant.helpers import recorder as recorder_helper
from homeassistant.setup import setup_component
from homeassistant.setup import async_setup_component
import homeassistant.util.dt as dt_util
from .common import wait_recording_done
from .common import async_wait_recording_done
from tests.common import get_test_home_assistant
from tests.common import async_test_home_assistant
ORIG_TZ = dt_util.DEFAULT_TIME_ZONE
@ -50,7 +51,7 @@ def _create_engine_test(*args, **kwargs):
return engine
def test_migrate_times(caplog: pytest.LogCaptureFixture, tmpdir) -> None:
async def test_migrate_times(caplog: pytest.LogCaptureFixture, tmpdir) -> None:
"""Test we can migrate times."""
test_db_file = tmpdir.mkdir("sqlite").join("test_run_info.db")
dburl = f"{SQLITE_URL_PREFIX}//{test_db_file}"
@ -88,7 +89,9 @@ def test_migrate_times(caplog: pytest.LogCaptureFixture, tmpdir) -> None:
with patch.object(recorder, "db_schema", old_db_schema), patch.object(
recorder.migration, "SCHEMA_VERSION", old_db_schema.SCHEMA_VERSION
), patch.object(core, "EventTypes", old_db_schema.EventTypes), patch.object(
), patch.object(core, "StatesMeta", old_db_schema.StatesMeta), patch.object(
core, "EventTypes", old_db_schema.EventTypes
), patch.object(
core, "EventData", old_db_schema.EventData
), patch.object(
core, "States", old_db_schema.States
@ -96,46 +99,77 @@ def test_migrate_times(caplog: pytest.LogCaptureFixture, tmpdir) -> None:
core, "Events", old_db_schema.Events
), patch(
CREATE_ENGINE_TARGET, new=_create_engine_test
), patch(
"homeassistant.components.recorder.Recorder._migrate_context_ids",
), patch(
"homeassistant.components.recorder.Recorder._migrate_event_type_ids",
), patch(
"homeassistant.components.recorder.Recorder._migrate_entity_ids",
):
hass = get_test_home_assistant()
hass = await async_test_home_assistant(asyncio.get_running_loop())
recorder_helper.async_initialize_recorder(hass)
setup_component(hass, "recorder", {"recorder": {"db_url": dburl}})
wait_recording_done(hass)
wait_recording_done(hass)
assert await async_setup_component(
hass, "recorder", {"recorder": {"db_url": dburl}}
)
await hass.async_block_till_done()
await async_wait_recording_done(hass)
await async_wait_recording_done(hass)
with session_scope(hass=hass) as session:
session.add(old_db_schema.Events.from_event(custom_event))
session.add(old_db_schema.States.from_event(state_changed_event))
def _add_data():
with session_scope(hass=hass) as session:
session.add(old_db_schema.Events.from_event(custom_event))
session.add(old_db_schema.States.from_event(state_changed_event))
hass.stop()
await recorder.get_instance(hass).async_add_executor_job(_add_data)
await hass.async_block_till_done()
await hass.async_stop()
dt_util.DEFAULT_TIME_ZONE = ORIG_TZ
# Test that the duplicates are removed during migration from schema 23
hass = get_test_home_assistant()
hass = await async_test_home_assistant(asyncio.get_running_loop())
recorder_helper.async_initialize_recorder(hass)
setup_component(hass, "recorder", {"recorder": {"db_url": dburl}})
hass.start()
wait_recording_done(hass)
wait_recording_done(hass)
with session_scope(hass=hass) as session:
result = list(
session.query(recorder.db_schema.Events).filter(
recorder.db_schema.Events.event_type_id.in_(
select_event_type_ids(("custom_event",))
assert await async_setup_component(
hass, "recorder", {"recorder": {"db_url": dburl}}
)
await hass.async_block_till_done()
# We need to wait for all the migration tasks to complete
# before we can check the database.
for _ in range(5):
await async_wait_recording_done(hass)
def _get_test_data_from_db():
with session_scope(hass=hass) as session:
events_result = list(
session.query(recorder.db_schema.Events).filter(
recorder.db_schema.Events.event_type_id.in_(
select_event_type_ids(("custom_event",))
)
)
)
)
assert len(result) == 1
assert result[0].time_fired_ts == now_timestamp
result = list(
session.query(recorder.db_schema.States).where(
recorder.db_schema.States.entity_id == "sensor.test"
states_result = list(
session.query(recorder.db_schema.States)
.join(
recorder.db_schema.StatesMeta,
recorder.db_schema.States.metadata_id
== recorder.db_schema.StatesMeta.metadata_id,
)
.where(recorder.db_schema.StatesMeta.entity_id == "sensor.test")
)
)
assert len(result) == 1
assert result[0].last_changed_ts == one_second_past_timestamp
assert result[0].last_updated_ts == now_timestamp
session.expunge_all()
return events_result, states_result
hass.stop()
events_result, states_result = await recorder.get_instance(
hass
).async_add_executor_job(_get_test_data_from_db)
assert len(events_result) == 1
assert events_result[0].time_fired_ts == now_timestamp
assert len(states_result) == 1
assert states_result[0].last_changed_ts == one_second_past_timestamp
assert states_result[0].last_updated_ts == now_timestamp
await hass.async_stop()
dt_util.DEFAULT_TIME_ZONE = ORIG_TZ

View file

@ -18,6 +18,7 @@ from homeassistant.components.recorder import (
from homeassistant.components.recorder.db_schema import (
StateAttributes,
States,
StatesMeta,
StatisticsMeta,
)
from homeassistant.components.recorder.models import (
@ -4735,11 +4736,15 @@ async def test_exclude_attributes(recorder_mock: Recorder, hass: HomeAssistant)
def _fetch_states() -> list[State]:
with session_scope(hass=hass) as session:
native_states = []
for db_state, db_state_attributes in session.query(
States, StateAttributes
).outerjoin(
StateAttributes, States.attributes_id == StateAttributes.attributes_id
for db_state, db_state_attributes, db_states_meta in (
session.query(States, StateAttributes, StatesMeta)
.outerjoin(
StateAttributes,
States.attributes_id == StateAttributes.attributes_id,
)
.outerjoin(StatesMeta, States.metadata_id == StatesMeta.metadata_id)
):
db_state.entity_id = db_states_meta.entity_id
state = db_state.to_native()
state.attributes = db_state_attributes.to_native()
native_states.append(state)

View file

@ -1158,6 +1158,16 @@ def enable_migrate_event_type_ids() -> bool:
return False
@pytest.fixture
def enable_migrate_entity_ids() -> bool:
"""Fixture to control enabling of recorder's entity_id migration.
To enable entity_id migration, tests can be marked with:
@pytest.mark.parametrize("enable_migrate_entity_ids", [True])
"""
return False
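A hedged usage sketch (the test name and body are illustrative assumptions, modeled on the migration tests above): a recorder test opts into the real entity_id migration by parametrizing this fixture, which leaves Recorder._migrate_entity_ids unpatched when async_setup_recorder_instance builds the instance.
@pytest.mark.parametrize("enable_migrate_entity_ids", [True])
async def test_runs_entity_id_migration(
    async_setup_recorder_instance: RecorderInstanceGenerator, hass: HomeAssistant
) -> None:
    """Sketch: the real entity_id migration runs because the fixture is True."""
    instance = await async_setup_recorder_instance(hass)
    await async_wait_recording_done(hass)
    # The StatesMeta manager should be active once migration has completed.
    assert instance.states_meta_manager.active is True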
@pytest.fixture
def recorder_config() -> dict[str, Any] | None:
"""Fixture to override recorder config.
@ -1221,6 +1231,9 @@ def hass_recorder(
enable_nightly_purge: bool,
enable_statistics: bool,
enable_statistics_table_validation: bool,
enable_migrate_context_ids: bool,
enable_migrate_event_type_ids: bool,
enable_migrate_entity_ids: bool,
hass_storage,
) -> Generator[Callable[..., HomeAssistant], None, None]:
"""Home Assistant fixture with in-memory recorder."""
@ -1237,6 +1250,17 @@ def hass_recorder(
if enable_statistics_table_validation
else itertools.repeat(set())
)
migrate_context_ids = (
recorder.Recorder._migrate_context_ids if enable_migrate_context_ids else None
)
migrate_event_type_ids = (
recorder.Recorder._migrate_event_type_ids
if enable_migrate_event_type_ids
else None
)
migrate_entity_ids = (
recorder.Recorder._migrate_entity_ids if enable_migrate_entity_ids else None
)
with patch(
"homeassistant.components.recorder.Recorder.async_nightly_tasks",
side_effect=nightly,
@ -1249,6 +1273,18 @@ def hass_recorder(
"homeassistant.components.recorder.migration.statistics_validate_db_schema",
side_effect=stats_validate,
autospec=True,
), patch(
"homeassistant.components.recorder.Recorder._migrate_context_ids",
side_effect=migrate_context_ids,
autospec=True,
), patch(
"homeassistant.components.recorder.Recorder._migrate_event_type_ids",
side_effect=migrate_event_type_ids,
autospec=True,
), patch(
"homeassistant.components.recorder.Recorder._migrate_entity_ids",
side_effect=migrate_entity_ids,
autospec=True,
):
def setup_recorder(config: dict[str, Any] | None = None) -> HomeAssistant:
@ -1302,6 +1338,7 @@ async def async_setup_recorder_instance(
enable_statistics_table_validation: bool,
enable_migrate_context_ids: bool,
enable_migrate_event_type_ids: bool,
enable_migrate_entity_ids: bool,
) -> AsyncGenerator[RecorderInstanceGenerator, None]:
"""Yield callable to setup recorder instance."""
# pylint: disable-next=import-outside-toplevel
@ -1325,6 +1362,9 @@ async def async_setup_recorder_instance(
if enable_migrate_event_type_ids
else None
)
migrate_entity_ids = (
recorder.Recorder._migrate_entity_ids if enable_migrate_entity_ids else None
)
with patch(
"homeassistant.components.recorder.Recorder.async_nightly_tasks",
side_effect=nightly,
@ -1345,6 +1385,10 @@ async def async_setup_recorder_instance(
"homeassistant.components.recorder.Recorder._migrate_event_type_ids",
side_effect=migrate_event_type_ids,
autospec=True,
), patch(
"homeassistant.components.recorder.Recorder._migrate_entity_ids",
side_effect=migrate_entity_ids,
autospec=True,
):
async def async_setup_recorder(