Deduplicate entity_id in the states table (#89557)

This commit is contained in:
J. Nick Koston 2023-03-12 10:01:58 -10:00 committed by GitHub
parent 8d88b02c2e
commit c41f91be89
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 3715 additions and 1018 deletions

View file

@ -1,7 +1,7 @@
"""Entities queries for logbook."""
from __future__ import annotations
from collections.abc import Iterable
from collections.abc import Collection, Iterable
import sqlalchemy
from sqlalchemy import lambda_stmt, select, union_all
@ -11,12 +11,13 @@ from sqlalchemy.sql.selectable import CTE, CompoundSelect, Select
from homeassistant.components.recorder.db_schema import (
ENTITY_ID_IN_EVENT,
ENTITY_ID_LAST_UPDATED_INDEX_TS,
METADATA_ID_LAST_UPDATED_INDEX_TS,
OLD_ENTITY_ID_IN_EVENT,
EventData,
Events,
EventTypes,
States,
StatesMeta,
)
from .common import (
@ -35,7 +36,7 @@ def _select_entities_context_ids_sub_query(
start_day: float,
end_day: float,
event_types: tuple[str, ...],
entity_ids: list[str],
states_metadata_ids: Collection[int],
json_quoted_entity_ids: list[str],
) -> Select:
"""Generate a subquery to find context ids for multiple entities."""
@ -47,7 +48,7 @@ def _select_entities_context_ids_sub_query(
.filter(
(States.last_updated_ts > start_day) & (States.last_updated_ts < end_day)
)
.where(States.entity_id.in_(entity_ids)),
.where(States.metadata_id.in_(states_metadata_ids)),
).subquery()
return select(union.c.context_id_bin).group_by(union.c.context_id_bin)
@ -57,7 +58,7 @@ def _apply_entities_context_union(
start_day: float,
end_day: float,
event_types: tuple[str, ...],
entity_ids: list[str],
states_metadata_ids: Collection[int],
json_quoted_entity_ids: list[str],
) -> CompoundSelect:
"""Generate a CTE to find the entity and device context ids and a query to find linked row."""
@ -65,16 +66,16 @@ def _apply_entities_context_union(
start_day,
end_day,
event_types,
entity_ids,
states_metadata_ids,
json_quoted_entity_ids,
).cte()
# We used to optimize this to exclude rows we already in the union with
# a States.entity_id.not_in(entity_ids) but that made the
# a StatesMeta.metadata_ids.not_in(states_metadata_ids) but that made the
# query much slower on MySQL, and since we already filter them away
# in the python code anyways since they will have context_only
# set on them the impact is minimal.
return sel.union_all(
states_select_for_entity_ids(start_day, end_day, entity_ids),
states_select_for_entity_ids(start_day, end_day, states_metadata_ids),
apply_events_context_hints(
select_events_context_only()
.select_from(entities_cte)
@ -86,6 +87,7 @@ def _apply_entities_context_union(
select_states_context_only()
.select_from(entities_cte)
.outerjoin(States, entities_cte.c.context_id_bin == States.context_id_bin)
.outerjoin(StatesMeta, (States.metadata_id == StatesMeta.metadata_id))
),
)
@ -94,7 +96,7 @@ def entities_stmt(
start_day: float,
end_day: float,
event_types: tuple[str, ...],
entity_ids: list[str],
states_metadata_ids: Collection[int],
json_quoted_entity_ids: list[str],
) -> StatementLambdaElement:
"""Generate a logbook query for multiple entities."""
@ -106,19 +108,19 @@ def entities_stmt(
start_day,
end_day,
event_types,
entity_ids,
states_metadata_ids,
json_quoted_entity_ids,
).order_by(Events.time_fired_ts)
)
def states_select_for_entity_ids(
start_day: float, end_day: float, entity_ids: list[str]
start_day: float, end_day: float, states_metadata_ids: Collection[int]
) -> Select:
"""Generate a select for states from the States table for specific entities."""
return apply_states_filters(
apply_entities_hints(select_states()), start_day, end_day
).where(States.entity_id.in_(entity_ids))
).where(States.metadata_id.in_(states_metadata_ids))
def apply_event_entity_id_matchers(
@ -140,9 +142,11 @@ def apply_event_entity_id_matchers(
def apply_entities_hints(sel: Select) -> Select:
"""Force mysql to use the right index on large selects."""
return sel.with_hint(
States, f"FORCE INDEX ({ENTITY_ID_LAST_UPDATED_INDEX_TS})", dialect_name="mysql"
States,
f"FORCE INDEX ({METADATA_ID_LAST_UPDATED_INDEX_TS})",
dialect_name="mysql",
).with_hint(
States,
f"FORCE INDEX ({ENTITY_ID_LAST_UPDATED_INDEX_TS})",
f"FORCE INDEX ({METADATA_ID_LAST_UPDATED_INDEX_TS})",
dialect_name="mariadb",
)