Fix performance of logbook entity and devices queries with large MySQL databases (#72898)
This commit is contained in:
parent
b97d346df7
commit
9fbde245d0
5 changed files with 93 additions and 44 deletions
|
@ -12,9 +12,11 @@ from sqlalchemy.sql.selectable import Select
|
|||
|
||||
from homeassistant.components.proximity import DOMAIN as PROXIMITY_DOMAIN
|
||||
from homeassistant.components.recorder.models import (
|
||||
EVENTS_CONTEXT_ID_INDEX,
|
||||
OLD_FORMAT_ATTRS_JSON,
|
||||
OLD_STATE,
|
||||
SHARED_ATTRS_JSON,
|
||||
STATES_CONTEXT_ID_INDEX,
|
||||
EventData,
|
||||
Events,
|
||||
StateAttributes,
|
||||
|
@ -121,9 +123,7 @@ def select_events_context_only() -> Select:
|
|||
By marking them as context_only we know they are only for
|
||||
linking context ids and we can avoid processing them.
|
||||
"""
|
||||
return select(*EVENT_ROWS_NO_STATES, CONTEXT_ONLY).outerjoin(
|
||||
EventData, (Events.data_id == EventData.data_id)
|
||||
)
|
||||
return select(*EVENT_ROWS_NO_STATES, CONTEXT_ONLY)
|
||||
|
||||
|
||||
def select_states_context_only() -> Select:
|
||||
|
@ -252,3 +252,17 @@ def _not_uom_attributes_matcher() -> ClauseList:
|
|||
return ~StateAttributes.shared_attrs.like(
|
||||
UNIT_OF_MEASUREMENT_JSON_LIKE
|
||||
) | ~States.attributes.like(UNIT_OF_MEASUREMENT_JSON_LIKE)
|
||||
|
||||
|
||||
def apply_states_context_hints(query: Query) -> Query:
    """Attach a MySQL index hint for context_id selects against States.

    The hint text is ``FORCE INDEX (<STATES_CONTEXT_ID_INDEX>)`` and it is
    registered with ``dialect_name="mysql"``, so it targets only the MySQL
    dialect. The hinted query returned by ``with_hint`` is passed back to
    the caller.
    """
    # Build the hint separately so the with_hint call stays on one line.
    index_hint = f"FORCE INDEX ({STATES_CONTEXT_ID_INDEX})"
    return query.with_hint(States, index_hint, dialect_name="mysql")
|
||||
|
||||
|
||||
def apply_events_context_hints(query: Query) -> Query:
    """Attach a MySQL index hint for context_id selects against Events.

    The hint text is ``FORCE INDEX (<EVENTS_CONTEXT_ID_INDEX>)`` and it is
    registered with ``dialect_name="mysql"``, so it targets only the MySQL
    dialect. The hinted query returned by ``with_hint`` is passed back to
    the caller.
    """
    # Build the hint separately so the with_hint call stays on one line.
    index_hint = f"FORCE INDEX ({EVENTS_CONTEXT_ID_INDEX})"
    return query.with_hint(Events, index_hint, dialect_name="mysql")
|
||||
|
|
|
@ -4,15 +4,22 @@ from __future__ import annotations
|
|||
from collections.abc import Iterable
|
||||
from datetime import datetime as dt
|
||||
|
||||
from sqlalchemy import lambda_stmt, select, union_all
|
||||
from sqlalchemy import lambda_stmt, select
|
||||
from sqlalchemy.orm import Query
|
||||
from sqlalchemy.sql.elements import ClauseList
|
||||
from sqlalchemy.sql.lambdas import StatementLambdaElement
|
||||
from sqlalchemy.sql.selectable import CTE, CompoundSelect
|
||||
|
||||
from homeassistant.components.recorder.models import DEVICE_ID_IN_EVENT, Events, States
|
||||
from homeassistant.components.recorder.models import (
|
||||
DEVICE_ID_IN_EVENT,
|
||||
EventData,
|
||||
Events,
|
||||
States,
|
||||
)
|
||||
|
||||
from .common import (
|
||||
apply_events_context_hints,
|
||||
apply_states_context_hints,
|
||||
select_events_context_id_subquery,
|
||||
select_events_context_only,
|
||||
select_events_without_states,
|
||||
|
@ -27,13 +34,10 @@ def _select_device_id_context_ids_sub_query(
|
|||
json_quotable_device_ids: list[str],
|
||||
) -> CompoundSelect:
|
||||
"""Generate a subquery to find context ids for multiple devices."""
|
||||
return select(
|
||||
union_all(
|
||||
select_events_context_id_subquery(start_day, end_day, event_types).where(
|
||||
apply_event_device_id_matchers(json_quotable_device_ids)
|
||||
),
|
||||
).c.context_id
|
||||
inner = select_events_context_id_subquery(start_day, end_day, event_types).where(
|
||||
apply_event_device_id_matchers(json_quotable_device_ids)
|
||||
)
|
||||
return select(inner.c.context_id).group_by(inner.c.context_id)
|
||||
|
||||
|
||||
def _apply_devices_context_union(
|
||||
|
@ -51,8 +55,16 @@ def _apply_devices_context_union(
|
|||
json_quotable_device_ids,
|
||||
).cte()
|
||||
return query.union_all(
|
||||
select_events_context_only().where(Events.context_id.in_(devices_cte.select())),
|
||||
select_states_context_only().where(States.context_id.in_(devices_cte.select())),
|
||||
apply_events_context_hints(
|
||||
select_events_context_only()
|
||||
.select_from(devices_cte)
|
||||
.outerjoin(Events, devices_cte.c.context_id == Events.context_id)
|
||||
).outerjoin(EventData, (Events.data_id == EventData.data_id)),
|
||||
apply_states_context_hints(
|
||||
select_states_context_only()
|
||||
.select_from(devices_cte)
|
||||
.outerjoin(States, devices_cte.c.context_id == States.context_id)
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -14,11 +14,14 @@ from homeassistant.components.recorder.models import (
|
|||
ENTITY_ID_IN_EVENT,
|
||||
ENTITY_ID_LAST_UPDATED_INDEX,
|
||||
OLD_ENTITY_ID_IN_EVENT,
|
||||
EventData,
|
||||
Events,
|
||||
States,
|
||||
)
|
||||
|
||||
from .common import (
|
||||
apply_events_context_hints,
|
||||
apply_states_context_hints,
|
||||
apply_states_filters,
|
||||
select_events_context_id_subquery,
|
||||
select_events_context_only,
|
||||
|
@ -36,16 +39,15 @@ def _select_entities_context_ids_sub_query(
|
|||
json_quotable_entity_ids: list[str],
|
||||
) -> CompoundSelect:
|
||||
"""Generate a subquery to find context ids for multiple entities."""
|
||||
return select(
|
||||
union_all(
|
||||
select_events_context_id_subquery(start_day, end_day, event_types).where(
|
||||
apply_event_entity_id_matchers(json_quotable_entity_ids)
|
||||
),
|
||||
apply_entities_hints(select(States.context_id))
|
||||
.filter((States.last_updated > start_day) & (States.last_updated < end_day))
|
||||
.where(States.entity_id.in_(entity_ids)),
|
||||
).c.context_id
|
||||
union = union_all(
|
||||
select_events_context_id_subquery(start_day, end_day, event_types).where(
|
||||
apply_event_entity_id_matchers(json_quotable_entity_ids)
|
||||
),
|
||||
apply_entities_hints(select(States.context_id))
|
||||
.filter((States.last_updated > start_day) & (States.last_updated < end_day))
|
||||
.where(States.entity_id.in_(entity_ids)),
|
||||
)
|
||||
return select(union.c.context_id).group_by(union.c.context_id)
|
||||
|
||||
|
||||
def _apply_entities_context_union(
|
||||
|
@ -64,14 +66,23 @@ def _apply_entities_context_union(
|
|||
entity_ids,
|
||||
json_quotable_entity_ids,
|
||||
).cte()
|
||||
# We used to optimize this to exclude rows we already have in the union with
|
||||
# a States.entity_id.not_in(entity_ids) but that made the
|
||||
# query much slower on MySQL, and since we already filter them away
|
||||
# in the python code anyways since they will have context_only
|
||||
# set on them the impact is minimal.
|
||||
return query.union_all(
|
||||
states_query_for_entity_ids(start_day, end_day, entity_ids),
|
||||
select_events_context_only().where(
|
||||
Events.context_id.in_(entities_cte.select())
|
||||
apply_events_context_hints(
|
||||
select_events_context_only()
|
||||
.select_from(entities_cte)
|
||||
.outerjoin(Events, entities_cte.c.context_id == Events.context_id)
|
||||
).outerjoin(EventData, (Events.data_id == EventData.data_id)),
|
||||
apply_states_context_hints(
|
||||
select_states_context_only()
|
||||
.select_from(entities_cte)
|
||||
.outerjoin(States, entities_cte.c.context_id == States.context_id)
|
||||
),
|
||||
select_states_context_only()
|
||||
.where(States.entity_id.not_in(entity_ids))
|
||||
.where(States.context_id.in_(entities_cte.select())),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -10,9 +10,11 @@ from sqlalchemy.orm import Query
|
|||
from sqlalchemy.sql.lambdas import StatementLambdaElement
|
||||
from sqlalchemy.sql.selectable import CTE, CompoundSelect
|
||||
|
||||
from homeassistant.components.recorder.models import Events, States
|
||||
from homeassistant.components.recorder.models import EventData, Events, States
|
||||
|
||||
from .common import (
|
||||
apply_events_context_hints,
|
||||
apply_states_context_hints,
|
||||
select_events_context_id_subquery,
|
||||
select_events_context_only,
|
||||
select_events_without_states,
|
||||
|
@ -35,18 +37,17 @@ def _select_entities_device_id_context_ids_sub_query(
|
|||
json_quotable_device_ids: list[str],
|
||||
) -> CompoundSelect:
|
||||
"""Generate a subquery to find context ids for multiple entities and multiple devices."""
|
||||
return select(
|
||||
union_all(
|
||||
select_events_context_id_subquery(start_day, end_day, event_types).where(
|
||||
_apply_event_entity_id_device_id_matchers(
|
||||
json_quotable_entity_ids, json_quotable_device_ids
|
||||
)
|
||||
),
|
||||
apply_entities_hints(select(States.context_id))
|
||||
.filter((States.last_updated > start_day) & (States.last_updated < end_day))
|
||||
.where(States.entity_id.in_(entity_ids)),
|
||||
).c.context_id
|
||||
union = union_all(
|
||||
select_events_context_id_subquery(start_day, end_day, event_types).where(
|
||||
_apply_event_entity_id_device_id_matchers(
|
||||
json_quotable_entity_ids, json_quotable_device_ids
|
||||
)
|
||||
),
|
||||
apply_entities_hints(select(States.context_id))
|
||||
.filter((States.last_updated > start_day) & (States.last_updated < end_day))
|
||||
.where(States.entity_id.in_(entity_ids)),
|
||||
)
|
||||
return select(union.c.context_id).group_by(union.c.context_id)
|
||||
|
||||
|
||||
def _apply_entities_devices_context_union(
|
||||
|
@ -66,14 +67,23 @@ def _apply_entities_devices_context_union(
|
|||
json_quotable_entity_ids,
|
||||
json_quotable_device_ids,
|
||||
).cte()
|
||||
# We used to optimize this to exclude rows we already have in the union with
|
||||
# a States.entity_id.not_in(entity_ids) but that made the
|
||||
# query much slower on MySQL, and since we already filter them away
|
||||
# in the python code anyways since they will have context_only
|
||||
# set on them the impact is minimal.
|
||||
return query.union_all(
|
||||
states_query_for_entity_ids(start_day, end_day, entity_ids),
|
||||
select_events_context_only().where(
|
||||
Events.context_id.in_(devices_entities_cte.select())
|
||||
apply_events_context_hints(
|
||||
select_events_context_only()
|
||||
.select_from(devices_entities_cte)
|
||||
.outerjoin(Events, devices_entities_cte.c.context_id == Events.context_id)
|
||||
).outerjoin(EventData, (Events.data_id == EventData.data_id)),
|
||||
apply_states_context_hints(
|
||||
select_states_context_only()
|
||||
.select_from(devices_entities_cte)
|
||||
.outerjoin(States, devices_entities_cte.c.context_id == States.context_id)
|
||||
),
|
||||
select_states_context_only()
|
||||
.where(States.entity_id.not_in(entity_ids))
|
||||
.where(States.context_id.in_(devices_entities_cte.select())),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -93,6 +93,8 @@ TABLES_TO_CHECK = [
|
|||
|
||||
LAST_UPDATED_INDEX = "ix_states_last_updated"
|
||||
ENTITY_ID_LAST_UPDATED_INDEX = "ix_states_entity_id_last_updated"
|
||||
EVENTS_CONTEXT_ID_INDEX = "ix_events_context_id"
|
||||
STATES_CONTEXT_ID_INDEX = "ix_states_context_id"
|
||||
|
||||
EMPTY_JSON_OBJECT = "{}"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue