From 6af1a835e6e583e341213067a0ae0886b5e49b01 Mon Sep 17 00:00:00 2001
From: Erik Montnemery
Date: Thu, 30 Sep 2021 17:14:36 +0200
Subject: [PATCH] Optimize statistics generation (#56821)

* Optimize statistics generation

* pylint
---
 homeassistant/components/history/__init__.py | 24 ++++-----
 homeassistant/components/recorder/history.py |  6 +--
 .../components/recorder/migration.py         |  4 +-
 .../components/recorder/statistics.py        | 47 ++++++++--------
 homeassistant/components/sensor/recorder.py  | 54 ++++++++++++++-----
 tests/components/sensor/test_recorder.py     | 30 +++++++----
 6 files changed, 101 insertions(+), 64 deletions(-)

diff --git a/homeassistant/components/history/__init__.py b/homeassistant/components/history/__init__.py
index 3ae71602dc1..7c3087d471f 100644
--- a/homeassistant/components/history/__init__.py
+++ b/homeassistant/components/history/__init__.py
@@ -54,7 +54,7 @@ CONFIG_SCHEMA = vol.Schema(
 
 @deprecated_function("homeassistant.components.recorder.history.get_significant_states")
 def get_significant_states(hass, *args, **kwargs):
-    """Wrap _get_significant_states with an sql session."""
+    """Wrap get_significant_states_with_session with an SQL session."""
     return history.get_significant_states(hass, *args, **kwargs)
 
 
@@ -268,18 +268,16 @@ class HistoryPeriodView(HomeAssistantView):
         timer_start = time.perf_counter()
 
         with session_scope(hass=hass) as session:
-            result = (
-                history._get_significant_states(  # pylint: disable=protected-access
-                    hass,
-                    session,
-                    start_time,
-                    end_time,
-                    entity_ids,
-                    self.filters,
-                    include_start_time_state,
-                    significant_changes_only,
-                    minimal_response,
-                )
+            result = history.get_significant_states_with_session(
+                hass,
+                session,
+                start_time,
+                end_time,
+                entity_ids,
+                self.filters,
+                include_start_time_state,
+                significant_changes_only,
+                minimal_response,
             )
 
         result = list(result.values())
diff --git a/homeassistant/components/recorder/history.py b/homeassistant/components/recorder/history.py
index 36a4f6d0696..72f820a0d3b 100644
--- a/homeassistant/components/recorder/history.py
+++ b/homeassistant/components/recorder/history.py
@@ -61,12 +61,12 @@ def async_setup(hass):
 
 
 def get_significant_states(hass, *args, **kwargs):
-    """Wrap _get_significant_states with a sql session."""
+    """Wrap get_significant_states_with_session with an SQL session."""
     with session_scope(hass=hass) as session:
-        return _get_significant_states(hass, session, *args, **kwargs)
+        return get_significant_states_with_session(hass, session, *args, **kwargs)
 
 
-def _get_significant_states(
+def get_significant_states_with_session(
     hass,
     session,
     start_time,
diff --git a/homeassistant/components/recorder/migration.py b/homeassistant/components/recorder/migration.py
index 0c9b7d767d8..0d40707d825 100644
--- a/homeassistant/components/recorder/migration.py
+++ b/homeassistant/components/recorder/migration.py
@@ -24,7 +24,7 @@ from .models import (
     StatisticsShortTerm,
     process_timestamp,
 )
-from .statistics import _get_metadata, get_start_time
+from .statistics import get_metadata_with_session, get_start_time
 from .util import session_scope
 
 _LOGGER = logging.getLogger(__name__)
@@ -564,7 +564,7 @@ def _apply_update(instance, session, new_version, old_version):  # noqa: C901
             fake_start_time += timedelta(minutes=5)
 
         # Copy last hourly statistic to the newly created 5-minute statistics table
-        sum_statistics = _get_metadata(
+        sum_statistics = get_metadata_with_session(
            instance.hass, session, None, statistic_type="sum"
        )
        for metadata_id in sum_statistics:
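Both renames above follow the same convention, sketched here in minimal, self-contained form (signatures simplified; `session_scope` below is only a stand-in for the real `recorder.util.session_scope`): the public entry point opens its own session, while the `*_with_session` variant lets a caller that already holds a session reuse it instead of opening a nested one.

    from contextlib import contextmanager

    @contextmanager
    def session_scope():
        """Stand-in: the real helper yields a SQLAlchemy session."""
        yield object()

    def get_significant_states_with_session(session, start_time, end_time=None):
        """Session-reusing variant; callers already inside a session call this."""
        return {}  # the real implementation queries the states table via `session`

    def get_significant_states(start_time, end_time=None):
        """Public wrapper: open a session, then delegate."""
        with session_scope() as session:
            return get_significant_states_with_session(session, start_time, end_time)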
diff --git a/homeassistant/components/recorder/statistics.py b/homeassistant/components/recorder/statistics.py
index 5a35625e837..7b7e349b843 100644
--- a/homeassistant/components/recorder/statistics.py
+++ b/homeassistant/components/recorder/statistics.py
@@ -195,7 +195,7 @@
     Updating metadata source is not possible.
     """
     statistic_id = new_metadata["statistic_id"]
-    old_metadata_dict = _get_metadata(hass, session, [statistic_id], None)
+    old_metadata_dict = get_metadata_with_session(hass, session, [statistic_id], None)
     if not old_metadata_dict:
         unit = new_metadata["unit_of_measurement"]
         has_mean = new_metadata["has_mean"]
@@ -210,7 +210,7 @@
         )
         return meta.id  # type: ignore[no-any-return]
 
-    metadata_id, old_metadata = next(iter(old_metadata_dict.items()))
+    metadata_id, old_metadata = old_metadata_dict[statistic_id]
     if (
         old_metadata["has_mean"] != new_metadata["has_mean"]
         or old_metadata["has_sum"] != new_metadata["has_sum"]
@@ -361,13 +361,15 @@ def compile_statistics(instance: Recorder, start: datetime) -> bool:
     return True
 
 
-def _get_metadata(
+def get_metadata_with_session(
     hass: HomeAssistant,
     session: scoped_session,
-    statistic_ids: list[str] | None,
+    statistic_ids: Iterable[str] | None,
     statistic_type: Literal["mean"] | Literal["sum"] | None,
-) -> dict[int, StatisticMetaData]:
-    """Fetch meta data, returns a dict of StatisticMetaData indexed by statistic_id.
+) -> dict[str, tuple[int, StatisticMetaData]]:
+    """Fetch meta data.
+
+    Returns a dict of (metadata_id, StatisticMetaData) indexed by statistic_id.
 
     If statistic_ids is given, fetch metadata only for the listed statistics_ids.
     If statistic_type is given, fetch metadata only for statistic_ids supporting it.
@@ -403,24 +405,21 @@
     metadata_ids = [metadata[0] for metadata in result]
 
     # Prepare the result dict
-    metadata: dict[int, StatisticMetaData] = {}
+    metadata: dict[str, tuple[int, StatisticMetaData]] = {}
     for _id in metadata_ids:
         meta = _meta(result, _id)
         if meta:
-            metadata[_id] = meta
+            metadata[meta["statistic_id"]] = (_id, meta)
 
     return metadata
 
 
 def get_metadata(
     hass: HomeAssistant,
-    statistic_id: str,
-) -> StatisticMetaData | None:
-    """Return metadata for a statistic_id."""
+    statistic_ids: Iterable[str],
+) -> dict[str, tuple[int, StatisticMetaData]]:
+    """Return metadata for statistic_ids."""
     with session_scope(hass=hass) as session:
-        metadata = _get_metadata(hass, session, [statistic_id], None)
-        if not metadata:
-            return None
-        return next(iter(metadata.values()))
+        return get_metadata_with_session(hass, session, statistic_ids, None)
 
 
 def _configured_unit(unit: str, units: UnitSystem) -> str:
@@ -469,9 +468,9 @@ def list_statistic_ids(
 
     # Query the database
     with session_scope(hass=hass) as session:
-        metadata = _get_metadata(hass, session, None, statistic_type)
+        metadata = get_metadata_with_session(hass, session, None, statistic_type)
 
-        for meta in metadata.values():
+        for _, meta in metadata.values():
             unit = meta["unit_of_measurement"]
             if unit is not None:
                 # Display unit according to user settings
@@ -480,7 +479,7 @@ def list_statistic_ids(
 
         statistic_ids = {
             meta["statistic_id"]: meta["unit_of_measurement"]
-            for meta in metadata.values()
+            for _, meta in metadata.values()
         }
 
     # Query all integrations with a registered recorder platform
@@ -548,13 +547,13 @@ def statistics_during_period(
     metadata = None
     with session_scope(hass=hass) as session:
         # Fetch metadata for the given (or all) statistic_ids
-        metadata = _get_metadata(hass, session, statistic_ids, None)
+        metadata = get_metadata_with_session(hass, session, statistic_ids, None)
         if not metadata:
             return {}
 
         metadata_ids = None
         if statistic_ids is not None:
-            metadata_ids = list(metadata.keys())
+            metadata_ids = [metadata_id for metadata_id, _ in metadata.values()]
 
         if period == "hour":
             bakery = STATISTICS_BAKERY
@@ -589,7 +588,7 @@ def get_last_statistics(
     statistic_ids = [statistic_id]
     with session_scope(hass=hass) as session:
         # Fetch metadata for the given statistic_id
-        metadata = _get_metadata(hass, session, statistic_ids, None)
+        metadata = get_metadata_with_session(hass, session, statistic_ids, None)
         if not metadata:
             return {}
 
@@ -598,7 +597,7 @@ def get_last_statistics(
         )
         baked_query += lambda q: q.filter_by(metadata_id=bindparam("metadata_id"))
 
-        metadata_id = next(iter(metadata.keys()))
+        metadata_id = metadata[statistic_id][0]
 
         baked_query += lambda q: q.order_by(
             StatisticsShortTerm.metadata_id, StatisticsShortTerm.start.desc()
@@ -629,7 +628,7 @@ def _sorted_statistics_to_dict(
     hass: HomeAssistant,
     stats: list,
     statistic_ids: list[str] | None,
-    metadata: dict[int, StatisticMetaData],
+    _metadata: dict[str, tuple[int, StatisticMetaData]],
     convert_units: bool,
     duration: timedelta,
 ) -> dict[str, list[dict]]:
@@ -646,6 +645,8 @@ def _sorted_statistics_to_dict(
         for stat_id in statistic_ids:
             result[stat_id] = []
 
+    metadata = dict(_metadata.values())
+
     # Append all statistic entries, and optionally do unit conversion
     for meta_id, group in groupby(stats, lambda stat: stat.metadata_id):  # type: ignore
         unit = metadata[meta_id]["unit_of_measurement"]
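The core change in statistics.py is the return shape of metadata lookups: a dict keyed by statistic_id whose values are (metadata_id, StatisticMetaData) tuples, replacing the old dict keyed by metadata_id. A small runnable illustration of consuming that shape (the sample values here are invented, not taken from the patch):

    # Hypothetical value in the new shape, dict[str, tuple[int, StatisticMetaData]]:
    metadatas = {
        "sensor.energy": (
            7,
            {
                "statistic_id": "sensor.energy",
                "unit_of_measurement": "kWh",
                "has_mean": False,
                "has_sum": True,
            },
        )
    }

    # Direct lookup by statistic_id replaces the old next(iter(...)) scans:
    metadata_id, meta = metadatas["sensor.energy"]
    assert metadata_id == 7 and meta["has_sum"]

    # _sorted_statistics_to_dict recovers a metadata_id-keyed mapping when it
    # needs to group database rows, exactly as in the hunk above:
    by_metadata_id = dict(metadatas.values())
    assert by_metadata_id[7]["unit_of_measurement"] == "kWh"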
diff --git a/homeassistant/components/sensor/recorder.py b/homeassistant/components/sensor/recorder.py
index 3f3e88a0166..fd6cf5e0f2f 100644
--- a/homeassistant/components/sensor/recorder.py
+++ b/homeassistant/components/sensor/recorder.py
@@ -8,7 +8,14 @@ import itertools
 import logging
 import math
 
-from homeassistant.components.recorder import history, is_entity_recorded, statistics
+from sqlalchemy.orm.session import Session
+
+from homeassistant.components.recorder import (
+    history,
+    is_entity_recorded,
+    statistics,
+    util as recorder_util,
+)
 from homeassistant.components.recorder.models import (
     StatisticData,
     StatisticMetaData,
@@ -196,6 +203,8 @@ def _parse_float(state: str) -> float:
 
 def _normalize_states(
     hass: HomeAssistant,
+    session: Session,
+    old_metadatas: dict[str, tuple[int, StatisticMetaData]],
     entity_history: Iterable[State],
     device_class: str | None,
     entity_id: str,
@@ -221,10 +230,10 @@
             if entity_id not in hass.data[WARN_UNSTABLE_UNIT]:
                 hass.data[WARN_UNSTABLE_UNIT].add(entity_id)
                 extra = ""
-                if old_metadata := statistics.get_metadata(hass, entity_id):
+                if old_metadata := old_metadatas.get(entity_id):
                     extra = (
                         " and matches the unit of already compiled statistics "
-                        f"({old_metadata['unit_of_measurement']})"
+                        f"({old_metadata[1]['unit_of_measurement']})"
                     )
                 _LOGGER.warning(
                     "The unit of %s is changing, got multiple %s, generation of long term "
@@ -368,17 +377,32 @@ def _last_reset_as_utc_isoformat(
     return dt_util.as_utc(last_reset).isoformat()
 
 
-def compile_statistics(  # noqa: C901
+def compile_statistics(
     hass: HomeAssistant, start: datetime.datetime, end: datetime.datetime
 ) -> list[StatisticResult]:
     """Compile statistics for all entities during start-end.
 
     Note: This will query the database and must not be run in the event loop
     """
+    with recorder_util.session_scope(hass=hass) as session:
+        result = _compile_statistics(hass, session, start, end)
+    return result
+
+
+def _compile_statistics(  # noqa: C901
+    hass: HomeAssistant,
+    session: Session,
+    start: datetime.datetime,
+    end: datetime.datetime,
+) -> list[StatisticResult]:
+    """Compile statistics for all entities during start-end."""
     result: list[StatisticResult] = []
     sensor_states = _get_sensor_states(hass)
     wanted_statistics = _wanted_statistics(sensor_states)
+    old_metadatas = statistics.get_metadata_with_session(
+        hass, session, [i.entity_id for i in sensor_states], None
+    )
 
     # Get history between start and end
     entities_full_history = [
@@ -386,8 +410,9 @@
     ]
     history_list = {}
     if entities_full_history:
-        history_list = history.get_significant_states(  # type: ignore
+        history_list = history.get_significant_states_with_session(  # type: ignore
             hass,
+            session,
             start - datetime.timedelta.resolution,
             end,
             entity_ids=entities_full_history,
@@ -399,8 +424,9 @@
         if "sum" not in wanted_statistics[i.entity_id]
     ]
     if entities_significant_history:
-        _history_list = history.get_significant_states(  # type: ignore
+        _history_list = history.get_significant_states_with_session(  # type: ignore
             hass,
+            session,
             start - datetime.timedelta.resolution,
             end,
             entity_ids=entities_significant_history,
@@ -420,14 +446,16 @@
         state_class = _state.attributes[ATTR_STATE_CLASS]
         device_class = _state.attributes.get(ATTR_DEVICE_CLASS)
         entity_history = history_list[entity_id]
-        unit, fstates = _normalize_states(hass, entity_history, device_class, entity_id)
+        unit, fstates = _normalize_states(
+            hass, session, old_metadatas, entity_history, device_class, entity_id
+        )
 
         if not fstates:
             continue
 
         # Check metadata
-        if old_metadata := statistics.get_metadata(hass, entity_id):
-            if old_metadata["unit_of_measurement"] != unit:
+        if old_metadata := old_metadatas.get(entity_id):
+            if old_metadata[1]["unit_of_measurement"] != unit:
                 if WARN_UNSTABLE_UNIT not in hass.data:
                     hass.data[WARN_UNSTABLE_UNIT] = set()
                 if entity_id not in hass.data[WARN_UNSTABLE_UNIT]:
@@ -438,8 +466,8 @@
                         "will be suppressed unless the unit changes back to %s",
                         entity_id,
                         unit,
-                        old_metadata["unit_of_measurement"],
-                        old_metadata["unit_of_measurement"],
+                        old_metadata[1]["unit_of_measurement"],
+                        old_metadata[1]["unit_of_measurement"],
                     )
                 continue
 
@@ -617,10 +645,10 @@
             state_unit = state.attributes.get(ATTR_UNIT_OF_MEASUREMENT)
 
             if device_class not in UNIT_CONVERSIONS:
-                metadata = statistics.get_metadata(hass, entity_id)
+                metadata = statistics.get_metadata(hass, (entity_id,))
                 if not metadata:
                     continue
-                metadata_unit = metadata["unit_of_measurement"]
+                metadata_unit = metadata[entity_id][1]["unit_of_measurement"]
                 if state_unit != metadata_unit:
                     validation_result[entity_id].append(
                         statistics.ValidationIssue(
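In the sensor platform, metadata for all recorded entities is now fetched once, up front, inside the single session opened by compile_statistics, and _normalize_states reads from that mapping instead of issuing a per-entity query. A rough, self-contained sketch of the batching idea (the stubs below are stand-ins, not the real recorder API):

    from types import SimpleNamespace

    # Stand-ins so the sketch runs on its own; in the real code these come from
    # hass, the recorder session, and statistics.get_metadata_with_session.
    sensor_states = [SimpleNamespace(entity_id="sensor.energy")]

    def get_metadata_with_session(session, statistic_ids, statistic_type):
        """Stub returning the new shape: one query covers all requested ids."""
        db = {"sensor.energy": (7, {"unit_of_measurement": "kWh"})}
        return {sid: db[sid] for sid in statistic_ids if sid in db}

    session = object()  # the single session reused for the whole compile run
    old_metadatas = get_metadata_with_session(
        session, [state.entity_id for state in sensor_states], None
    )

    for state in sensor_states:
        # A dict lookup replaces the old per-entity statistics.get_metadata() query:
        if old_metadata := old_metadatas.get(state.entity_id):
            metadata_id, meta = old_metadata
            print(state.entity_id, metadata_id, meta["unit_of_measurement"])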
diff --git a/tests/components/sensor/test_recorder.py b/tests/components/sensor/test_recorder.py
index 9baa0aaf460..f13dc5084bb 100644
--- a/tests/components/sensor/test_recorder.py
+++ b/tests/components/sensor/test_recorder.py
@@ -1725,12 +1725,17 @@ def test_compile_hourly_statistics_changing_statistics(
     assert statistic_ids == [
         {"statistic_id": "sensor.test1", "unit_of_measurement": None}
     ]
-    metadata = get_metadata(hass, "sensor.test1")
+    metadata = get_metadata(hass, ("sensor.test1",))
     assert metadata == {
-        "has_mean": True,
-        "has_sum": False,
-        "statistic_id": "sensor.test1",
-        "unit_of_measurement": None,
+        "sensor.test1": (
+            1,
+            {
+                "has_mean": True,
+                "has_sum": False,
+                "statistic_id": "sensor.test1",
+                "unit_of_measurement": None,
+            },
+        )
     }
 
     # Add more states, with changed state class
("sensor.test1",)) assert metadata == { - "has_mean": True, - "has_sum": False, - "statistic_id": "sensor.test1", - "unit_of_measurement": None, + "sensor.test1": ( + 1, + { + "has_mean": True, + "has_sum": False, + "statistic_id": "sensor.test1", + "unit_of_measurement": None, + }, + ) } # Add more states, with changed state class @@ -1745,12 +1750,17 @@ def test_compile_hourly_statistics_changing_statistics( assert statistic_ids == [ {"statistic_id": "sensor.test1", "unit_of_measurement": None} ] - metadata = get_metadata(hass, "sensor.test1") + metadata = get_metadata(hass, ("sensor.test1",)) assert metadata == { - "has_mean": False, - "has_sum": True, - "statistic_id": "sensor.test1", - "unit_of_measurement": None, + "sensor.test1": ( + 1, + { + "has_mean": False, + "has_sum": True, + "statistic_id": "sensor.test1", + "unit_of_measurement": None, + }, + ) } stats = statistics_during_period(hass, period0, period="5minute") assert stats == {