Recover from previously failed statistics migrations (#101781)

* Handle statistics columns being unmigrated from previous downgrades

If the user downgraded HA from 2023.3.x to an older version without
restoring the database, and they later upgrade again with the same
database, they will have unmigrated statistics columns since we only
migrate them once.

As it's expensive to check, we do not want to do it at every startup,
so we will only do this one more time, since the risk that someone
will downgrade to an older version is very low at this point.
(A short sketch of what counts as "unmigrated" follows the change
list below.)

* add guard to sqlite to prevent re-migrate

* test

* move test to insert with old schema

* use helper

* normalize timestamps

* remove

* add check

* add fallback migration

* add fallback migration

* commit

* remove useless logging

* remove useless logging

* do the other columns at the same time

* coverage

* dry

* comment

* Update tests/components/recorder/test_migration_from_schema_32.py
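
Roughly speaking, an "unmigrated" statistics row is one whose new
timestamp columns (e.g. start_ts) are still NULL. A minimal sketch of
that check, using the StatisticsShortTerm model imported in the diff
below; the real lookups are the find_unmigrated_statistics_rows /
find_unmigrated_short_term_statistics_rows queries, and this snippet is
only an illustration, not the actual helper:

    # Illustrative only: a short-term statistics row still needs migrating
    # when its start_ts column has not been populated yet.
    from sqlalchemy import select

    from homeassistant.components.recorder.db_schema import StatisticsShortTerm

    unmigrated_rows = select(StatisticsShortTerm.id).where(
        StatisticsShortTerm.start_ts.is_(None)
    )

This mirrors the "where start_ts is NULL" guard added to the SQLite
bulk UPDATE further down in the diff.
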
J. Nick Koston 2023-10-22 17:34:43 -10:00 committed by GitHub
parent 4ee9a6f130
commit 268425b5e3
6 changed files with 819 additions and 53 deletions


@@ -68,13 +68,20 @@ from .db_schema import (
StatisticsShortTerm,
)
from .models import process_timestamp
from .models.time import datetime_to_timestamp_or_none
from .queries import (
batch_cleanup_entity_ids,
delete_duplicate_short_term_statistics_row,
delete_duplicate_statistics_row,
find_entity_ids_to_migrate,
find_event_type_to_migrate,
find_events_context_ids_to_migrate,
find_states_context_ids_to_migrate,
find_unmigrated_short_term_statistics_rows,
find_unmigrated_statistics_rows,
has_used_states_event_ids,
migrate_single_short_term_statistics_row_to_timestamp,
migrate_single_statistics_row_to_timestamp,
)
from .statistics import get_start_time
from .tasks import (
@@ -950,26 +957,9 @@ def _apply_update( # noqa: C901
"statistics_short_term",
"ix_statistics_short_term_statistic_id_start_ts",
)
try:
_migrate_statistics_columns_to_timestamp(instance, session_maker, engine)
except IntegrityError as ex:
_LOGGER.error(
"Statistics table contains duplicate entries: %s; "
"Cleaning up duplicates and trying again; "
"This will take a while; "
"Please be patient!",
ex,
)
# There may be duplicated statistics entries, delete duplicates
# and try again
with session_scope(session=session_maker()) as session:
delete_statistics_duplicates(instance, hass, session)
_migrate_statistics_columns_to_timestamp(instance, session_maker, engine)
# Log at error level to ensure the user sees this message in the log
# since we logged the error above.
_LOGGER.error(
"Statistics migration successfully recovered after statistics table duplicate cleanup"
)
_migrate_statistics_columns_to_timestamp_removing_duplicates(
hass, instance, session_maker, engine
)
elif new_version == 35:
# Migration is done in two steps to ensure we can start using
# the new columns before we wipe the old ones.
@@ -1060,10 +1050,55 @@ def _apply_update( # noqa: C901
elif new_version == 41:
_create_index(session_maker, "event_types", "ix_event_types_event_type")
_create_index(session_maker, "states_meta", "ix_states_meta_entity_id")
elif new_version == 42:
# If the user had a previously failed migration, or they
# downgraded from 2023.3.x to an older version we will have
# unmigrated statistics columns so we want to clean this up
# one last time since compiling the statistics will be slow
# or fail if we have unmigrated statistics.
_migrate_statistics_columns_to_timestamp_removing_duplicates(
hass, instance, session_maker, engine
)
else:
raise ValueError(f"No schema migration defined for version {new_version}")


def _migrate_statistics_columns_to_timestamp_removing_duplicates(
hass: HomeAssistant,
instance: Recorder,
session_maker: Callable[[], Session],
engine: Engine,
) -> None:
"""Migrate statistics columns to timestamp or cleanup duplicates."""
try:
_migrate_statistics_columns_to_timestamp(instance, session_maker, engine)
except IntegrityError as ex:
_LOGGER.error(
"Statistics table contains duplicate entries: %s; "
"Cleaning up duplicates and trying again; "
"This will take a while; "
"Please be patient!",
ex,
)
# There may be duplicated statistics entries, delete duplicates
# and try again
with session_scope(session=session_maker()) as session:
delete_statistics_duplicates(instance, hass, session)
try:
_migrate_statistics_columns_to_timestamp(instance, session_maker, engine)
except IntegrityError:
_LOGGER.warning(
"Statistics table still contains duplicate entries after cleanup; "
"Falling back to a one by one migration"
)
_migrate_statistics_columns_to_timestamp_one_by_one(instance, session_maker)
# Log at error level to ensure the user sees this message in the log
# since we logged the error above.
_LOGGER.error(
"Statistics migration successfully recovered after statistics table duplicate cleanup"
)


def _correct_table_character_set_and_collation(
table: str,
session_maker: Callable[[], Session],
@@ -1269,6 +1304,59 @@ def _migrate_columns_to_timestamp(
)


@database_job_retry_wrapper("Migrate statistics columns to timestamp one by one", 3)
def _migrate_statistics_columns_to_timestamp_one_by_one(
instance: Recorder, session_maker: Callable[[], Session]
) -> None:
"""Migrate statistics columns to use timestamp on by one.
If something manually inserted data into the statistics table
in the past it may have inserted duplicate rows.
Before we had the unique index on (statistic_id, start) this
the data could have been inserted without any errors and we
could end up with duplicate rows that go undetected (even by
our current duplicate cleanup code) until we try to migrate the
data to use timestamps.
This will migrate the data one by one to ensure we do not hit any
duplicate rows, and remove the duplicate rows as they are found.
"""
for find_func, migrate_func, delete_func in (
(
find_unmigrated_statistics_rows,
migrate_single_statistics_row_to_timestamp,
delete_duplicate_statistics_row,
),
(
find_unmigrated_short_term_statistics_rows,
migrate_single_short_term_statistics_row_to_timestamp,
delete_duplicate_short_term_statistics_row,
),
):
with session_scope(session=session_maker()) as session:
while stats := session.execute(find_func(instance.max_bind_vars)).all():
for statistic_id, start, created, last_reset in stats:
start_ts = datetime_to_timestamp_or_none(process_timestamp(start))
created_ts = datetime_to_timestamp_or_none(
process_timestamp(created)
)
last_reset_ts = datetime_to_timestamp_or_none(
process_timestamp(last_reset)
)
try:
session.execute(
migrate_func(
statistic_id, start_ts, created_ts, last_reset_ts
)
)
except IntegrityError:
# This can happen if we have duplicate rows
# in the statistics table.
session.execute(delete_func(statistic_id))
session.commit()


@database_job_retry_wrapper("Migrate statistics columns to timestamp", 3)
def _migrate_statistics_columns_to_timestamp(
instance: Recorder, session_maker: Callable[[], Session], engine: Engine
@@ -1292,7 +1380,7 @@ def _migrate_statistics_columns_to_timestamp(
f"created_ts=strftime('%s',created) + "
"cast(substr(created,-7) AS FLOAT), "
f"last_reset_ts=strftime('%s',last_reset) + "
"cast(substr(last_reset,-7) AS FLOAT);"
"cast(substr(last_reset,-7) AS FLOAT) where start_ts is NULL;"
)
)
elif engine.dialect.name == SupportedDialect.MYSQL: