From c5804d362c221695379345dfe764478b15ecfa0d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 29 Jun 2024 07:50:53 -0500 Subject: [PATCH] Remove legacy foreign key constraint from sqlite states table (#120779) --- .../components/recorder/migration.py | 76 ++++++++++++++-- tests/components/recorder/test_migrate.py | 89 ++++++++++++++++++- .../components/recorder/test_v32_migration.py | 9 +- 3 files changed, 162 insertions(+), 12 deletions(-) diff --git a/homeassistant/components/recorder/migration.py b/homeassistant/components/recorder/migration.py index 561b446f493..cf003f72af4 100644 --- a/homeassistant/components/recorder/migration.py +++ b/homeassistant/components/recorder/migration.py @@ -24,7 +24,7 @@ from sqlalchemy.exc import ( SQLAlchemyError, ) from sqlalchemy.orm.session import Session -from sqlalchemy.schema import AddConstraint, DropConstraint +from sqlalchemy.schema import AddConstraint, CreateTable, DropConstraint from sqlalchemy.sql.expression import true from sqlalchemy.sql.lambdas import StatementLambdaElement @@ -1738,14 +1738,15 @@ def cleanup_legacy_states_event_ids(instance: Recorder) -> bool: # Only drop the index if there are no more event_ids in the states table # ex all NULL assert instance.engine is not None, "engine should never be None" - if instance.dialect_name != SupportedDialect.SQLITE: + if instance.dialect_name == SupportedDialect.SQLITE: # SQLite does not support dropping foreign key constraints - # so we can't drop the index at this time but we can avoid - # looking for legacy rows during purge + # so we have to rebuild the table + rebuild_sqlite_table(session_maker, instance.engine, States) + else: _drop_foreign_key_constraints( session_maker, instance.engine, TABLE_STATES, ["event_id"] ) - _drop_index(session_maker, "states", LEGACY_STATES_EVENT_ID_INDEX) + _drop_index(session_maker, "states", LEGACY_STATES_EVENT_ID_INDEX) instance.use_legacy_events_index = False return True @@ -1894,3 +1895,68 @@ def _mark_migration_done( migration_id=migration.migration_id, version=migration.migration_version ) ) + + +def rebuild_sqlite_table( + session_maker: Callable[[], Session], engine: Engine, table: type[Base] +) -> None: + """Rebuild an SQLite table. + + This must only be called after all migrations are complete + and the database is in a consistent state. + + If the table is not migrated to the current schema this + will likely fail. + """ + table_table = cast(Table, table.__table__) + orig_name = table_table.name + temp_name = f"{table_table.name}_temp_{int(time())}" + + _LOGGER.warning( + "Rebuilding SQLite table %s; This will take a while; Please be patient!", + orig_name, + ) + + try: + # 12 step SQLite table rebuild + # https://www.sqlite.org/lang_altertable.html + with session_scope(session=session_maker()) as session: + # Step 1 - Disable foreign keys + session.connection().execute(text("PRAGMA foreign_keys=OFF")) + # Step 2 - create a transaction + with session_scope(session=session_maker()) as session: + # Step 3 - we know all the indexes, triggers, and views associated with table X + new_sql = str(CreateTable(table_table).compile(engine)).strip("\n") + ";" + source_sql = f"CREATE TABLE {orig_name}" + replacement_sql = f"CREATE TABLE {temp_name}" + assert source_sql in new_sql, f"{source_sql} should be in new_sql" + new_sql = new_sql.replace(source_sql, replacement_sql) + # Step 4 - Create temp table + session.execute(text(new_sql)) + column_names = ",".join([column.name for column in table_table.columns]) + # Step 5 - Transfer content + sql = f"INSERT INTO {temp_name} SELECT {column_names} FROM {orig_name};" # noqa: S608 + session.execute(text(sql)) + # Step 6 - Drop the original table + session.execute(text(f"DROP TABLE {orig_name}")) + # Step 7 - Rename the temp table + session.execute(text(f"ALTER TABLE {temp_name} RENAME TO {orig_name}")) + # Step 8 - Recreate indexes + for index in table_table.indexes: + index.create(session.connection()) + # Step 9 - Recreate views (there are none) + # Step 10 - Check foreign keys + session.execute(text("PRAGMA foreign_key_check")) + # Step 11 - Commit transaction + session.commit() + except SQLAlchemyError: + _LOGGER.exception("Error recreating SQLite table %s", table_table.name) + # Swallow the exception since we do not want to ever raise + # an integrity error as it would cause the database + # to be discarded and recreated from scratch + else: + _LOGGER.warning("Rebuilding SQLite table %s finished", orig_name) + finally: + with session_scope(session=session_maker()) as session: + # Step 12 - Re-enable foreign keys + session.connection().execute(text("PRAGMA foreign_keys=ON")) diff --git a/tests/components/recorder/test_migrate.py b/tests/components/recorder/test_migrate.py index a21f4771616..cb8e402f65a 100644 --- a/tests/components/recorder/test_migrate.py +++ b/tests/components/recorder/test_migrate.py @@ -16,7 +16,7 @@ from sqlalchemy.exc import ( ProgrammingError, SQLAlchemyError, ) -from sqlalchemy.orm import Session +from sqlalchemy.orm import Session, scoped_session, sessionmaker from sqlalchemy.pool import StaticPool from homeassistant.bootstrap import async_setup_component @@ -24,6 +24,7 @@ from homeassistant.components import persistent_notification as pn, recorder from homeassistant.components.recorder import db_schema, migration from homeassistant.components.recorder.db_schema import ( SCHEMA_VERSION, + Events, RecorderRuns, States, ) @@ -633,3 +634,89 @@ def test_raise_if_exception_missing_empty_cause_str() -> None: with pytest.raises(ProgrammingError): migration.raise_if_exception_missing_str(programming_exc, ["not present"]) + + +def test_rebuild_sqlite_states_table(recorder_db_url: str) -> None: + """Test that we can rebuild the states table in SQLite.""" + if not recorder_db_url.startswith("sqlite://"): + # This test is specific for SQLite + return + + engine = create_engine(recorder_db_url) + session_maker = scoped_session(sessionmaker(bind=engine, future=True)) + with session_scope(session=session_maker()) as session: + db_schema.Base.metadata.create_all(engine) + with session_scope(session=session_maker()) as session: + session.add(States(state="on")) + session.commit() + + migration.rebuild_sqlite_table(session_maker, engine, States) + + with session_scope(session=session_maker()) as session: + assert session.query(States).count() == 1 + assert session.query(States).first().state == "on" + + engine.dispose() + + +def test_rebuild_sqlite_states_table_missing_fails( + recorder_db_url: str, caplog: pytest.LogCaptureFixture +) -> None: + """Test handling missing states table when attempting rebuild.""" + if not recorder_db_url.startswith("sqlite://"): + # This test is specific for SQLite + return + + engine = create_engine(recorder_db_url) + session_maker = scoped_session(sessionmaker(bind=engine, future=True)) + with session_scope(session=session_maker()) as session: + db_schema.Base.metadata.create_all(engine) + + with session_scope(session=session_maker()) as session: + session.add(Events(event_type="state_changed", event_data="{}")) + session.connection().execute(text("DROP TABLE states")) + session.commit() + + migration.rebuild_sqlite_table(session_maker, engine, States) + assert "Error recreating SQLite table states" in caplog.text + caplog.clear() + + # Now rebuild the events table to make sure the database did not + # get corrupted + migration.rebuild_sqlite_table(session_maker, engine, Events) + + with session_scope(session=session_maker()) as session: + assert session.query(Events).count() == 1 + assert session.query(Events).first().event_type == "state_changed" + assert session.query(Events).first().event_data == "{}" + + engine.dispose() + + +def test_rebuild_sqlite_states_table_extra_columns( + recorder_db_url: str, caplog: pytest.LogCaptureFixture +) -> None: + """Test handling extra columns when rebuilding the states table.""" + if not recorder_db_url.startswith("sqlite://"): + # This test is specific for SQLite + return + + engine = create_engine(recorder_db_url) + session_maker = scoped_session(sessionmaker(bind=engine, future=True)) + with session_scope(session=session_maker()) as session: + db_schema.Base.metadata.create_all(engine) + with session_scope(session=session_maker()) as session: + session.add(States(state="on")) + session.commit() + session.connection().execute( + text("ALTER TABLE states ADD COLUMN extra_column TEXT") + ) + + migration.rebuild_sqlite_table(session_maker, engine, States) + assert "Error recreating SQLite table states" not in caplog.text + + with session_scope(session=session_maker()) as session: + assert session.query(States).count() == 1 + assert session.query(States).first().state == "on" + + engine.dispose() diff --git a/tests/components/recorder/test_v32_migration.py b/tests/components/recorder/test_v32_migration.py index a07c63b3376..e3398fbf0e3 100644 --- a/tests/components/recorder/test_v32_migration.py +++ b/tests/components/recorder/test_v32_migration.py @@ -211,10 +211,9 @@ async def test_migrate_times(caplog: pytest.LogCaptureFixture, tmp_path: Path) - ) states_index_names = {index["name"] for index in states_indexes} - # sqlite does not support dropping foreign keys so the - # ix_states_event_id index is not dropped in this case - # but use_legacy_events_index is still False - assert "ix_states_event_id" in states_index_names + # sqlite does not support dropping foreign keys so we had to + # create a new table and copy the data over + assert "ix_states_event_id" not in states_index_names assert recorder.get_instance(hass).use_legacy_events_index is False @@ -342,8 +341,6 @@ async def test_migrate_can_resume_entity_id_post_migration( await hass.async_stop() await hass.async_block_till_done() - assert "ix_states_entity_id_last_updated_ts" in states_index_names - async with async_test_home_assistant() as hass: recorder_helper.async_initialize_recorder(hass) assert await async_setup_component(