"""Schema migration helpers."""
from __future__ import annotations

from collections.abc import Callable, Iterable
import contextlib
from dataclasses import dataclass, replace as dataclass_replace
from datetime import timedelta
import logging
from typing import TYPE_CHECKING, cast
from uuid import UUID

import sqlalchemy
from sqlalchemy import ForeignKeyConstraint, MetaData, Table, func, text, update
from sqlalchemy.engine import CursorResult, Engine
from sqlalchemy.exc import (
    DatabaseError,
    IntegrityError,
    InternalError,
    OperationalError,
    ProgrammingError,
    SQLAlchemyError,
)
from sqlalchemy.orm.session import Session
from sqlalchemy.schema import AddConstraint, DropConstraint
from sqlalchemy.sql.expression import true

from homeassistant.core import HomeAssistant
from homeassistant.util.enum import try_parse_enum
from homeassistant.util.ulid import ulid_to_bytes

from .auto_repairs.events.schema import (
    correct_db_schema as events_correct_db_schema,
    validate_db_schema as events_validate_db_schema,
)
from .auto_repairs.states.schema import (
    correct_db_schema as states_correct_db_schema,
    validate_db_schema as states_validate_db_schema,
)
from .auto_repairs.statistics.duplicates import (
    delete_statistics_duplicates,
    delete_statistics_meta_duplicates,
)
from .auto_repairs.statistics.schema import (
    correct_db_schema as statistics_correct_db_schema,
    validate_db_schema as statistics_validate_db_schema,
)
from .const import SupportedDialect
from .db_schema import (
    CONTEXT_ID_BIN_MAX_LENGTH,
    DOUBLE_PRECISION_TYPE_SQL,
    LEGACY_STATES_EVENT_ID_INDEX,
    MYSQL_COLLATE,
    MYSQL_DEFAULT_CHARSET,
    SCHEMA_VERSION,
    STATISTICS_TABLES,
    TABLE_STATES,
    Base,
    Events,
    EventTypes,
    SchemaChanges,
    States,
    StatesMeta,
    Statistics,
    StatisticsMeta,
    StatisticsRuns,
    StatisticsShortTerm,
)
from .models import process_timestamp
from .queries import (
    batch_cleanup_entity_ids,
    find_entity_ids_to_migrate,
    find_event_type_to_migrate,
    find_events_context_ids_to_migrate,
    find_states_context_ids_to_migrate,
    has_used_states_event_ids,
)
from .statistics import get_start_time
from .tasks import (
    CommitTask,
    PostSchemaMigrationTask,
    StatisticsTimestampMigrationCleanupTask,
)
from .util import (
    database_job_retry_wrapper,
    get_index_by_name,
    retryable_database_job,
    session_scope,
)

if TYPE_CHECKING:
    from . import Recorder

LIVE_MIGRATION_MIN_SCHEMA_VERSION = 0
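# Placeholder values used by the data migrations when a row's original
# context id, entity_id or event_type is missing or cannot be converted.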
_EMPTY_CONTEXT_ID = b"\x00" * 16
_EMPTY_ENTITY_ID = "missing.entity_id"
_EMPTY_EVENT_TYPE = "missing_event_type"

_LOGGER = logging.getLogger(__name__)


@dataclass
class _ColumnTypesForDialect:
    big_int_type: str
    timestamp_type: str
    context_bin_type: str


_MYSQL_COLUMN_TYPES = _ColumnTypesForDialect(
    big_int_type="INTEGER(20)",
    timestamp_type=DOUBLE_PRECISION_TYPE_SQL,
    context_bin_type=f"BLOB({CONTEXT_ID_BIN_MAX_LENGTH})",
)

_POSTGRESQL_COLUMN_TYPES = _ColumnTypesForDialect(
    big_int_type="INTEGER",
    timestamp_type=DOUBLE_PRECISION_TYPE_SQL,
    context_bin_type="BYTEA",
)

_SQLITE_COLUMN_TYPES = _ColumnTypesForDialect(
    big_int_type="INTEGER",
    timestamp_type="FLOAT",
    context_bin_type="BLOB",
)

_COLUMN_TYPES_FOR_DIALECT: dict[SupportedDialect | None, _ColumnTypesForDialect] = {
    SupportedDialect.MYSQL: _MYSQL_COLUMN_TYPES,
    SupportedDialect.POSTGRESQL: _POSTGRESQL_COLUMN_TYPES,
    SupportedDialect.SQLITE: _SQLITE_COLUMN_TYPES,
}


def raise_if_exception_missing_str(ex: Exception, match_substrs: Iterable[str]) -> None:
    """Raise if the exception and cause do not contain the match substrs."""
    lower_ex_strs = [str(ex).lower(), str(ex.__cause__).lower()]
    for str_sub in match_substrs:
        for exc_str in lower_ex_strs:
            if exc_str and str_sub in exc_str:
                return

    raise ex


def _get_schema_version(session: Session) -> int | None:
    """Get the schema version."""
    res = (
        session.query(SchemaChanges.schema_version)
        .order_by(SchemaChanges.change_id.desc())
        .first()
    )
    return getattr(res, "schema_version", None)


def get_schema_version(session_maker: Callable[[], Session]) -> int | None:
    """Get the schema version."""
    try:
        with session_scope(session=session_maker()) as session:
            return _get_schema_version(session)
    except Exception as err:  # pylint: disable=broad-except
        _LOGGER.exception("Error when determining DB schema version: %s", err)
        return None


@dataclass
class SchemaValidationStatus:
    """Store schema validation status."""

    current_version: int
    schema_errors: set[str]
    valid: bool


def _schema_is_current(current_version: int) -> bool:
    """Check if the schema is current."""
    return current_version == SCHEMA_VERSION


def validate_db_schema(
    hass: HomeAssistant, instance: Recorder, session_maker: Callable[[], Session]
) -> SchemaValidationStatus | None:
    """Check if the schema is valid.

    This checks that the schema is the current version and also checks for some
    common schema errors caused by manual migration between database engines, for
    example importing an SQLite database into MariaDB.
    """
    schema_errors: set[str] = set()

    current_version = get_schema_version(session_maker)
    if current_version is None:
        return None

    if is_current := _schema_is_current(current_version):
        # We can only check for further errors if the schema is current, because
        # columns may otherwise not exist etc.
        schema_errors = _find_schema_errors(hass, instance, session_maker)

    valid = is_current and not schema_errors

    return SchemaValidationStatus(current_version, schema_errors, valid)


def _find_schema_errors(
    hass: HomeAssistant, instance: Recorder, session_maker: Callable[[], Session]
) -> set[str]:
    """Find schema errors."""
    schema_errors: set[str] = set()
    schema_errors |= statistics_validate_db_schema(instance)
    schema_errors |= states_validate_db_schema(instance)
    schema_errors |= events_validate_db_schema(instance)
    return schema_errors


def live_migration(schema_status: SchemaValidationStatus) -> bool:
    """Check if live migration is possible."""
    return schema_status.current_version >= LIVE_MIGRATION_MIN_SCHEMA_VERSION


def migrate_schema(
    instance: Recorder,
    hass: HomeAssistant,
    engine: Engine,
    session_maker: Callable[[], Session],
    schema_status: SchemaValidationStatus,
) -> None:
    """Check if the schema needs to be upgraded."""
    current_version = schema_status.current_version
    if current_version != SCHEMA_VERSION:
        _LOGGER.warning(
            "Database is about to upgrade from schema version: %s to: %s",
            current_version,
            SCHEMA_VERSION,
        )
    db_ready = False
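    # Apply the migrations one schema version at a time, recording each
    # completed step in SchemaChanges so an interrupted upgrade can resume
    # from the last version that finished.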
    for version in range(current_version, SCHEMA_VERSION):
        if (
            live_migration(dataclass_replace(schema_status, current_version=version))
            and not db_ready
        ):
            db_ready = True
            instance.migration_is_live = True
            hass.add_job(instance.async_set_db_ready)
        new_version = version + 1
        _LOGGER.info("Upgrading recorder db schema to version %s", new_version)
        _apply_update(
            instance, hass, engine, session_maker, new_version, current_version
        )
        with session_scope(session=session_maker()) as session:
            session.add(SchemaChanges(schema_version=new_version))

        # Log at the same level as the long schema changes
        # so it's clear that the upgrade is done
        _LOGGER.warning("Upgrade to version %s done", new_version)

    if schema_errors := schema_status.schema_errors:
        _LOGGER.warning(
            "Database is about to correct DB schema errors: %s",
            ", ".join(sorted(schema_errors)),
        )
        statistics_correct_db_schema(instance, schema_errors)
        states_correct_db_schema(instance, schema_errors)
        events_correct_db_schema(instance, schema_errors)

    if current_version != SCHEMA_VERSION:
        instance.queue_task(PostSchemaMigrationTask(current_version, SCHEMA_VERSION))
        # Make sure the post schema migration task is committed in case
        # the next task does not have commit_before = True
        instance.queue_task(CommitTask())


def _create_index(
    session_maker: Callable[[], Session], table_name: str, index_name: str
) -> None:
    """Create an index for the specified table.

    The index name should match the name given for the index
    within the table definition described in the models.
    """
    table = Table(table_name, Base.metadata)
    _LOGGER.debug("Looking up index %s for table %s", index_name, table_name)
    # Look up the index object by name from the table in the models
    index_list = [idx for idx in table.indexes if idx.name == index_name]
    if not index_list:
        _LOGGER.debug("The index %s no longer exists", index_name)
        return
    index = index_list[0]
    _LOGGER.debug("Creating %s index", index_name)
    _LOGGER.warning(
        (
            "Adding index `%s` to table `%s`. Note: this can take several "
            "minutes on large databases and slow computers. Please "
            "be patient!"
        ),
        index_name,
        table_name,
    )
    with session_scope(session=session_maker()) as session:
        try:
            connection = session.connection()
            index.create(connection)
        except (InternalError, OperationalError, ProgrammingError) as err:
            raise_if_exception_missing_str(err, ["already exists", "duplicate"])
            _LOGGER.warning(
                "Index %s already exists on %s, continuing", index_name, table_name
            )

    _LOGGER.debug("Finished creating %s", index_name)


def _execute_or_collect_error(
    session_maker: Callable[[], Session], query: str, errors: list[str]
) -> bool:
    """Execute a query or collect an error."""
    with session_scope(session=session_maker()) as session:
        try:
            session.connection().execute(text(query))
            return True
        except SQLAlchemyError as err:
            errors.append(str(err))
    return False


def _drop_index(
    session_maker: Callable[[], Session],
    table_name: str,
    index_name: str,
    quiet: bool | None = None,
) -> None:
    """Drop an index from a specified table.

    There is no universal way to do something like `DROP INDEX IF EXISTS`,
    so we will simply execute the DROP command and ignore any exceptions.

    WARNING: Due to some engines (MySQL at least) being unable to use bind
    parameters in a DROP INDEX statement (at least via SQLAlchemy), the query
    string here is generated from the method parameters without sanitizing.

    DO NOT USE THIS FUNCTION IN ANY OPERATION THAT TAKES USER INPUT.
    """
    _LOGGER.warning(
        (
            "Dropping index `%s` from table `%s`. Note: this can take several "
            "minutes on large databases and slow computers. Please "
            "be patient!"
        ),
        index_name,
        table_name,
    )
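    # Look up the name the database actually uses for this index, since some
    # engines (PostgreSQL for example) may store it under a prefixed name.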
    index_to_drop: str | None = None
    with session_scope(session=session_maker()) as session:
        index_to_drop = get_index_by_name(session, table_name, index_name)

    if index_to_drop is None:
        _LOGGER.debug(
            "The index %s on table %s no longer exists", index_name, table_name
        )
        return

    errors: list[str] = []
    for query in (
        # Engines like DB2/Oracle
        f"DROP INDEX {index_name}",
        # Engines like SQLite, SQL Server
        f"DROP INDEX {table_name}.{index_name}",
        # Engines like MySQL, MS Access
        f"DROP INDEX {index_name} ON {table_name}",
        # Engines like postgresql may have a prefix
        # ex idx_16532_ix_events_event_type_time_fired
        f"DROP INDEX {index_to_drop}",
    ):
        if _execute_or_collect_error(session_maker, query, errors):
            _LOGGER.debug(
                "Finished dropping index %s from table %s", index_name, table_name
            )
            return

    if not quiet:
        _LOGGER.warning(
            (
                "Failed to drop index `%s` from table `%s`. Schema "
                "Migration will continue; this is not a "
                "critical operation: %s"
            ),
            index_name,
            table_name,
            errors,
        )


def _add_columns(
    session_maker: Callable[[], Session], table_name: str, columns_def: list[str]
) -> None:
    """Add columns to a table."""
    _LOGGER.warning(
        (
            "Adding columns %s to table %s. Note: this can take several "
            "minutes on large databases and slow computers. Please "
            "be patient!"
        ),
        ", ".join(column.split(" ")[0] for column in columns_def),
        table_name,
    )

    columns_def = [f"ADD {col_def}" for col_def in columns_def]

    with session_scope(session=session_maker()) as session:
        try:
            connection = session.connection()
            connection.execute(
                text(
                    "ALTER TABLE {table} {columns_def}".format(
                        table=table_name, columns_def=", ".join(columns_def)
                    )
                )
            )
            return
        except (InternalError, OperationalError, ProgrammingError):
            # Some engines support adding all columns at once;
            # this error means this engine does not
            _LOGGER.info("Unable to use quick column add. Adding 1 by 1")

    for column_def in columns_def:
        with session_scope(session=session_maker()) as session:
            try:
                connection = session.connection()
                connection.execute(
                    text(
                        "ALTER TABLE {table} {column_def}".format(
                            table=table_name, column_def=column_def
                        )
                    )
                )
            except (InternalError, OperationalError, ProgrammingError) as err:
                raise_if_exception_missing_str(err, ["already exists", "duplicate"])
                _LOGGER.warning(
                    "Column %s already exists on %s, continuing",
                    column_def.split(" ")[1],
                    table_name,
                )


def _modify_columns(
    session_maker: Callable[[], Session],
    engine: Engine,
    table_name: str,
    columns_def: list[str],
) -> None:
    """Modify columns in a table."""
    if engine.dialect.name == SupportedDialect.SQLITE:
        _LOGGER.debug(
            (
                "Skipping modification of columns %s in table %s; "
                "modifying column length in SQLite is unnecessary, "
                "as it does not impose any length restrictions"
            ),
            ", ".join(column.split(" ")[0] for column in columns_def),
            table_name,
        )
        return

    _LOGGER.warning(
        (
            "Modifying columns %s in table %s. Note: this can take several "
            "minutes on large databases and slow computers. Please "
            "be patient!"
        ),
        ", ".join(column.split(" ")[0] for column in columns_def),
        table_name,
    )
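    # Each dialect needs its own ALTER syntax to change a column type.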
    if engine.dialect.name == SupportedDialect.POSTGRESQL:
        columns_def = [
            "ALTER {column} TYPE {type}".format(
                **dict(zip(["column", "type"], col_def.split(" ", 1)))
            )
            for col_def in columns_def
        ]
    elif engine.dialect.name == "mssql":
        columns_def = [f"ALTER COLUMN {col_def}" for col_def in columns_def]
    else:
        columns_def = [f"MODIFY {col_def}" for col_def in columns_def]

    with session_scope(session=session_maker()) as session:
        try:
            connection = session.connection()
            connection.execute(
                text(
                    "ALTER TABLE {table} {columns_def}".format(
                        table=table_name, columns_def=", ".join(columns_def)
                    )
                )
            )
            return
        except (InternalError, OperationalError):
            _LOGGER.info("Unable to use quick column modify. Modifying 1 by 1")

    for column_def in columns_def:
        with session_scope(session=session_maker()) as session:
            try:
                connection = session.connection()
                connection.execute(
                    text(
                        "ALTER TABLE {table} {column_def}".format(
                            table=table_name, column_def=column_def
                        )
                    )
                )
            except (InternalError, OperationalError):
                _LOGGER.exception(
                    "Could not modify column %s in table %s", column_def, table_name
                )


def _update_states_table_with_foreign_key_options(
    session_maker: Callable[[], Session], engine: Engine
) -> None:
    """Add the options to foreign key constraints."""
    inspector = sqlalchemy.inspect(engine)
    alters = []
    for foreign_key in inspector.get_foreign_keys(TABLE_STATES):
        if foreign_key["name"] and (
            # MySQL/MariaDB will have empty options
            not foreign_key.get("options")
            or
            # Postgres will have ondelete set to None
            foreign_key.get("options", {}).get("ondelete") is None
        ):
            alters.append(
                {
                    "old_fk": ForeignKeyConstraint((), (), name=foreign_key["name"]),
                    "columns": foreign_key["constrained_columns"],
                }
            )

    if not alters:
        return

    states_key_constraints = Base.metadata.tables[TABLE_STATES].foreign_key_constraints
    old_states_table = Table(  # noqa: F841 pylint: disable=unused-variable
        TABLE_STATES, MetaData(), *(alter["old_fk"] for alter in alters)  # type: ignore[arg-type]
    )
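    # For each outdated constraint: drop the old one, then re-add the matching
    # constraint from the current model so it carries the intended options.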
    for alter in alters:
        with session_scope(session=session_maker()) as session:
            try:
                connection = session.connection()
                connection.execute(DropConstraint(alter["old_fk"]))  # type: ignore[no-untyped-call]
                for fkc in states_key_constraints:
                    if fkc.column_keys == alter["columns"]:
                        connection.execute(AddConstraint(fkc))  # type: ignore[no-untyped-call]
            except (InternalError, OperationalError):
                _LOGGER.exception(
                    "Could not update foreign options in %s table", TABLE_STATES
                )


def _drop_foreign_key_constraints(
    session_maker: Callable[[], Session], engine: Engine, table: str, columns: list[str]
) -> None:
    """Drop foreign key constraints for a table on specific columns."""
    inspector = sqlalchemy.inspect(engine)
    drops = []
    for foreign_key in inspector.get_foreign_keys(table):
        if foreign_key["name"] and foreign_key["constrained_columns"] == columns:
            drops.append(ForeignKeyConstraint((), (), name=foreign_key["name"]))

    # Bind the ForeignKeyConstraints to the table
    old_table = Table(  # noqa: F841 pylint: disable=unused-variable
        table, MetaData(), *drops
    )

    for drop in drops:
        with session_scope(session=session_maker()) as session:
            try:
                connection = session.connection()
                connection.execute(DropConstraint(drop))  # type: ignore[no-untyped-call]
            except (InternalError, OperationalError):
                _LOGGER.exception(
                    "Could not drop foreign constraints in %s table on %s",
                    table,
                    columns,
                )


@database_job_retry_wrapper("Apply migration update", 10)
def _apply_update(  # noqa: C901
    instance: Recorder,
    hass: HomeAssistant,
    engine: Engine,
    session_maker: Callable[[], Session],
    new_version: int,
    old_version: int,
) -> None:
    """Perform operations to bring schema up to date."""
    assert engine.dialect.name is not None, "Dialect name must be set"
    dialect = try_parse_enum(SupportedDialect, engine.dialect.name)
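    # Fall back to the SQLite column types when the dialect is not recognized.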
_column_types = _COLUMN_TYPES_FOR_DIALECT.get(dialect, _SQLITE_COLUMN_TYPES)
|
2017-02-26 14:38:06 -08:00
|
|
|
if new_version == 1:
|
2023-01-29 16:33:23 -10:00
|
|
|
# This used to create ix_events_time_fired, but it was removed in version 32
|
|
|
|
pass
|
2017-03-23 22:48:31 -05:00
|
|
|
elif new_version == 2:
|
|
|
|
# Create compound start/end index for recorder_runs
|
2022-05-04 12:22:50 -05:00
|
|
|
_create_index(session_maker, "recorder_runs", "ix_recorder_runs_start_end")
|
2023-01-29 16:33:23 -10:00
|
|
|
# This used to create ix_states_last_updated bit it was removed in version 32
|
2017-07-01 14:10:17 -07:00
|
|
|
elif new_version == 3:
|
History query and schema optimizations for huge performance boost (#8748)
* Add DEBUG-level log for db row to native object conversion
This is now the bottleneck (by a large margin) for big history queries, so I'm leaving this log feature in to help diagnose users with a slow history page
* Rewrite of the "first synthetic datapoint" query for multiple entities
The old method was written in a manner that prevented an index from being used in the inner-most GROUP BY statement, causing massive performance issues especially when querying for a large time period.
The new query does have one material change that will cause it to return different results than before: instead of using max(state_id) to get the latest entry, we now get the max(last_updated). This is more appropriate (primary key should not be assumed to be in order of event firing) and allows an index to be used on the inner-most query. I added another JOIN layer to account for cases where there are two entries on the exact same `last_created` for a given entity. In this case we do use `state_id` as a tiebreaker.
For performance reasons the domain filters were also moved to the outermost query, as it's way more efficient to do it there than on the innermost query as before (due to indexing with GROUP BY problems)
The result is a query that only needs to do a filesort on the final result set, which will only be as many rows as there are entities.
* Remove the ORDER BY entity_id when fetching states, and add logging
Having this ORDER BY in the query prevents it from using an index due to the range filter, so it has been removed.
We already do a `groupby` in the `states_to_json` method which accomplishes exactly what the ORDER BY in the query was trying to do anyway, so this change causes no functional difference.
Also added DEBUG-level logging to allow diagnosing a user's slow history page.
* Add DEBUG-level logging for the synthetic-first-datapoint query
For diagnosing a user's slow history page
* Missed a couple instances of `created` that should be `last_updated`
* Remove `entity_id` sorting from state_changes; match significant_update
This is the same change as 09b3498f410106881fc5e095c49a8d527fa89644 , but applied to the `state_changes_during_period` method which I missed before. This should give the same performance boost to the history sensor component!
* Bugfix in History query used for History Sensor
The date filter was using a different column for the upper and lower bounds. It would work, but it would be slow!
* Update Recorder purge script to use more appropriate columns
Two reasons: 1. the `created` column's meaning is fairly arbitrary and does not represent when an event or state change actually ocurred. It seems more correct to purge based on the event date than the time the database row was written.
2. The new columns are indexed, which will speed up this purge script by orders of magnitude
* Updating db model to match new query optimizations
A few things here: 1. New schema version with a new index and several removed indexes
2. A new method in the migration script to drop old indexes
3. Added an INFO-level log message when a new index will be added, as this can take quite some time on a Raspberry Pi
2017-08-04 23:16:53 -07:00
|
|
|
# There used to be a new index here, but it was removed in version 4.
|
|
|
|
pass
|
|
|
|
elif new_version == 4:
|
|
|
|
# Queries were rewritten in this schema release. Most indexes from
|
|
|
|
# earlier versions of the schema are no longer needed.
|
|
|
|
|
|
|
|
if old_version == 3:
|
|
|
|
# Remove index that was added in version 3
|
2022-05-04 12:22:50 -05:00
|
|
|
_drop_index(session_maker, "states", "ix_states_created_domain")
|
History query and schema optimizations for huge performance boost (#8748)
* Add DEBUG-level log for db row to native object conversion
This is now the bottleneck (by a large margin) for big history queries, so I'm leaving this log feature in to help diagnose users with a slow history page
* Rewrite of the "first synthetic datapoint" query for multiple entities
The old method was written in a manner that prevented an index from being used in the inner-most GROUP BY statement, causing massive performance issues especially when querying for a large time period.
The new query does have one material change that will cause it to return different results than before: instead of using max(state_id) to get the latest entry, we now get the max(last_updated). This is more appropriate (primary key should not be assumed to be in order of event firing) and allows an index to be used on the inner-most query. I added another JOIN layer to account for cases where there are two entries on the exact same `last_created` for a given entity. In this case we do use `state_id` as a tiebreaker.
For performance reasons the domain filters were also moved to the outermost query, as it's way more efficient to do it there than on the innermost query as before (due to indexing with GROUP BY problems)
The result is a query that only needs to do a filesort on the final result set, which will only be as many rows as there are entities.
* Remove the ORDER BY entity_id when fetching states, and add logging
Having this ORDER BY in the query prevents it from using an index due to the range filter, so it has been removed.
We already do a `groupby` in the `states_to_json` method which accomplishes exactly what the ORDER BY in the query was trying to do anyway, so this change causes no functional difference.
Also added DEBUG-level logging to allow diagnosing a user's slow history page.
* Add DEBUG-level logging for the synthetic-first-datapoint query
For diagnosing a user's slow history page
* Missed a couple instances of `created` that should be `last_updated`
* Remove `entity_id` sorting from state_changes; match significant_update
This is the same change as 09b3498f410106881fc5e095c49a8d527fa89644 , but applied to the `state_changes_during_period` method which I missed before. This should give the same performance boost to the history sensor component!
* Bugfix in History query used for History Sensor
The date filter was using a different column for the upper and lower bounds. It would work, but it would be slow!
* Update Recorder purge script to use more appropriate columns
Two reasons: 1. the `created` column's meaning is fairly arbitrary and does not represent when an event or state change actually ocurred. It seems more correct to purge based on the event date than the time the database row was written.
2. The new columns are indexed, which will speed up this purge script by orders of magnitude
* Updating db model to match new query optimizations
A few things here: 1. New schema version with a new index and several removed indexes
2. A new method in the migration script to drop old indexes
3. Added an INFO-level log message when a new index will be added, as this can take quite some time on a Raspberry Pi
2017-08-04 23:16:53 -07:00
|
|
|
if old_version == 2:
|
|
|
|
# Remove index that was added in version 2
|
2022-05-04 12:22:50 -05:00
|
|
|
_drop_index(session_maker, "states", "ix_states_entity_id_created")
|
2017-08-04 23:16:53 -07:00
|
|
|
|
|
|
|
# Remove indexes that were added in version 0
|
2022-05-04 12:22:50 -05:00
|
|
|
_drop_index(session_maker, "states", "states__state_changes")
|
|
|
|
_drop_index(session_maker, "states", "states__significant_changes")
|
|
|
|
_drop_index(session_maker, "states", "ix_states_entity_id_created")
|
2023-02-04 18:52:26 +01:00
|
|
|
# This used to create ix_states_entity_id_last_updated,
|
|
|
|
# but it was removed in version 32
|
2018-03-03 22:54:38 +01:00
|
|
|
elif new_version == 5:
|
|
|
|
# Create supporting index for States.event_id foreign key
|
2023-03-17 14:27:33 -10:00
|
|
|
_create_index(session_maker, "states", LEGACY_STATES_EVENT_ID_INDEX)
|
2018-08-10 18:09:01 +02:00
|
|
|
elif new_version == 6:
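# Add context tracking columns (context_id, context_user_id) to the events and states tables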
|
2019-07-31 12:25:30 -07:00
|
|
|
_add_columns(
|
2022-05-04 12:22:50 -05:00
|
|
|
session_maker,
|
2019-07-31 12:25:30 -07:00
|
|
|
"events",
|
|
|
|
["context_id CHARACTER(36)", "context_user_id CHARACTER(36)"],
|
|
|
|
)
|
2022-05-04 12:22:50 -05:00
|
|
|
_create_index(session_maker, "events", "ix_events_context_id")
|
2023-02-04 18:52:26 +01:00
|
|
|
# This used to create ix_events_context_user_id,
|
|
|
|
# but it was removed in version 28
|
2019-07-31 12:25:30 -07:00
|
|
|
_add_columns(
|
2022-05-04 12:22:50 -05:00
|
|
|
session_maker,
|
2019-07-31 12:25:30 -07:00
|
|
|
"states",
|
|
|
|
["context_id CHARACTER(36)", "context_user_id CHARACTER(36)"],
|
|
|
|
)
|
2022-05-04 12:22:50 -05:00
|
|
|
_create_index(session_maker, "states", "ix_states_context_id")
|
2023-02-04 18:52:26 +01:00
|
|
|
# This used to create ix_states_context_user_id,
|
|
|
|
# but it was removed in version 28
|
2018-11-19 10:36:00 +01:00
|
|
|
elif new_version == 7:
|
2023-02-04 18:52:26 +01:00
|
|
|
# There used to be a ix_states_entity_id index here,
|
|
|
|
# but it was removed in a later schema version
|
2023-01-29 16:33:23 -10:00
|
|
|
pass
|
2019-03-01 10:08:38 -08:00
|
|
|
elif new_version == 8:
|
2022-05-04 12:22:50 -05:00
|
|
|
_add_columns(session_maker, "events", ["context_parent_id CHARACTER(36)"])
|
|
|
|
_add_columns(session_maker, "states", ["old_state_id INTEGER"])
|
2023-02-04 18:52:26 +01:00
|
|
|
# This used to create ix_events_context_parent_id,
|
|
|
|
# but it was removed in version 28
|
2020-06-23 12:57:52 -05:00
|
|
|
elif new_version == 9:
|
|
|
|
# We now get the context from events with a join
|
|
|
|
# since it's always there on state_changed events
|
|
|
|
#
|
|
|
|
# Ideally we would drop the columns from the states
|
|
|
|
# table as well but sqlite doesn't support that
|
|
|
|
# and we would have to move to something like
|
|
|
|
# sqlalchemy alembic to make that work
|
|
|
|
#
|
2022-05-02 02:10:34 -05:00
|
|
|
# no longer dropping ix_states_context_id since it's recreated in 28
|
2022-05-04 12:22:50 -05:00
|
|
|
_drop_index(session_maker, "states", "ix_states_context_user_id")
|
2020-06-24 11:56:01 -05:00
|
|
|
# This index won't be there if they were not running
|
|
|
|
# nightly, but we don't treat that as a critical issue
|
2022-05-04 12:22:50 -05:00
|
|
|
_drop_index(session_maker, "states", "ix_states_context_parent_id")
|
2020-06-23 12:57:52 -05:00
|
|
|
# Redundant keys on composite index:
|
|
|
|
# We already have ix_states_entity_id_last_updated
|
2022-05-04 12:22:50 -05:00
|
|
|
_drop_index(session_maker, "states", "ix_states_entity_id")
|
2023-02-04 18:52:26 +01:00
|
|
|
# This used to create ix_events_event_type_time_fired,
|
|
|
|
# but it was removed in version 32
|
2022-05-04 12:22:50 -05:00
|
|
|
_drop_index(session_maker, "events", "ix_events_event_type")
|
2020-11-28 19:42:29 +01:00
|
|
|
elif new_version == 10:
|
2021-01-03 23:51:44 -10:00
|
|
|
# Now done in step 11
|
|
|
|
pass
|
|
|
|
elif new_version == 11:
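# Index old_state_id and update the foreign key options on the states table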
|
2022-05-04 12:22:50 -05:00
|
|
|
_create_index(session_maker, "states", "ix_states_old_state_id")
|
|
|
|
_update_states_table_with_foreign_key_options(session_maker, engine)
|
2021-03-10 19:12:58 +01:00
|
|
|
elif new_version == 12:
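# MySQL only: widen the event_data and attributes columns to LONGTEXT so large payloads fit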
|
2022-05-07 23:02:54 -05:00
|
|
|
if engine.dialect.name == SupportedDialect.MYSQL:
|
2022-05-04 12:22:50 -05:00
|
|
|
_modify_columns(session_maker, engine, "events", ["event_data LONGTEXT"])
|
|
|
|
_modify_columns(session_maker, engine, "states", ["attributes LONGTEXT"])
|
2021-04-07 09:13:55 +02:00
|
|
|
elif new_version == 13:
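# MySQL only: switch to DATETIME(6) so timestamps keep microsecond precision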
|
2022-05-07 23:02:54 -05:00
|
|
|
if engine.dialect.name == SupportedDialect.MYSQL:
|
2021-04-07 09:13:55 +02:00
|
|
|
_modify_columns(
|
2022-05-04 12:22:50 -05:00
|
|
|
session_maker,
|
2021-04-21 20:29:36 -10:00
|
|
|
engine,
|
|
|
|
"events",
|
|
|
|
["time_fired DATETIME(6)", "created DATETIME(6)"],
|
2021-04-07 09:13:55 +02:00
|
|
|
)
|
|
|
|
_modify_columns(
|
2022-05-04 12:22:50 -05:00
|
|
|
session_maker,
|
2021-04-07 09:13:55 +02:00
|
|
|
engine,
|
|
|
|
"states",
|
|
|
|
[
|
|
|
|
"last_changed DATETIME(6)",
|
|
|
|
"last_updated DATETIME(6)",
|
|
|
|
"created DATETIME(6)",
|
|
|
|
],
|
|
|
|
)
|
2021-04-08 19:08:49 +02:00
|
|
|
elif new_version == 14:
|
2022-05-04 12:22:50 -05:00
|
|
|
_modify_columns(session_maker, engine, "events", ["event_type VARCHAR(64)"])
|
2021-05-20 13:05:15 +02:00
|
|
|
elif new_version == 15:
|
2021-07-05 13:28:01 +02:00
|
|
|
# This dropped the statistics table, done again in version 18.
|
|
|
|
pass
|
2021-05-23 04:10:27 +02:00
|
|
|
elif new_version == 16:
|
2022-05-04 12:22:50 -05:00
|
|
|
_drop_foreign_key_constraints(
|
|
|
|
session_maker, engine, TABLE_STATES, ["old_state_id"]
|
|
|
|
)
|
2021-06-30 13:32:17 +02:00
|
|
|
elif new_version == 17:
|
2021-07-02 13:17:00 +02:00
|
|
|
# This dropped the statistics table, done again in version 18.
|
|
|
|
pass
|
|
|
|
elif new_version == 18:
|
2021-07-05 13:34:40 +02:00
|
|
|
# Recreate the statistics and statistics meta tables.
|
|
|
|
#
|
2021-10-06 13:29:42 +02:00
|
|
|
# Order matters! Statistics and StatisticsShortTerm have a relation with
|
|
|
|
# StatisticsMeta, so statistics need to be deleted before meta (or in pairs
|
|
|
|
# depending on the SQL backend); and meta needs to be created before statistics.
|
2023-02-08 15:17:32 +01:00
|
|
|
|
|
|
|
# We need to cast __table__ to Table, explanation in
|
|
|
|
# https://github.com/sqlalchemy/sqlalchemy/issues/9130
|
2021-10-06 13:29:42 +02:00
|
|
|
Base.metadata.drop_all(
|
|
|
|
bind=engine,
|
|
|
|
tables=[
|
2023-02-08 15:17:32 +01:00
|
|
|
cast(Table, StatisticsShortTerm.__table__),
|
|
|
|
cast(Table, Statistics.__table__),
|
|
|
|
cast(Table, StatisticsMeta.__table__),
|
2021-10-06 13:29:42 +02:00
|
|
|
],
|
|
|
|
)
|
2021-07-02 20:55:40 +02:00
|
|
|
|
2023-02-08 15:17:32 +01:00
|
|
|
cast(Table, StatisticsMeta.__table__).create(engine)
|
|
|
|
cast(Table, StatisticsShortTerm.__table__).create(engine)
|
|
|
|
cast(Table, Statistics.__table__).create(engine)
|
2021-08-20 07:10:45 +02:00
|
|
|
elif new_version == 19:
|
|
|
|
# This adds the statistics runs table; insert a fake run to prevent duplicating
|
|
|
|
# statistics.
|
2022-05-04 12:22:50 -05:00
|
|
|
with session_scope(session=session_maker()) as session:
|
2022-02-04 18:55:11 +01:00
|
|
|
session.add(StatisticsRuns(start=get_start_time()))
|
2021-08-24 11:18:59 +02:00
|
|
|
elif new_version == 20:
|
|
|
|
# This changed the precision of statistics from float to double
|
2022-05-07 23:02:54 -05:00
|
|
|
if engine.dialect.name in [SupportedDialect.MYSQL, SupportedDialect.POSTGRESQL]:
|
2021-08-24 11:18:59 +02:00
|
|
|
_modify_columns(
|
2022-05-04 12:22:50 -05:00
|
|
|
session_maker,
|
2021-08-24 11:18:59 +02:00
|
|
|
engine,
|
|
|
|
"statistics",
|
|
|
|
[
|
2023-03-22 10:05:23 -10:00
|
|
|
f"{column} {DOUBLE_PRECISION_TYPE_SQL}"
|
|
|
|
for column in ("max", "mean", "min", "state", "sum")
|
2021-08-24 11:18:59 +02:00
|
|
|
],
|
|
|
|
)
|
2021-09-09 08:35:53 +02:00
|
|
|
elif new_version == 21:
|
2021-09-09 17:24:20 +02:00
|
|
|
# Try to change the character set of the statistic_meta table
|
2022-05-07 23:02:54 -05:00
|
|
|
if engine.dialect.name == SupportedDialect.MYSQL:
|
2021-09-09 19:26:28 +02:00
|
|
|
for table in ("events", "states", "statistics_meta"):
|
2023-03-22 10:05:23 -10:00
|
|
|
_correct_table_character_set_and_collation(table, session_maker)
|
2021-09-21 00:38:42 +02:00
|
|
|
elif new_version == 22:
|
|
|
|
# Recreate all statistics tables for Oracle DB with Identity columns
|
|
|
|
#
|
|
|
|
# Order matters! Statistics has a relation with StatisticsMeta,
|
|
|
|
# so statistics need to be deleted before meta (or in pairs depending
|
|
|
|
# on the SQL backend); and meta needs to be created before statistics.
|
|
|
|
if engine.dialect.name == "oracle":
|
2023-02-08 15:17:32 +01:00
|
|
|
# We need to cast __table__ to Table, explanation in
|
|
|
|
# https://github.com/sqlalchemy/sqlalchemy/issues/9130
|
2021-10-06 13:29:42 +02:00
|
|
|
Base.metadata.drop_all(
|
|
|
|
bind=engine,
|
|
|
|
tables=[
|
2023-02-08 15:17:32 +01:00
|
|
|
cast(Table, StatisticsShortTerm.__table__),
|
|
|
|
cast(Table, Statistics.__table__),
|
|
|
|
cast(Table, StatisticsMeta.__table__),
|
|
|
|
cast(Table, StatisticsRuns.__table__),
|
2021-10-06 13:29:42 +02:00
|
|
|
],
|
|
|
|
)
|
2021-09-21 00:38:42 +02:00
|
|
|
|
2023-02-08 15:17:32 +01:00
|
|
|
cast(Table, StatisticsRuns.__table__).create(engine)
|
|
|
|
cast(Table, StatisticsMeta.__table__).create(engine)
|
|
|
|
cast(Table, StatisticsShortTerm.__table__).create(engine)
|
|
|
|
cast(Table, Statistics.__table__).create(engine)
|
2021-09-24 09:19:22 +02:00
|
|
|
|
|
|
|
# Block 5-minute statistics for one hour from the last run, or it will overlap
|
|
|
|
# with existing hourly statistics. Don't block on a database with no existing
|
|
|
|
# statistics.
|
2022-05-04 12:22:50 -05:00
|
|
|
with session_scope(session=session_maker()) as session:
|
2022-02-04 18:55:11 +01:00
|
|
|
if session.query(Statistics.id).count() and (
|
|
|
|
last_run_string := session.query(
|
2023-02-08 15:17:32 +01:00
|
|
|
# https://github.com/sqlalchemy/sqlalchemy/issues/9189
|
|
|
|
# pylint: disable-next=not-callable
|
2022-02-04 18:55:11 +01:00
|
|
|
func.max(StatisticsRuns.start)
|
|
|
|
).scalar()
|
|
|
|
):
|
|
|
|
last_run_start_time = process_timestamp(last_run_string)
|
|
|
|
if last_run_start_time:
|
|
|
|
fake_start_time = last_run_start_time + timedelta(minutes=5)
|
|
|
|
while fake_start_time < last_run_start_time + timedelta(hours=1):
|
|
|
|
session.add(StatisticsRuns(start=fake_start_time))
|
|
|
|
fake_start_time += timedelta(minutes=5)
|
2021-09-24 09:19:22 +02:00
|
|
|
|
2021-11-04 10:29:10 +01:00
|
|
|
# When querying the database, be careful to only explicitly query for columns
|
2022-09-09 08:06:14 +02:00
|
|
|
# which were present in schema version 22. If querying the table, SQLAlchemy
|
2021-11-04 10:29:10 +01:00
|
|
|
# will refer to future columns.
|
2022-05-04 12:22:50 -05:00
|
|
|
with session_scope(session=session_maker()) as session:
|
2022-02-04 18:55:11 +01:00
|
|
|
for sum_statistic in session.query(StatisticsMeta.id).filter_by(
|
|
|
|
has_sum=true()
|
|
|
|
):
|
|
|
|
last_statistic = (
|
|
|
|
session.query(
|
|
|
|
Statistics.start,
|
|
|
|
Statistics.last_reset,
|
|
|
|
Statistics.state,
|
|
|
|
Statistics.sum,
|
2021-09-24 09:19:22 +02:00
|
|
|
)
|
2022-02-04 18:55:11 +01:00
|
|
|
.filter_by(metadata_id=sum_statistic.id)
|
|
|
|
.order_by(Statistics.start.desc())
|
|
|
|
.first()
|
2021-09-24 09:19:22 +02:00
|
|
|
)
|
2022-02-04 18:55:11 +01:00
|
|
|
if last_statistic:
|
|
|
|
session.add(
|
|
|
|
StatisticsShortTerm(
|
|
|
|
metadata_id=sum_statistic.id,
|
|
|
|
start=last_statistic.start,
|
|
|
|
last_reset=last_statistic.last_reset,
|
|
|
|
state=last_statistic.state,
|
|
|
|
sum=last_statistic.sum,
|
|
|
|
)
|
|
|
|
)
|
2021-10-26 10:26:50 +02:00
|
|
|
elif new_version == 23:
|
|
|
|
# Add name column to StatisticsMeta
|
2022-05-04 12:22:50 -05:00
|
|
|
_add_columns(session_maker, "statistics_meta", ["name VARCHAR(255)"])
|
2021-12-13 14:15:36 +01:00
|
|
|
elif new_version == 24:
|
2023-02-24 11:41:44 -06:00
|
|
|
# This used to create the unique indices for start and statistic_id
|
|
|
|
# but we changed the format in schema 34 which will now take care
|
|
|
|
# of removing any duplicates if they still exist.
|
|
|
|
pass
|
2022-03-18 00:23:13 -10:00
|
|
|
elif new_version == 25:
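# Add attributes_id so state attributes can be stored in a separate shared table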
|
2023-03-21 15:06:10 -10:00
|
|
|
_add_columns(
|
|
|
|
session_maker, "states", [f"attributes_id {_column_types.big_int_type}"]
|
|
|
|
)
|
2022-05-04 12:22:50 -05:00
|
|
|
_create_index(session_maker, "states", "ix_states_attributes_id")
|
2022-04-26 10:11:57 -10:00
|
|
|
elif new_version == 26:
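# Index statistics_runs.start to make finding the latest run faster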
|
2022-05-04 12:22:50 -05:00
|
|
|
_create_index(session_maker, "statistics_runs", "ix_statistics_runs_start")
|
2022-05-01 21:01:17 -05:00
|
|
|
elif new_version == 27:
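# Add data_id so event data can be stored in a separate shared table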
|
2023-03-21 15:06:10 -10:00
|
|
|
_add_columns(session_maker, "events", [f"data_id {_column_types.big_int_type}"])
|
2022-05-04 12:22:50 -05:00
|
|
|
_create_index(session_maker, "events", "ix_events_data_id")
|
2022-05-02 02:10:34 -05:00
|
|
|
elif new_version == 28:
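# Add origin_idx and give states their own context columns;
# state_changed events no longer need a row in the events table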
|
2022-05-04 12:22:50 -05:00
|
|
|
_add_columns(session_maker, "events", ["origin_idx INTEGER"])
|
2022-05-02 02:10:34 -05:00
|
|
|
# We never use the user_id or parent_id index
|
2022-05-04 12:22:50 -05:00
|
|
|
_drop_index(session_maker, "events", "ix_events_context_user_id")
|
|
|
|
_drop_index(session_maker, "events", "ix_events_context_parent_id")
|
2022-05-02 02:10:34 -05:00
|
|
|
_add_columns(
|
2022-05-04 12:22:50 -05:00
|
|
|
session_maker,
|
2022-05-02 02:10:34 -05:00
|
|
|
"states",
|
|
|
|
[
|
|
|
|
"origin_idx INTEGER",
|
|
|
|
"context_id VARCHAR(36)",
|
|
|
|
"context_user_id VARCHAR(36)",
|
|
|
|
"context_parent_id VARCHAR(36)",
|
|
|
|
],
|
|
|
|
)
|
2022-05-04 12:22:50 -05:00
|
|
|
_create_index(session_maker, "states", "ix_states_context_id")
|
2022-05-02 02:10:34 -05:00
|
|
|
# Once there are no longer any state_changed events
|
|
|
|
# in the events table we can drop the index on states.event_id
|
2022-05-24 15:34:46 +02:00
|
|
|
elif new_version == 29:
|
|
|
|
# Recreate statistics_meta index to block duplicated statistic_id
|
|
|
|
_drop_index(session_maker, "statistics_meta", "ix_statistics_meta_statistic_id")
|
2022-05-31 11:35:28 -10:00
|
|
|
if engine.dialect.name == SupportedDialect.MYSQL:
|
|
|
|
# Ensure the row format is dynamic or the unique index
|
|
|
|
# will be too large
|
2023-01-27 11:52:49 +01:00
|
|
|
with contextlib.suppress(SQLAlchemyError), session_scope(
|
|
|
|
session=session_maker()
|
|
|
|
) as session:
|
|
|
|
connection = session.connection()
|
|
|
|
# This is safe to run multiple times and fast
|
|
|
|
# since the table is small.
|
|
|
|
connection.execute(
|
|
|
|
text("ALTER TABLE statistics_meta ROW_FORMAT=DYNAMIC")
|
|
|
|
)
|
2022-05-24 15:34:46 +02:00
|
|
|
try:
|
|
|
|
_create_index(
|
|
|
|
session_maker, "statistics_meta", "ix_statistics_meta_statistic_id"
|
|
|
|
)
|
|
|
|
except DatabaseError:
|
|
|
|
# There may be duplicated statistics_meta entries, delete duplicates
|
|
|
|
# and try again
|
|
|
|
with session_scope(session=session_maker()) as session:
|
2023-03-16 19:00:02 -10:00
|
|
|
delete_statistics_meta_duplicates(instance, session)
|
2022-05-24 15:34:46 +02:00
|
|
|
_create_index(
|
|
|
|
session_maker, "statistics_meta", "ix_statistics_meta_statistic_id"
|
|
|
|
)
|
2022-09-15 18:01:24 +02:00
|
|
|
elif new_version == 30:
|
2022-10-01 18:55:00 +02:00
|
|
|
# This added a column to the statistics_meta table, removed again before
|
|
|
|
# release of HA Core 2022.10.0
|
2022-10-03 11:40:11 +02:00
|
|
|
# SQLite 3.31.0 does not support dropping columns.
|
|
|
|
# Once we require SQLite >= 3.35.5, we should drop the column:
|
|
|
|
# ALTER TABLE statistics_meta DROP COLUMN state_unit_of_measurement
|
2022-10-01 18:55:00 +02:00
|
|
|
pass
|
2023-01-02 13:26:08 -10:00
|
|
|
elif new_version == 31:
|
|
|
|
# Once we require SQLite >= 3.35.5, we should drop the column:
|
|
|
|
# ALTER TABLE events DROP COLUMN time_fired
|
|
|
|
# ALTER TABLE states DROP COLUMN last_updated
|
|
|
|
# ALTER TABLE states DROP COLUMN last_changed
|
2023-03-21 15:06:10 -10:00
|
|
|
_add_columns(
|
|
|
|
session_maker, "events", [f"time_fired_ts {_column_types.timestamp_type}"]
|
|
|
|
)
|
2023-01-02 13:26:08 -10:00
|
|
|
_add_columns(
|
|
|
|
session_maker,
|
|
|
|
"states",
|
2023-03-21 15:06:10 -10:00
|
|
|
[
|
|
|
|
f"last_updated_ts {_column_types.timestamp_type}",
|
|
|
|
f"last_changed_ts {_column_types.timestamp_type}",
|
|
|
|
],
|
2023-01-02 13:26:08 -10:00
|
|
|
)
|
|
|
|
_create_index(session_maker, "events", "ix_events_time_fired_ts")
|
|
|
|
_create_index(session_maker, "events", "ix_events_event_type_time_fired_ts")
|
|
|
|
_create_index(session_maker, "states", "ix_states_entity_id_last_updated_ts")
|
|
|
|
_create_index(session_maker, "states", "ix_states_last_updated_ts")
|
2023-01-27 17:39:45 -10:00
|
|
|
_migrate_columns_to_timestamp(session_maker, engine)
|
2023-01-02 13:26:08 -10:00
|
|
|
elif new_version == 32:
|
|
|
|
# Migration is done in two steps to ensure we can start using
|
|
|
|
# the new columns before we wipe the old ones.
|
|
|
|
_drop_index(session_maker, "states", "ix_states_entity_id_last_updated")
|
|
|
|
_drop_index(session_maker, "events", "ix_events_event_type_time_fired")
|
|
|
|
_drop_index(session_maker, "states", "ix_states_last_updated")
|
|
|
|
_drop_index(session_maker, "events", "ix_events_time_fired")
|
2023-01-29 16:33:23 -10:00
|
|
|
elif new_version == 33:
|
|
|
|
# This index is no longer used and can cause MySQL to use the wrong index
|
|
|
|
# when querying the states table.
|
|
|
|
# https://github.com/home-assistant/core/issues/83787
|
2023-03-12 14:07:05 -10:00
|
|
|
# There was an index cleanup here but it's now done in schema 39
|
|
|
|
pass
|
2023-02-09 12:24:19 -06:00
|
|
|
elif new_version == 34:
|
|
|
|
# Once we require SQLite >= 3.35.5, we should drop the columns:
|
|
|
|
# ALTER TABLE statistics DROP COLUMN created
|
|
|
|
# ALTER TABLE statistics DROP COLUMN start
|
|
|
|
# ALTER TABLE statistics DROP COLUMN last_reset
|
|
|
|
# ALTER TABLE statistics_short_term DROP COLUMN created
|
|
|
|
# ALTER TABLE statistics_short_term DROP COLUMN start
|
|
|
|
# ALTER TABLE statistics_short_term DROP COLUMN last_reset
|
|
|
|
_add_columns(
|
|
|
|
session_maker,
|
|
|
|
"statistics",
|
|
|
|
[
|
2023-03-21 15:06:10 -10:00
|
|
|
f"created_ts {_column_types.timestamp_type}",
|
|
|
|
f"start_ts {_column_types.timestamp_type}",
|
|
|
|
f"last_reset_ts {_column_types.timestamp_type}",
|
2023-02-09 12:24:19 -06:00
|
|
|
],
|
|
|
|
)
|
|
|
|
_add_columns(
|
|
|
|
session_maker,
|
|
|
|
"statistics_short_term",
|
|
|
|
[
|
2023-03-21 15:06:10 -10:00
|
|
|
f"created_ts {_column_types.timestamp_type}",
|
|
|
|
f"start_ts {_column_types.timestamp_type}",
|
|
|
|
f"last_reset_ts {_column_types.timestamp_type}",
|
2023-02-09 12:24:19 -06:00
|
|
|
],
|
|
|
|
)
|
|
|
|
_create_index(session_maker, "statistics", "ix_statistics_start_ts")
|
|
|
|
_create_index(
|
|
|
|
session_maker, "statistics", "ix_statistics_statistic_id_start_ts"
|
|
|
|
)
|
|
|
|
_create_index(
|
|
|
|
session_maker, "statistics_short_term", "ix_statistics_short_term_start_ts"
|
|
|
|
)
|
|
|
|
_create_index(
|
|
|
|
session_maker,
|
|
|
|
"statistics_short_term",
|
|
|
|
"ix_statistics_short_term_statistic_id_start_ts",
|
|
|
|
)
|
2023-02-24 11:41:44 -06:00
|
|
|
try:
|
|
|
|
_migrate_statistics_columns_to_timestamp(session_maker, engine)
|
|
|
|
except IntegrityError as ex:
|
|
|
|
_LOGGER.error(
|
|
|
|
"Statistics table contains duplicate entries: %s; "
|
|
|
|
"Cleaning up duplicates and trying again; "
|
|
|
|
"This will take a while; "
|
|
|
|
"Please be patient!",
|
|
|
|
ex,
|
|
|
|
)
|
|
|
|
# There may be duplicated statistics entries, delete duplicates
|
|
|
|
# and try again
|
|
|
|
with session_scope(session=session_maker()) as session:
|
2023-03-03 17:00:13 -10:00
|
|
|
delete_statistics_duplicates(instance, hass, session)
|
2023-02-24 11:41:44 -06:00
|
|
|
_migrate_statistics_columns_to_timestamp(session_maker, engine)
|
|
|
|
# Log at error level to ensure the user sees this message in the log
|
|
|
|
# since we logged the error above.
|
|
|
|
_LOGGER.error(
|
|
|
|
"Statistics migration successfully recovered after statistics table duplicate cleanup"
|
|
|
|
)
|
2023-02-09 12:24:19 -06:00
|
|
|
elif new_version == 35:
|
|
|
|
# Migration is done in two steps to ensure we can start using
|
|
|
|
# the new columns before we wipe the old ones.
|
2023-03-12 14:07:05 -10:00
|
|
|
_drop_index(
|
|
|
|
session_maker, "statistics", "ix_statistics_statistic_id_start", quiet=True
|
|
|
|
)
|
2023-02-09 12:24:19 -06:00
|
|
|
_drop_index(
|
|
|
|
session_maker,
|
|
|
|
"statistics_short_term",
|
|
|
|
"ix_statistics_short_term_statistic_id_start",
|
2023-03-12 14:07:05 -10:00
|
|
|
quiet=True,
|
2023-02-09 12:24:19 -06:00
|
|
|
)
|
|
|
|
# ix_statistics_start and ix_statistics_statistic_id_start are still used
|
|
|
|
# for the post migration cleanup and can be removed in a future version.
|
2023-03-08 14:51:45 -10:00
|
|
|
elif new_version == 36:
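# Add binary context id columns; the old string values are converted later
# by migrate_states_context_ids and migrate_events_context_ids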
|
|
|
|
for table in ("states", "events"):
|
|
|
|
_add_columns(
|
|
|
|
session_maker,
|
|
|
|
table,
|
|
|
|
[
|
2023-03-21 15:06:10 -10:00
|
|
|
f"context_id_bin {_column_types.context_bin_type}",
|
|
|
|
f"context_user_id_bin {_column_types.context_bin_type}",
|
|
|
|
f"context_parent_id_bin {_column_types.context_bin_type}",
|
2023-03-08 14:51:45 -10:00
|
|
|
],
|
|
|
|
)
|
|
|
|
_create_index(session_maker, "events", "ix_events_context_id_bin")
|
|
|
|
_create_index(session_maker, "states", "ix_states_context_id_bin")
|
2023-03-11 09:54:55 -10:00
|
|
|
elif new_version == 37:
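# Add event_type_id so event types can be normalized into the event_types
# table (populated later by migrate_event_type_ids)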
|
2023-03-21 15:06:10 -10:00
|
|
|
_add_columns(
|
|
|
|
session_maker, "events", [f"event_type_id {_column_types.big_int_type}"]
|
|
|
|
)
|
2023-03-11 09:54:55 -10:00
|
|
|
_create_index(session_maker, "events", "ix_events_event_type_id")
|
|
|
|
_drop_index(session_maker, "events", "ix_events_event_type_time_fired_ts")
|
|
|
|
_create_index(session_maker, "events", "ix_events_event_type_id_time_fired_ts")
|
2023-03-12 10:01:58 -10:00
|
|
|
elif new_version == 38:
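# Add metadata_id so entity_ids can be normalized into the states_meta
# table (populated later by migrate_entity_ids)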
|
2023-03-21 15:06:10 -10:00
|
|
|
_add_columns(
|
|
|
|
session_maker, "states", [f"metadata_id {_column_types.big_int_type}"]
|
|
|
|
)
|
2023-03-12 10:01:58 -10:00
|
|
|
_create_index(session_maker, "states", "ix_states_metadata_id")
|
|
|
|
_create_index(session_maker, "states", "ix_states_metadata_id_last_updated_ts")
|
2023-03-12 14:07:05 -10:00
|
|
|
elif new_version == 39:
|
|
|
|
# Dropping indexes with PostgreSQL never worked correctly if there was a prefix
|
|
|
|
# so we need to clean up leftover indexes.
|
|
|
|
_drop_index(
|
|
|
|
session_maker, "events", "ix_events_event_type_time_fired_ts", quiet=True
|
|
|
|
)
|
|
|
|
_drop_index(session_maker, "events", "ix_events_event_type", quiet=True)
|
|
|
|
_drop_index(
|
|
|
|
session_maker, "events", "ix_events_event_type_time_fired", quiet=True
|
|
|
|
)
|
|
|
|
_drop_index(session_maker, "events", "ix_events_time_fired", quiet=True)
|
|
|
|
_drop_index(session_maker, "events", "ix_events_context_user_id", quiet=True)
|
|
|
|
_drop_index(session_maker, "events", "ix_events_context_parent_id", quiet=True)
|
|
|
|
_drop_index(
|
|
|
|
session_maker, "states", "ix_states_entity_id_last_updated", quiet=True
|
|
|
|
)
|
|
|
|
_drop_index(session_maker, "states", "ix_states_last_updated", quiet=True)
|
|
|
|
_drop_index(session_maker, "states", "ix_states_entity_id", quiet=True)
|
|
|
|
_drop_index(session_maker, "states", "ix_states_context_user_id", quiet=True)
|
|
|
|
_drop_index(session_maker, "states", "ix_states_context_parent_id", quiet=True)
|
|
|
|
_drop_index(session_maker, "states", "ix_states_created_domain", quiet=True)
|
|
|
|
_drop_index(session_maker, "states", "ix_states_entity_id_created", quiet=True)
|
|
|
|
_drop_index(session_maker, "states", "states__state_changes", quiet=True)
|
|
|
|
_drop_index(session_maker, "states", "states__significant_changes", quiet=True)
|
|
|
|
_drop_index(session_maker, "states", "ix_states_entity_id_created", quiet=True)
|
|
|
|
_drop_index(
|
|
|
|
session_maker, "statistics", "ix_statistics_statistic_id_start", quiet=True
|
|
|
|
)
|
|
|
|
_drop_index(
|
|
|
|
session_maker,
|
|
|
|
"statistics_short_term",
|
|
|
|
"ix_statistics_short_term_statistic_id_start",
|
|
|
|
quiet=True,
|
|
|
|
)
|
2023-03-12 22:24:57 -10:00
|
|
|
elif new_version == 40:
|
|
|
|
# ix_events_event_type_id is a left-prefix of ix_events_event_type_id_time_fired_ts
|
|
|
|
_drop_index(session_maker, "events", "ix_events_event_type_id")
|
|
|
|
# ix_states_metadata_id is a left-prefix of ix_states_metadata_id_last_updated_ts
|
|
|
|
_drop_index(session_maker, "states", "ix_states_metadata_id")
|
|
|
|
# ix_statistics_metadata_id is a left-prefix of ix_statistics_statistic_id_start_ts
|
|
|
|
_drop_index(session_maker, "statistics", "ix_statistics_metadata_id")
|
|
|
|
# ix_statistics_short_term_metadata_id is a left-prefix of ix_statistics_short_term_statistic_id_start_ts
|
|
|
|
_drop_index(
|
|
|
|
session_maker,
|
|
|
|
"statistics_short_term",
|
|
|
|
"ix_statistics_short_term_metadata_id",
|
|
|
|
)
|
2023-03-13 23:09:21 -10:00
|
|
|
elif new_version == 41:
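# Index the lookup columns of the event_types and states_meta tables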
|
|
|
|
_create_index(session_maker, "event_types", "ix_event_types_event_type")
|
|
|
|
_create_index(session_maker, "states_meta", "ix_states_meta_entity_id")
|
2017-02-26 14:38:06 -08:00
|
|
|
else:
|
2019-08-23 18:53:33 +02:00
|
|
|
raise ValueError(f"No schema migration defined for version {new_version}")
|
2017-02-26 14:38:06 -08:00
|
|
|
|
|
|
|
|
2023-03-22 10:05:23 -10:00
|
|
|
def _correct_table_character_set_and_collation(
|
|
|
|
table: str,
|
|
|
|
session_maker: Callable[[], Session],
|
|
|
|
) -> None:
|
|
|
|
"""Correct issues detected by validate_db_schema."""
|
|
|
|
# Attempt to convert the table to utf8mb4
|
|
|
|
_LOGGER.warning(
|
|
|
|
"Updating character set and collation of table %s to utf8mb4. "
|
|
|
|
"Note: this can take several minutes on large databases and slow "
|
|
|
|
"computers. Please be patient!",
|
|
|
|
table,
|
|
|
|
)
|
|
|
|
with contextlib.suppress(SQLAlchemyError), session_scope(
|
|
|
|
session=session_maker()
|
|
|
|
) as session:
|
|
|
|
connection = session.connection()
|
|
|
|
connection.execute(
|
|
|
|
# Using LOCK=EXCLUSIVE to prevent the database from being corrupted
|
|
|
|
# https://github.com/home-assistant/core/issues/56104
|
|
|
|
text(
|
|
|
|
f"ALTER TABLE {table} CONVERT TO CHARACTER SET "
|
|
|
|
f"{MYSQL_DEFAULT_CHARSET} "
|
|
|
|
f"COLLATE {MYSQL_COLLATE}, LOCK=EXCLUSIVE"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
2023-01-02 13:26:08 -10:00
|
|
|
def post_schema_migration(
|
2023-02-09 12:24:19 -06:00
|
|
|
instance: Recorder,
|
2023-01-02 13:26:08 -10:00
|
|
|
old_version: int,
|
|
|
|
new_version: int,
|
|
|
|
) -> None:
|
|
|
|
"""Post schema migration.
|
|
|
|
|
|
|
|
Run any housekeeping tasks after the schema migration has completed.
|
|
|
|
|
|
|
|
Post schema migration is run after the schema migration has completed
|
|
|
|
and the queue has been processed. Events are held in memory until the
|
|
|
|
queue is processed, and the queue cannot be processed until the schema
|
|
|
|
migration is complete, so running this step afterwards reduces memory
|
|
|
|
pressure.
|
|
|
|
"""
|
|
|
|
if old_version < 32 <= new_version:
|
|
|
|
# In version 31 we migrated all the time_fired, last_updated, and last_changed
|
|
|
|
# columns to be timestamps. In version 32 we need to wipe the old columns
|
|
|
|
# since they are no longer used and take up a significant amount of space.
|
2023-02-09 12:24:19 -06:00
|
|
|
assert instance.event_session is not None
|
|
|
|
assert instance.engine is not None
|
2023-03-03 17:00:13 -10:00
|
|
|
_wipe_old_string_time_columns(instance, instance.engine, instance.event_session)
|
2023-02-09 12:24:19 -06:00
|
|
|
if old_version < 35 <= new_version:
|
|
|
|
# In version 34 we migrated all the created, start, and last_reset
|
|
|
|
# columns to be timestamps. In version 35 we need to wipe the old columns
|
|
|
|
# since they are no longer used and take up a significant amount of space.
|
|
|
|
_wipe_old_string_statistics_columns(instance)
|
|
|
|
|
|
|
|
|
|
|
|
def _wipe_old_string_statistics_columns(instance: Recorder) -> None:
|
|
|
|
"""Wipe old string statistics columns to save space."""
|
|
|
|
instance.queue_task(StatisticsTimestampMigrationCleanupTask())
|
2023-01-02 13:26:08 -10:00
|
|
|
|
|
|
|
|
2023-03-03 17:00:13 -10:00
|
|
|
@database_job_retry_wrapper("Wipe old string time columns", 3)
|
|
|
|
def _wipe_old_string_time_columns(
|
|
|
|
instance: Recorder, engine: Engine, session: Session
|
|
|
|
) -> None:
|
2023-01-02 13:26:08 -10:00
|
|
|
"""Wipe old string time columns to save space."""
|
|
|
|
# Wipe Events.time_fired since it's been replaced by Events.time_fired_ts
|
|
|
|
# Wipe States.last_updated since it's been replaced by States.last_updated_ts
|
|
|
|
# Wipe States.last_changed since it's been replaced by States.last_changed_ts
|
2023-01-27 17:39:45 -10:00
|
|
|
#
|
|
|
|
if engine.dialect.name == SupportedDialect.SQLITE:
|
|
|
|
session.execute(text("UPDATE events set time_fired=NULL;"))
|
|
|
|
session.commit()
|
|
|
|
session.execute(text("UPDATE states set last_updated=NULL, last_changed=NULL;"))
|
|
|
|
session.commit()
|
|
|
|
elif engine.dialect.name == SupportedDialect.MYSQL:
|
|
|
|
#
|
|
|
|
# Since this is only to save space we limit the number of rows we update
|
|
|
|
# to 10,000,000 per table since we do not want to block the database for too long
|
|
|
|
# or run out of innodb_buffer_pool_size on MySQL. The old data will eventually
|
|
|
|
# be cleaned up by the recorder purge if we do not do it now.
|
|
|
|
#
|
|
|
|
session.execute(text("UPDATE events set time_fired=NULL LIMIT 10000000;"))
|
|
|
|
session.commit()
|
|
|
|
session.execute(
|
|
|
|
text(
|
|
|
|
"UPDATE states set last_updated=NULL, last_changed=NULL "
|
|
|
|
" LIMIT 10000000;"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
session.commit()
|
|
|
|
elif engine.dialect.name == SupportedDialect.POSTGRESQL:
|
|
|
|
#
|
|
|
|
# Since this is only to save space we limit the number of rows we update
|
|
|
|
# to 250,000 per table since we do not want to block the database for too long
|
|
|
|
# or run out of RAM with PostgreSQL. The old data will eventually
|
|
|
|
# be cleaned up by the recorder purge if we do not do it now.
|
|
|
|
#
|
|
|
|
session.execute(
|
|
|
|
text(
|
|
|
|
"UPDATE events set time_fired=NULL "
|
|
|
|
"where event_id in "
|
|
|
|
"(select event_id from events where time_fired_ts is NOT NULL LIMIT 250000);"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
session.commit()
|
|
|
|
session.execute(
|
|
|
|
text(
|
|
|
|
"UPDATE states set last_updated=NULL, last_changed=NULL "
|
|
|
|
"where state_id in "
|
|
|
|
"(select state_id from states where last_updated_ts is NOT NULL LIMIT 250000);"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
session.commit()
|
2023-01-02 13:26:08 -10:00
|
|
|
|
|
|
|
|
|
|
|
def _migrate_columns_to_timestamp(
|
2023-01-27 17:39:45 -10:00
|
|
|
session_maker: Callable[[], Session], engine: Engine
|
2023-01-02 13:26:08 -10:00
|
|
|
) -> None:
|
|
|
|
"""Migrate columns to use timestamp."""
|
|
|
|
# Migrate all data in Events.time_fired to Events.time_fired_ts
|
|
|
|
# Migrate all data in States.last_updated to States.last_updated_ts
|
|
|
|
# Migrate all data in States.last_changed to States.last_changed_ts
|
2023-01-27 17:39:45 -10:00
|
|
|
result: CursorResult | None = None
|
2023-01-02 13:26:08 -10:00
|
|
|
if engine.dialect.name == SupportedDialect.SQLITE:
|
2023-01-27 17:39:45 -10:00
|
|
|
# With SQLite we do this in one go since it is faster
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
connection = session.connection()
|
|
|
|
connection.execute(
|
|
|
|
text(
|
|
|
|
'UPDATE events set time_fired_ts=strftime("%s",time_fired) + '
|
|
|
|
"cast(substr(time_fired,-7) AS FLOAT);"
|
|
|
|
)
|
2023-01-02 13:26:08 -10:00
|
|
|
)
|
2023-01-27 17:39:45 -10:00
|
|
|
connection.execute(
|
|
|
|
text(
|
|
|
|
'UPDATE states set last_updated_ts=strftime("%s",last_updated) + '
|
|
|
|
"cast(substr(last_updated,-7) AS FLOAT), "
|
|
|
|
'last_changed_ts=strftime("%s",last_changed) + '
|
|
|
|
"cast(substr(last_changed,-7) AS FLOAT);"
|
|
|
|
)
|
2023-01-02 13:26:08 -10:00
|
|
|
)
|
|
|
|
elif engine.dialect.name == SupportedDialect.MYSQL:
|
2023-01-27 17:39:45 -10:00
|
|
|
# With MySQL we do this in chunks to avoid hitting the `innodb_buffer_pool_size` limit
|
|
|
|
# We also need to do this in a loop since we can't be sure that we have
|
|
|
|
# updated all rows in the table until the rowcount is 0
|
|
|
|
while result is None or result.rowcount > 0:
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
result = session.connection().execute(
|
|
|
|
text(
|
|
|
|
"UPDATE events set time_fired_ts="
|
2023-03-09 16:03:41 -10:00
|
|
|
"IF(time_fired is NULL or UNIX_TIMESTAMP(time_fired) is NULL,0,"
|
2023-02-09 11:39:45 -06:00
|
|
|
"UNIX_TIMESTAMP(time_fired)"
|
2023-01-28 17:06:07 -10:00
|
|
|
") "
|
2023-01-27 17:39:45 -10:00
|
|
|
"where time_fired_ts is NULL "
|
|
|
|
"LIMIT 250000;"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
result = None
|
2023-02-08 15:17:32 +01:00
|
|
|
while result is None or result.rowcount > 0: # type: ignore[unreachable]
|
2023-01-27 17:39:45 -10:00
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
result = session.connection().execute(
|
|
|
|
text(
|
|
|
|
"UPDATE states set last_updated_ts="
|
2023-03-09 16:03:41 -10:00
|
|
|
"IF(last_updated is NULL or UNIX_TIMESTAMP(last_updated) is NULL,0,"
|
2023-02-09 11:39:45 -06:00
|
|
|
"UNIX_TIMESTAMP(last_updated) "
|
2023-01-28 17:06:07 -10:00
|
|
|
"), "
|
|
|
|
"last_changed_ts="
|
2023-02-09 11:39:45 -06:00
|
|
|
"UNIX_TIMESTAMP(last_changed) "
|
2023-01-27 17:39:45 -10:00
|
|
|
"where last_updated_ts is NULL "
|
|
|
|
"LIMIT 250000;"
|
|
|
|
)
|
|
|
|
)
|
2023-01-02 13:26:08 -10:00
|
|
|
elif engine.dialect.name == SupportedDialect.POSTGRESQL:
|
2023-01-27 17:39:45 -10:00
|
|
|
# With Postgresql we do this in chunks to avoid using too much memory
|
|
|
|
# We also need to do this in a loop since we can't be sure that we have
|
|
|
|
# updated all rows in the table until the rowcount is 0
|
|
|
|
while result is None or result.rowcount > 0:
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
result = session.connection().execute(
|
|
|
|
text(
|
|
|
|
"UPDATE events SET "
|
|
|
|
"time_fired_ts= "
|
|
|
|
"(case when time_fired is NULL then 0 else EXTRACT(EPOCH FROM time_fired) end) "
|
|
|
|
"WHERE event_id IN ( "
|
|
|
|
"SELECT event_id FROM events where time_fired_ts is NULL LIMIT 250000 "
|
|
|
|
" );"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
result = None
|
2023-02-08 15:17:32 +01:00
|
|
|
while result is None or result.rowcount > 0: # type: ignore[unreachable]
|
2023-01-27 17:39:45 -10:00
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
result = session.connection().execute(
|
|
|
|
text(
|
|
|
|
"UPDATE states set last_updated_ts="
|
|
|
|
"(case when last_updated is NULL then 0 else EXTRACT(EPOCH FROM last_updated) end), "
|
|
|
|
"last_changed_ts=EXTRACT(EPOCH FROM last_changed) "
|
|
|
|
"where state_id IN ( "
|
|
|
|
"SELECT state_id FROM states where last_updated_ts is NULL LIMIT 250000 "
|
|
|
|
" );"
|
|
|
|
)
|
|
|
|
)
|
2023-01-02 13:26:08 -10:00
|
|
|
|
|
|
|
|
2023-02-09 12:24:19 -06:00
|
|
|
def _migrate_statistics_columns_to_timestamp(
|
|
|
|
session_maker: Callable[[], Session], engine: Engine
|
|
|
|
) -> None:
|
|
|
|
"""Migrate statistics columns to use timestamp."""
|
|
|
|
# Migrate all data in statistics.start to statistics.start_ts
|
|
|
|
# Migrate all data in statistics.created to statistics.created_ts
|
|
|
|
# Migrate all data in statistics.last_reset to statistics.last_reset_ts
|
|
|
|
# Migrate all data in statistics_short_term.start to statistics_short_term.start_ts
|
|
|
|
# Migrate all data in statistics_short_term.created to statistics_short_term.created_ts
|
|
|
|
# Migrate all data in statistics_short_term.last_reset to statistics_short_term.last_reset_ts
|
|
|
|
result: CursorResult | None = None
|
|
|
|
if engine.dialect.name == SupportedDialect.SQLITE:
|
|
|
|
# With SQLite we do this in one go since it is faster
|
|
|
|
for table in STATISTICS_TABLES:
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
session.connection().execute(
|
|
|
|
text(
|
|
|
|
f"UPDATE {table} set start_ts=strftime('%s',start) + "
|
|
|
|
"cast(substr(start,-7) AS FLOAT), "
|
|
|
|
f"created_ts=strftime('%s',created) + "
|
|
|
|
"cast(substr(created,-7) AS FLOAT), "
|
|
|
|
f"last_reset_ts=strftime('%s',last_reset) + "
|
|
|
|
"cast(substr(last_reset,-7) AS FLOAT);"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
elif engine.dialect.name == SupportedDialect.MYSQL:
|
|
|
|
# With MySQL we do this in chunks to avoid hitting the `innodb_buffer_pool_size` limit
|
|
|
|
# We also need to do this in a loop since we can't be sure that we have
|
|
|
|
# updated all rows in the table until the rowcount is 0
|
|
|
|
for table in STATISTICS_TABLES:
|
|
|
|
result = None
|
|
|
|
while result is None or result.rowcount > 0: # type: ignore[unreachable]
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
result = session.connection().execute(
|
|
|
|
text(
|
|
|
|
f"UPDATE {table} set start_ts="
|
2023-03-09 16:03:41 -10:00
|
|
|
"IF(start is NULL or UNIX_TIMESTAMP(start) is NULL,0,"
|
2023-02-09 12:24:19 -06:00
|
|
|
"UNIX_TIMESTAMP(start) "
|
|
|
|
"), "
|
|
|
|
"created_ts="
|
|
|
|
"UNIX_TIMESTAMP(created), "
|
|
|
|
"last_reset_ts="
|
|
|
|
"UNIX_TIMESTAMP(last_reset) "
|
|
|
|
"where start_ts is NULL "
|
2023-03-03 17:00:13 -10:00
|
|
|
"LIMIT 100000;"
|
2023-02-09 12:24:19 -06:00
|
|
|
)
|
|
|
|
)
|
|
|
|
elif engine.dialect.name == SupportedDialect.POSTGRESQL:
|
|
|
|
# With Postgresql we do this in chunks to avoid using too much memory
|
|
|
|
# We also need to do this in a loop since we can't be sure that we have
|
|
|
|
# updated all rows in the table until the rowcount is 0
|
|
|
|
for table in STATISTICS_TABLES:
|
|
|
|
result = None
|
|
|
|
while result is None or result.rowcount > 0: # type: ignore[unreachable]
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
result = session.connection().execute(
|
|
|
|
text(
|
|
|
|
f"UPDATE {table} set start_ts=" # nosec
|
|
|
|
"(case when start is NULL then 0 else EXTRACT(EPOCH FROM start) end), "
|
|
|
|
"created_ts=EXTRACT(EPOCH FROM created), "
|
|
|
|
"last_reset_ts=EXTRACT(EPOCH FROM last_reset) "
|
|
|
|
"where id IN ( "
|
2023-03-03 17:00:13 -10:00
|
|
|
f"SELECT id FROM {table} where start_ts is NULL LIMIT 100000 "
|
2023-02-09 12:24:19 -06:00
|
|
|
" );"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
2023-03-08 14:51:45 -10:00
|
|
|
def _context_id_to_bytes(context_id: str | None) -> bytes | None:
|
|
|
|
"""Convert a context_id to bytes."""
|
|
|
|
if context_id is None:
|
|
|
|
return None
|
2023-03-30 14:53:47 -10:00
|
|
|
with contextlib.suppress(ValueError):
|
|
|
|
# There may be garbage in the context_id column
|
|
|
|
# from custom integrations that are not UUIDs or
|
|
|
|
# ULIDs that filled the column to the max length
|
|
|
|
# so we need to catch the ValueError and return
|
|
|
|
# None if it happens
|
|
|
|
if len(context_id) == 32:
|
|
|
|
return UUID(context_id).bytes
|
|
|
|
if len(context_id) == 26:
|
|
|
|
return ulid_to_bytes(context_id)
|
2023-03-08 14:51:45 -10:00
|
|
|
return None
|
|
|
|
|
|
|
|
|
2023-03-12 15:41:48 -10:00
|
|
|
@retryable_database_job("migrate states context_ids to binary format")
|
|
|
|
def migrate_states_context_ids(instance: Recorder) -> bool:
|
|
|
|
"""Migrate states context_ids to use binary format."""
|
2023-03-08 14:51:45 -10:00
|
|
|
_to_bytes = _context_id_to_bytes
|
|
|
|
session_maker = instance.get_session
|
2023-03-12 15:41:48 -10:00
|
|
|
_LOGGER.debug("Migrating states context_ids to binary format")
|
2023-03-08 14:51:45 -10:00
|
|
|
with session_scope(session=session_maker()) as session:
|
2023-03-12 15:41:48 -10:00
|
|
|
if states := session.execute(find_states_context_ids_to_migrate()).all():
|
2023-03-08 14:51:45 -10:00
|
|
|
session.execute(
|
2023-03-12 15:41:48 -10:00
|
|
|
update(States),
|
2023-03-08 14:51:45 -10:00
|
|
|
[
|
|
|
|
{
|
2023-03-12 15:41:48 -10:00
|
|
|
"state_id": state_id,
|
2023-03-08 14:51:45 -10:00
|
|
|
"context_id": None,
|
|
|
|
"context_id_bin": _to_bytes(context_id) or _EMPTY_CONTEXT_ID,
|
|
|
|
"context_user_id": None,
|
|
|
|
"context_user_id_bin": _to_bytes(context_user_id),
|
|
|
|
"context_parent_id": None,
|
|
|
|
"context_parent_id_bin": _to_bytes(context_parent_id),
|
|
|
|
}
|
2023-03-12 15:41:48 -10:00
|
|
|
for state_id, context_id, context_user_id, context_parent_id in states
|
2023-03-08 14:51:45 -10:00
|
|
|
],
|
|
|
|
)
|
2023-03-12 15:41:48 -10:00
|
|
|
# If there is more work to do return False
|
|
|
|
# so that we can be called again
|
|
|
|
is_done = not states
|
|
|
|
|
|
|
|
if is_done:
|
|
|
|
_drop_index(session_maker, "states", "ix_states_context_id")
|
|
|
|
|
|
|
|
_LOGGER.debug("Migrating states context_ids to binary format: done=%s", is_done)
|
|
|
|
return is_done
|
|
|
|
|
|
|
|
|
|
|
|
@retryable_database_job("migrate events context_ids to binary format")
|
|
|
|
def migrate_events_context_ids(instance: Recorder) -> bool:
|
|
|
|
"""Migrate events context_ids to use binary format."""
|
|
|
|
_to_bytes = _context_id_to_bytes
|
|
|
|
session_maker = instance.get_session
|
|
|
|
_LOGGER.debug("Migrating context_ids to binary format")
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
if events := session.execute(find_events_context_ids_to_migrate()).all():
|
2023-03-08 14:51:45 -10:00
|
|
|
session.execute(
|
2023-03-12 15:41:48 -10:00
|
|
|
update(Events),
|
2023-03-08 14:51:45 -10:00
|
|
|
[
|
|
|
|
{
|
2023-03-12 15:41:48 -10:00
|
|
|
"event_id": event_id,
|
2023-03-08 14:51:45 -10:00
|
|
|
"context_id": None,
|
|
|
|
"context_id_bin": _to_bytes(context_id) or _EMPTY_CONTEXT_ID,
|
|
|
|
"context_user_id": None,
|
|
|
|
"context_user_id_bin": _to_bytes(context_user_id),
|
|
|
|
"context_parent_id": None,
|
|
|
|
"context_parent_id_bin": _to_bytes(context_parent_id),
|
|
|
|
}
|
2023-03-12 15:41:48 -10:00
|
|
|
for event_id, context_id, context_user_id, context_parent_id in events
|
2023-03-08 14:51:45 -10:00
|
|
|
],
|
|
|
|
)
|
|
|
|
# If there is more work to do return False
|
|
|
|
# so that we can be called again
|
2023-03-12 15:41:48 -10:00
|
|
|
is_done = not events
|
2023-03-08 14:51:45 -10:00
|
|
|
|
|
|
|
if is_done:
|
2023-03-12 14:07:05 -10:00
|
|
|
_drop_index(session_maker, "events", "ix_events_context_id")
|
2023-03-08 14:51:45 -10:00
|
|
|
|
2023-03-12 15:41:48 -10:00
|
|
|
_LOGGER.debug("Migrating events context_ids to binary format: done=%s", is_done)
|
2023-03-08 14:51:45 -10:00
|
|
|
return is_done
|
|
|
|
|
|
|
|
|
2023-03-12 15:41:48 -10:00
|
|
|
@retryable_database_job("migrate events event_types to event_type_ids")
|
2023-03-11 09:54:55 -10:00
|
|
|
def migrate_event_type_ids(instance: Recorder) -> bool:
|
|
|
|
"""Migrate event_type to event_type_ids."""
|
|
|
|
session_maker = instance.get_session
|
|
|
|
_LOGGER.debug("Migrating event_types")
|
|
|
|
event_type_manager = instance.event_type_manager
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
if events := session.execute(find_event_type_to_migrate()).all():
|
|
|
|
event_types = {event_type for _, event_type in events}
|
|
|
|
event_type_to_id = event_type_manager.get_many(event_types, session)
|
|
|
|
if missing_event_types := {
|
2023-03-12 10:01:58 -10:00
|
|
|
# We should never see None for the event_type in the events table
|
|
|
|
# but we need to be defensive so we don't fail the migration
|
|
|
|
# because of a bad event
|
|
|
|
_EMPTY_EVENT_TYPE if event_type is None else event_type
|
2023-03-11 09:54:55 -10:00
|
|
|
for event_type, event_id in event_type_to_id.items()
|
|
|
|
if event_id is None
|
|
|
|
}:
|
|
|
|
missing_db_event_types = [
|
|
|
|
EventTypes(event_type=event_type)
|
|
|
|
for event_type in missing_event_types
|
|
|
|
]
|
|
|
|
session.add_all(missing_db_event_types)
|
|
|
|
session.flush() # Assign ids
|
|
|
|
for db_event_type in missing_db_event_types:
|
|
|
|
# We cannot add the assigned ids to the event_type_manager
|
|
|
|
# because the commit could get rolled back
|
2023-03-12 10:01:58 -10:00
|
|
|
assert (
|
|
|
|
db_event_type.event_type is not None
|
|
|
|
), "event_type should never be None"
|
2023-03-11 09:54:55 -10:00
|
|
|
event_type_to_id[
|
|
|
|
db_event_type.event_type
|
|
|
|
] = db_event_type.event_type_id
|
|
|
|
|
|
|
|
session.execute(
|
|
|
|
update(Events),
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"event_id": event_id,
|
|
|
|
"event_type": None,
|
|
|
|
"event_type_id": event_type_to_id[event_type],
|
|
|
|
}
|
|
|
|
for event_id, event_type in events
|
|
|
|
],
|
|
|
|
)
|
|
|
|
|
|
|
|
# If there is more work to do return False
|
|
|
|
# so that we can be called again
|
|
|
|
is_done = not events
|
|
|
|
|
|
|
|
if is_done:
|
|
|
|
instance.event_type_manager.active = True
|
|
|
|
|
|
|
|
_LOGGER.debug("Migrating event_types done=%s", is_done)
|
|
|
|
return is_done
|
|
|
|
|
|
|
|
|
2023-03-12 15:41:48 -10:00
|
|
|
@retryable_database_job("migrate states entity_ids to states_meta")
|
2023-03-12 10:01:58 -10:00
|
|
|
def migrate_entity_ids(instance: Recorder) -> bool:
|
|
|
|
"""Migrate entity_ids to states_meta.
|
|
|
|
|
|
|
|
We do this in two steps because we need the history queries to work
|
|
|
|
while we are migrating.
|
|
|
|
|
|
|
|
1. Link the states to the states_meta table
|
|
|
|
2. Remove the entity_id column from the states table (in post_migrate_entity_ids)
|
|
|
|
"""
|
|
|
|
_LOGGER.debug("Migrating entity_ids")
|
|
|
|
states_meta_manager = instance.states_meta_manager
|
|
|
|
with session_scope(session=instance.get_session()) as session:
|
|
|
|
if states := session.execute(find_entity_ids_to_migrate()).all():
|
|
|
|
entity_ids = {entity_id for _, entity_id in states}
|
2023-03-15 02:54:02 -10:00
|
|
|
entity_id_to_metadata_id = states_meta_manager.get_many(
|
|
|
|
entity_ids, session, True
|
|
|
|
)
|
2023-03-12 10:01:58 -10:00
|
|
|
if missing_entity_ids := {
|
|
|
|
# We should never see _EMPTY_ENTITY_ID in the states table
|
|
|
|
# but we need to be defensive so we don't fail the migration
|
|
|
|
# because of a bad state
|
|
|
|
_EMPTY_ENTITY_ID if entity_id is None else entity_id
|
|
|
|
for entity_id, metadata_id in entity_id_to_metadata_id.items()
|
|
|
|
if metadata_id is None
|
|
|
|
}:
|
|
|
|
missing_states_metadata = [
|
|
|
|
StatesMeta(entity_id=entity_id) for entity_id in missing_entity_ids
|
|
|
|
]
|
|
|
|
session.add_all(missing_states_metadata)
|
|
|
|
session.flush() # Assign ids
|
|
|
|
for db_states_metadata in missing_states_metadata:
|
|
|
|
# We cannot add the assigned ids to the states_meta_manager
|
|
|
|
# because the commit could get rolled back
|
|
|
|
assert (
|
|
|
|
db_states_metadata.entity_id is not None
|
|
|
|
), "entity_id should never be None"
|
|
|
|
entity_id_to_metadata_id[
|
|
|
|
db_states_metadata.entity_id
|
|
|
|
] = db_states_metadata.metadata_id
|
|
|
|
|
|
|
|
session.execute(
|
|
|
|
update(States),
|
|
|
|
[
|
|
|
|
{
|
|
|
|
"state_id": state_id,
|
|
|
|
# We cannot set "entity_id": None yet since
|
|
|
|
# the history queries still need to work while the
|
|
|
|
# migration is in progress and we will do this in
|
|
|
|
# post_migrate_entity_ids
|
|
|
|
"metadata_id": entity_id_to_metadata_id[entity_id],
|
|
|
|
}
|
|
|
|
for state_id, entity_id in states
|
|
|
|
],
|
|
|
|
)
|
|
|
|
|
|
|
|
# If there is more work to do return False
|
|
|
|
# so that we can be called again
|
|
|
|
is_done = not states
|
|
|
|
|
|
|
|
_LOGGER.debug("Migrating entity_ids done=%s", is_done)
|
|
|
|
return is_done
|
|
|
|
|
|
|
|
|
2023-03-12 15:41:48 -10:00
|
|
|
@retryable_database_job("post migrate states entity_ids to states_meta")
|
2023-03-12 10:01:58 -10:00
|
|
|
def post_migrate_entity_ids(instance: Recorder) -> bool:
|
|
|
|
"""Remove old entity_id strings from states.
|
|
|
|
|
|
|
|
We cannot do this in migrate_entity_ids since the history queries
|
|
|
|
still need to work while the migration is in progress.
|
|
|
|
"""
|
|
|
|
session_maker = instance.get_session
|
|
|
|
_LOGGER.debug("Cleanup legacy entity_ids")
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
cursor_result = session.connection().execute(batch_cleanup_entity_ids())
|
|
|
|
is_done = not cursor_result or cursor_result.rowcount == 0
|
|
|
|
# If there is more work to do return False
|
|
|
|
# so that we can be called again
|
|
|
|
|
|
|
|
if is_done:
|
|
|
|
# Drop the old indexes since they are no longer needed
|
|
|
|
_drop_index(session_maker, "states", "ix_states_entity_id_last_updated_ts")
|
|
|
|
|
|
|
|
_LOGGER.debug("Cleanup legacy entity_ids done=%s", is_done)
|
|
|
|
return is_done
|
|
|
|
|
|
|
|
|
2023-03-17 14:27:33 -10:00
|
|
|
@retryable_database_job("cleanup_legacy_event_ids")
|
|
|
|
def cleanup_legacy_states_event_ids(instance: Recorder) -> bool:
|
|
|
|
"""Remove old event_id index from states.
|
|
|
|
|
|
|
|
We used to link states to events using the event_id column but we no
|
|
|
|
longer store state changed events in the events table.
|
|
|
|
|
|
|
|
If all old states have been purged and existing states are in the new
|
|
|
|
format we can drop the index since it can take up ~10MB per 1M rows.
|
|
|
|
"""
|
|
|
|
session_maker = instance.get_session
|
|
|
|
_LOGGER.debug("Cleanup legacy entity_ids")
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
result = session.execute(has_used_states_event_ids()).scalar()
|
|
|
|
# In the future we may migrate existing states to the new format
|
|
|
|
# but in practice very few of these still exist in production and
|
|
|
|
# removing the index is likely all that needs to happen.
|
|
|
|
all_gone = not result
|
|
|
|
|
|
|
|
if all_gone:
|
|
|
|
# Only drop the index if there are no more event_ids in the states table
|
|
|
|
# i.e. they are all NULL
|
2023-03-19 16:04:24 -10:00
|
|
|
assert instance.engine is not None, "engine should never be None"
|
|
|
|
if instance.dialect_name != SupportedDialect.SQLITE:
|
|
|
|
# SQLite does not support dropping foreign key constraints
|
|
|
|
# so we can't drop the index at this time but we can avoid
|
|
|
|
# looking for legacy rows during purge
|
|
|
|
_drop_foreign_key_constraints(
|
|
|
|
session_maker, instance.engine, TABLE_STATES, ["event_id"]
|
|
|
|
)
|
|
|
|
_drop_index(session_maker, "states", LEGACY_STATES_EVENT_ID_INDEX)
|
|
|
|
instance.use_legacy_events_index = False
|
2023-03-17 14:27:33 -10:00
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
2022-10-13 13:01:27 +02:00
|
|
|
def _initialize_database(session: Session) -> bool:
|
2023-01-16 19:51:11 +01:00
|
|
|
"""Initialize a new database.
|
2022-10-13 13:01:27 +02:00
|
|
|
|
|
|
|
The function determines the schema version by inspecting the db structure.
|
2017-02-26 14:38:06 -08:00
|
|
|
|
2017-09-23 17:15:46 +02:00
|
|
|
When the schema version is not present in the db, either the db was just
|
2017-02-26 14:38:06 -08:00
|
|
|
created with the correct schema, or this is a db created before schema
|
|
|
|
versions were tracked. For now, we'll test if the changes for schema
|
|
|
|
version 1 are present to make the determination. Eventually this logic
|
|
|
|
can be removed and we can assume a new db is being created.
|
|
|
|
"""
|
2022-04-12 19:41:46 +02:00
|
|
|
inspector = sqlalchemy.inspect(session.connection())
|
2017-02-26 14:38:06 -08:00
|
|
|
indexes = inspector.get_indexes("events")
|
|
|
|
|
|
|
|
for index in indexes:
|
2023-01-02 13:26:08 -10:00
|
|
|
if index["column_names"] in (["time_fired"], ["time_fired_ts"]):
|
2017-02-26 14:38:06 -08:00
|
|
|
# Schema addition from version 1 detected. New DB.
|
2021-09-16 10:57:15 +02:00
|
|
|
session.add(StatisticsRuns(start=get_start_time()))
|
2019-07-31 12:25:30 -07:00
|
|
|
session.add(SchemaChanges(schema_version=SCHEMA_VERSION))
|
2022-10-13 13:01:27 +02:00
|
|
|
return True
|
2017-02-26 14:38:06 -08:00
|
|
|
|
|
|
|
# Version 1 schema changes not found, this db needs to be migrated.
|
|
|
|
current_version = SchemaChanges(schema_version=0)
|
|
|
|
session.add(current_version)
|
2022-10-13 13:01:27 +02:00
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
def initialize_database(session_maker: Callable[[], Session]) -> bool:
|
2023-01-16 19:51:11 +01:00
|
|
|
"""Initialize a new database."""
|
2022-10-13 13:01:27 +02:00
|
|
|
try:
|
|
|
|
with session_scope(session=session_maker()) as session:
|
|
|
|
if _get_schema_version(session) is not None:
|
|
|
|
return True
|
|
|
|
return _initialize_database(session)
|
|
|
|
|
|
|
|
except Exception as err: # pylint: disable=broad-except
|
|
|
|
_LOGGER.exception("Error when initialise database: %s", err)
|
|
|
|
return False
|