From ec131d685e3f5e2054e58e3ba55110e7d80f40ce Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 4 Apr 2022 21:39:12 -1000 Subject: [PATCH] Auto repack the database on the second sunday of the month (#69314) --- homeassistant/components/recorder/__init__.py | 10 +- homeassistant/components/recorder/util.py | 22 +++- tests/components/recorder/test_init.py | 117 ++++++++++++++++++ tests/components/recorder/test_util.py | 19 ++- 4 files changed, 164 insertions(+), 4 deletions(-) diff --git a/homeassistant/components/recorder/__init__.py b/homeassistant/components/recorder/__init__.py index 0381e5a4671..db645b3a195 100644 --- a/homeassistant/components/recorder/__init__.py +++ b/homeassistant/components/recorder/__init__.py @@ -84,6 +84,7 @@ from .pool import POOL_SIZE, RecorderPool from .util import ( dburl_to_path, end_incomplete_runs, + is_second_sunday, move_away_broken_database, perodic_db_cleanups, session_scope, @@ -156,6 +157,7 @@ DB_LOCK_TIMEOUT = 30 DB_LOCK_QUEUE_CHECK_TIMEOUT = 1 CONF_AUTO_PURGE = "auto_purge" +CONF_AUTO_REPACK = "auto_repack" CONF_DB_URL = "db_url" CONF_DB_MAX_RETRIES = "db_max_retries" CONF_DB_RETRY_WAIT = "db_retry_wait" @@ -183,6 +185,7 @@ CONFIG_SCHEMA = vol.Schema( FILTER_SCHEMA.extend( { vol.Optional(CONF_AUTO_PURGE, default=True): cv.boolean, + vol.Optional(CONF_AUTO_REPACK, default=True): cv.boolean, vol.Optional(CONF_PURGE_KEEP_DAYS, default=10): vol.All( vol.Coerce(int), vol.Range(min=1) ), @@ -283,6 +286,7 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool: conf = config[DOMAIN] entity_filter = convert_include_exclude_filter(conf) auto_purge = conf[CONF_AUTO_PURGE] + auto_repack = conf[CONF_AUTO_REPACK] keep_days = conf[CONF_PURGE_KEEP_DAYS] commit_interval = conf[CONF_COMMIT_INTERVAL] db_max_retries = conf[CONF_DB_MAX_RETRIES] @@ -300,6 +304,7 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool: instance = hass.data[DATA_INSTANCE] = Recorder( hass=hass, auto_purge=auto_purge, + auto_repack=auto_repack, keep_days=keep_days, commit_interval=commit_interval, uri=db_url, @@ -570,6 +575,7 @@ class Recorder(threading.Thread): self, hass: HomeAssistant, auto_purge: bool, + auto_repack: bool, keep_days: int, commit_interval: int, uri: str, @@ -584,6 +590,7 @@ class Recorder(threading.Thread): self.hass = hass self.auto_purge = auto_purge + self.auto_repack = auto_repack self.keep_days = keep_days self._hass_started: asyncio.Future[object] = asyncio.Future() self.commit_interval = commit_interval @@ -808,8 +815,9 @@ class Recorder(threading.Thread): # Purge will schedule the perodic cleanups # after it completes to ensure it does not happen # until after the database is vacuumed + repack = self.auto_repack and is_second_sunday(now) purge_before = dt_util.utcnow() - timedelta(days=self.keep_days) - self.queue.put(PurgeTask(purge_before, repack=False, apply_filter=False)) + self.queue.put(PurgeTask(purge_before, repack=repack, apply_filter=False)) else: self.queue.put(PerodicCleanupTask()) diff --git a/homeassistant/components/recorder/util.py b/homeassistant/components/recorder/util.py index 487b8dd22f7..ff862e6a6b0 100644 --- a/homeassistant/components/recorder/util.py +++ b/homeassistant/components/recorder/util.py @@ -3,7 +3,7 @@ from __future__ import annotations from collections.abc import Callable, Generator from contextlib import contextmanager -from datetime import datetime, timedelta +from datetime import date, datetime, timedelta import functools import logging import os @@ -65,6 +65,10 @@ RETRYABLE_MYSQL_ERRORS = (1205, 1206, 1213) # 1206: The total number of locks exceeds the lock table size # 1213: Deadlock found when trying to get lock; try restarting transaction +FIRST_POSSIBLE_SUNDAY = 8 +SUNDAY_WEEKDAY = 6 +DAYS_IN_WEEK = 7 + @contextmanager def session_scope( @@ -501,3 +505,19 @@ def async_migration_in_progress(hass: HomeAssistant) -> bool: return False instance: Recorder = hass.data[DATA_INSTANCE] return instance.migration_in_progress + + +def second_sunday(year: int, month: int) -> date: + """Return the datetime.date for the second sunday of a month.""" + second = date(year, month, FIRST_POSSIBLE_SUNDAY) + day_of_week = second.weekday() + if day_of_week == SUNDAY_WEEKDAY: + return second + return second.replace( + day=(FIRST_POSSIBLE_SUNDAY + (SUNDAY_WEEKDAY - day_of_week) % DAYS_IN_WEEK) + ) + + +def is_second_sunday(date_time: datetime) -> bool: + """Check if a time is the second sunday of the month.""" + return bool(second_sunday(date_time.year, date_time.month).day == date_time.day) diff --git a/tests/components/recorder/test_init.py b/tests/components/recorder/test_init.py index fda49f02aab..b18e663fe6d 100644 --- a/tests/components/recorder/test_init.py +++ b/tests/components/recorder/test_init.py @@ -12,6 +12,7 @@ from sqlalchemy.exc import DatabaseError, OperationalError, SQLAlchemyError from homeassistant.components import recorder from homeassistant.components.recorder import ( CONF_AUTO_PURGE, + CONF_AUTO_REPACK, CONF_DB_URL, CONFIG_SCHEMA, DOMAIN, @@ -70,6 +71,7 @@ def _default_recorder(hass): return Recorder( hass, auto_purge=True, + auto_repack=True, keep_days=7, commit_interval=1, uri="sqlite://", @@ -627,6 +629,7 @@ async def test_defaults_set(hass): assert recorder_config is not None # pylint: disable=unsubscriptable-object assert recorder_config["auto_purge"] + assert recorder_config["auto_repack"] assert recorder_config["purge_keep_days"] == 10 @@ -695,6 +698,120 @@ def test_auto_purge(hass_recorder): dt_util.set_default_time_zone(original_tz) +@pytest.mark.parametrize("enable_nightly_purge", [True]) +def test_auto_purge_auto_repack_on_second_sunday(hass_recorder): + """Test periodic purge scheduling does a repack on the 2nd sunday.""" + hass = hass_recorder() + + original_tz = dt_util.DEFAULT_TIME_ZONE + + tz = dt_util.get_time_zone("Europe/Copenhagen") + dt_util.set_default_time_zone(tz) + + # Purging is scheduled to happen at 4:12am every day. Exercise this behavior by + # firing time changed events and advancing the clock around this time. Pick an + # arbitrary year in the future to avoid boundary conditions relative to the current + # date. + # + # The clock is started at 4:15am then advanced forward below + now = dt_util.utcnow() + test_time = datetime(now.year + 2, 1, 1, 4, 15, 0, tzinfo=tz) + run_tasks_at_time(hass, test_time) + + with patch( + "homeassistant.components.recorder.is_second_sunday", return_value=True + ), patch( + "homeassistant.components.recorder.purge.purge_old_data", return_value=True + ) as purge_old_data, patch( + "homeassistant.components.recorder.perodic_db_cleanups" + ) as perodic_db_cleanups: + # Advance one day, and the purge task should run + test_time = test_time + timedelta(days=1) + run_tasks_at_time(hass, test_time) + assert len(purge_old_data.mock_calls) == 1 + args, _ = purge_old_data.call_args_list[0] + assert args[2] is True # repack + assert len(perodic_db_cleanups.mock_calls) == 1 + + dt_util.set_default_time_zone(original_tz) + + +@pytest.mark.parametrize("enable_nightly_purge", [True]) +def test_auto_purge_auto_repack_disabled_on_second_sunday(hass_recorder): + """Test periodic purge scheduling does not auto repack on the 2nd sunday if disabled.""" + hass = hass_recorder({CONF_AUTO_REPACK: False}) + + original_tz = dt_util.DEFAULT_TIME_ZONE + + tz = dt_util.get_time_zone("Europe/Copenhagen") + dt_util.set_default_time_zone(tz) + + # Purging is scheduled to happen at 4:12am every day. Exercise this behavior by + # firing time changed events and advancing the clock around this time. Pick an + # arbitrary year in the future to avoid boundary conditions relative to the current + # date. + # + # The clock is started at 4:15am then advanced forward below + now = dt_util.utcnow() + test_time = datetime(now.year + 2, 1, 1, 4, 15, 0, tzinfo=tz) + run_tasks_at_time(hass, test_time) + + with patch( + "homeassistant.components.recorder.is_second_sunday", return_value=True + ), patch( + "homeassistant.components.recorder.purge.purge_old_data", return_value=True + ) as purge_old_data, patch( + "homeassistant.components.recorder.perodic_db_cleanups" + ) as perodic_db_cleanups: + # Advance one day, and the purge task should run + test_time = test_time + timedelta(days=1) + run_tasks_at_time(hass, test_time) + assert len(purge_old_data.mock_calls) == 1 + args, _ = purge_old_data.call_args_list[0] + assert args[2] is False # repack + assert len(perodic_db_cleanups.mock_calls) == 1 + + dt_util.set_default_time_zone(original_tz) + + +@pytest.mark.parametrize("enable_nightly_purge", [True]) +def test_auto_purge_no_auto_repack_on_not_second_sunday(hass_recorder): + """Test periodic purge scheduling does not do a repack unless its the 2nd sunday.""" + hass = hass_recorder() + + original_tz = dt_util.DEFAULT_TIME_ZONE + + tz = dt_util.get_time_zone("Europe/Copenhagen") + dt_util.set_default_time_zone(tz) + + # Purging is scheduled to happen at 4:12am every day. Exercise this behavior by + # firing time changed events and advancing the clock around this time. Pick an + # arbitrary year in the future to avoid boundary conditions relative to the current + # date. + # + # The clock is started at 4:15am then advanced forward below + now = dt_util.utcnow() + test_time = datetime(now.year + 2, 1, 1, 4, 15, 0, tzinfo=tz) + run_tasks_at_time(hass, test_time) + + with patch( + "homeassistant.components.recorder.is_second_sunday", return_value=False + ), patch( + "homeassistant.components.recorder.purge.purge_old_data", return_value=True + ) as purge_old_data, patch( + "homeassistant.components.recorder.perodic_db_cleanups" + ) as perodic_db_cleanups: + # Advance one day, and the purge task should run + test_time = test_time + timedelta(days=1) + run_tasks_at_time(hass, test_time) + assert len(purge_old_data.mock_calls) == 1 + args, _ = purge_old_data.call_args_list[0] + assert args[2] is False # repack + assert len(perodic_db_cleanups.mock_calls) == 1 + + dt_util.set_default_time_zone(original_tz) + + @pytest.mark.parametrize("enable_nightly_purge", [True]) def test_auto_purge_disabled(hass_recorder): """Test periodic db cleanup still run when auto purge is disabled.""" diff --git a/tests/components/recorder/test_util.py b/tests/components/recorder/test_util.py index fe38aa2ab4f..fa0a8fb3c49 100644 --- a/tests/components/recorder/test_util.py +++ b/tests/components/recorder/test_util.py @@ -1,5 +1,5 @@ """Test util methods.""" -from datetime import timedelta +from datetime import datetime, timedelta import os import sqlite3 from unittest.mock import MagicMock, patch @@ -12,7 +12,11 @@ from homeassistant.components import recorder from homeassistant.components.recorder import run_information_with_session, util from homeassistant.components.recorder.const import DATA_INSTANCE, SQLITE_URL_PREFIX from homeassistant.components.recorder.models import RecorderRuns -from homeassistant.components.recorder.util import end_incomplete_runs, session_scope +from homeassistant.components.recorder.util import ( + end_incomplete_runs, + is_second_sunday, + session_scope, +) from homeassistant.util import dt as dt_util from .common import corrupt_db_file @@ -584,3 +588,14 @@ async def test_write_lock_db(hass, tmp_path): # would be allowed to proceed as the goal is to prevent # all the other threads from accessing the database await hass.async_add_executor_job(_drop_table) + + +def test_is_second_sunday(): + """Test we can find the second sunday of the month.""" + assert is_second_sunday(datetime(2022, 1, 9, 0, 0, 0, tzinfo=dt_util.UTC)) is True + assert is_second_sunday(datetime(2022, 2, 13, 0, 0, 0, tzinfo=dt_util.UTC)) is True + assert is_second_sunday(datetime(2022, 3, 13, 0, 0, 0, tzinfo=dt_util.UTC)) is True + assert is_second_sunday(datetime(2022, 4, 10, 0, 0, 0, tzinfo=dt_util.UTC)) is True + assert is_second_sunday(datetime(2022, 5, 8, 0, 0, 0, tzinfo=dt_util.UTC)) is True + + assert is_second_sunday(datetime(2022, 1, 10, 0, 0, 0, tzinfo=dt_util.UTC)) is False