From 53d7e33607582e3a30d90e1943a8f02d23382235 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 7 Apr 2023 16:32:36 -1000 Subject: [PATCH] Raise an issue for legacy SQL queries that will cause full table scans (#90971) * Raise an issue for SQL queries that will cause full table scans * Raise an issue for SQL queries that will cause full table scans * Raise an issue for SQL queries that will cause full table scans * Raise an issue for SQL queries that will cause full table scans * Update homeassistant/components/sql/sensor.py Co-authored-by: Paulus Schoutsen * coverage --------- Co-authored-by: Paulus Schoutsen --- homeassistant/components/sql/sensor.py | 39 ++++++++++++++- homeassistant/components/sql/strings.json | 6 +++ tests/components/sql/__init__.py | 18 +++++++ tests/components/sql/test_sensor.py | 58 +++++++++++++++++++++-- 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/homeassistant/components/sql/sensor.py b/homeassistant/components/sql/sensor.py index c19c2c258bc..8408b98730b 100644 --- a/homeassistant/components/sql/sensor.py +++ b/homeassistant/components/sql/sensor.py @@ -30,6 +30,7 @@ from homeassistant.const import ( ) from homeassistant.core import HomeAssistant from homeassistant.exceptions import TemplateError +from homeassistant.helpers import issue_registry as ir from homeassistant.helpers.device_registry import DeviceEntryType from homeassistant.helpers.entity import DeviceInfo from homeassistant.helpers.entity_platform import AddEntitiesCallback @@ -153,10 +154,44 @@ async def async_setup_sensor( ): return + upper_query = query_str.upper() + if use_database_executor: + redacted_query = redact_credentials(query_str) + + issue_key = unique_id if unique_id else redacted_query + # If the query has a unique id and they fix it we can dismiss the issue + # but if it doesn't have a unique id they have to ignore it instead + + if "ENTITY_ID" in upper_query and "STATES_META" not in upper_query: + _LOGGER.error( + "The query `%s` contains the keyword `entity_id` but does not " + "reference the `states_meta` table. This will cause a full table " + "scan and database instability. Please check the documentation and use " + "`states_meta.entity_id` instead", + redacted_query, + ) + + ir.async_create_issue( + hass, + DOMAIN, + f"entity_id_query_does_full_table_scan_{issue_key}", + translation_key="entity_id_query_does_full_table_scan", + translation_placeholders={"query": redacted_query}, + is_fixable=False, + severity=ir.IssueSeverity.ERROR, + ) + raise ValueError( + "Query contains entity_id but does not reference states_meta" + ) + + ir.async_delete_issue( + hass, DOMAIN, f"entity_id_query_does_full_table_scan_{issue_key}" + ) + # MSSQL uses TOP and not LIMIT - if not ("LIMIT" in query_str.upper() or "SELECT TOP" in query_str.upper()): + if not ("LIMIT" in upper_query or "SELECT TOP" in upper_query): if "mssql" in db_url: - query_str = query_str.upper().replace("SELECT", "SELECT TOP 1") + query_str = upper_query.replace("SELECT", "SELECT TOP 1") else: query_str = query_str.replace(";", "") + " LIMIT 1;" diff --git a/homeassistant/components/sql/strings.json b/homeassistant/components/sql/strings.json index 2a300f75b3e..1e7aef4ffde 100644 --- a/homeassistant/components/sql/strings.json +++ b/homeassistant/components/sql/strings.json @@ -53,5 +53,11 @@ "db_url_invalid": "[%key:component::sql::config::error::db_url_invalid%]", "query_invalid": "[%key:component::sql::config::error::query_invalid%]" } + }, + "issues": { + "entity_id_query_does_full_table_scan": { + "title": "SQL query does full table scan", + "description": "The query `{query}` contains the keyword `entity_id` but does not reference the `states_meta` table. This will cause a full table scan and database instability. Please check the documentation and use `states_meta.entity_id` instead." + } } } diff --git a/tests/components/sql/__init__.py b/tests/components/sql/__init__.py index c794f7a6b9a..c976f87f50f 100644 --- a/tests/components/sql/__init__.py +++ b/tests/components/sql/__init__.py @@ -63,6 +63,24 @@ YAML_CONFIG = { } } +YAML_CONFIG_FULL_TABLE_SCAN = { + "sql": { + CONF_NAME: "Get entity_id", + CONF_QUERY: "SELECT entity_id from states", + CONF_COLUMN_NAME: "entity_id", + CONF_UNIQUE_ID: "entity_id_12345", + } +} + + +YAML_CONFIG_FULL_TABLE_SCAN_NO_UNIQUE_ID = { + "sql": { + CONF_NAME: "Get entity_id", + CONF_QUERY: "SELECT entity_id from states", + CONF_COLUMN_NAME: "entity_id", + } +} + YAML_CONFIG_BINARY = { "sql": { CONF_DB_URL: "sqlite://", diff --git a/tests/components/sql/test_sensor.py b/tests/components/sql/test_sensor.py index 426dd9e196f..811bb3f45bf 100644 --- a/tests/components/sql/test_sensor.py +++ b/tests/components/sql/test_sensor.py @@ -11,14 +11,21 @@ from sqlalchemy.exc import SQLAlchemyError from homeassistant.components.recorder import Recorder from homeassistant.components.sensor import SensorDeviceClass, SensorStateClass -from homeassistant.components.sql.const import DOMAIN +from homeassistant.components.sql.const import CONF_QUERY, DOMAIN from homeassistant.config_entries import SOURCE_USER -from homeassistant.const import STATE_UNKNOWN +from homeassistant.const import CONF_UNIQUE_ID, STATE_UNKNOWN from homeassistant.core import HomeAssistant +from homeassistant.helpers import issue_registry as ir from homeassistant.setup import async_setup_component from homeassistant.util import dt -from . import YAML_CONFIG, YAML_CONFIG_BINARY, init_integration +from . import ( + YAML_CONFIG, + YAML_CONFIG_BINARY, + YAML_CONFIG_FULL_TABLE_SCAN, + YAML_CONFIG_FULL_TABLE_SCAN_NO_UNIQUE_ID, + init_integration, +) from tests.common import MockConfigEntry, async_fire_time_changed @@ -322,3 +329,48 @@ async def test_binary_data_from_yaml_setup( state = hass.states.get("sensor.get_binary_value") assert state.state == "0xd34324324230392032" assert state.attributes["test_attr"] == "0xd343aa" + + +async def test_issue_when_using_old_query( + recorder_mock: Recorder, hass: HomeAssistant, caplog: pytest.LogCaptureFixture +) -> None: + """Test we create an issue for an old query that will do a full table scan.""" + + assert await async_setup_component(hass, DOMAIN, YAML_CONFIG_FULL_TABLE_SCAN) + await hass.async_block_till_done() + assert "Query contains entity_id but does not reference states_meta" in caplog.text + + assert not hass.states.async_all() + issue_registry = ir.async_get(hass) + + config = YAML_CONFIG_FULL_TABLE_SCAN["sql"] + + unique_id = config[CONF_UNIQUE_ID] + + issue = issue_registry.async_get_issue( + DOMAIN, f"entity_id_query_does_full_table_scan_{unique_id}" + ) + assert issue.translation_placeholders == {"query": config[CONF_QUERY]} + + +async def test_issue_when_using_old_query_without_unique_id( + recorder_mock: Recorder, hass: HomeAssistant, caplog: pytest.LogCaptureFixture +) -> None: + """Test we create an issue for an old query that will do a full table scan.""" + + assert await async_setup_component( + hass, DOMAIN, YAML_CONFIG_FULL_TABLE_SCAN_NO_UNIQUE_ID + ) + await hass.async_block_till_done() + assert "Query contains entity_id but does not reference states_meta" in caplog.text + + assert not hass.states.async_all() + issue_registry = ir.async_get(hass) + + config = YAML_CONFIG_FULL_TABLE_SCAN_NO_UNIQUE_ID["sql"] + query = config[CONF_QUERY] + + issue = issue_registry.async_get_issue( + DOMAIN, f"entity_id_query_does_full_table_scan_{query}" + ) + assert issue.translation_placeholders == {"query": query}