Raise an issue for legacy SQL queries that will cause full table scans (#90971)

* Raise an issue for SQL queries that will cause full table scans

* Raise an issue for SQL queries that will cause full table scans

* Raise an issue for SQL queries that will cause full table scans

* Raise an issue for SQL queries that will cause full table scans

* Update homeassistant/components/sql/sensor.py

Co-authored-by: Paulus Schoutsen <balloob@gmail.com>

* coverage

---------

Co-authored-by: Paulus Schoutsen <balloob@gmail.com>
This commit is contained in:
J. Nick Koston 2023-04-07 16:32:36 -10:00 committed by GitHub
parent dbfd0d50ba
commit 53d7e33607
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 116 additions and 5 deletions

View file

@ -30,6 +30,7 @@ from homeassistant.const import (
)
from homeassistant.core import HomeAssistant
from homeassistant.exceptions import TemplateError
from homeassistant.helpers import issue_registry as ir
from homeassistant.helpers.device_registry import DeviceEntryType
from homeassistant.helpers.entity import DeviceInfo
from homeassistant.helpers.entity_platform import AddEntitiesCallback
@ -153,10 +154,44 @@ async def async_setup_sensor(
):
return
upper_query = query_str.upper()
if use_database_executor:
redacted_query = redact_credentials(query_str)
issue_key = unique_id if unique_id else redacted_query
# If the query has a unique id and they fix it we can dismiss the issue
# but if it doesn't have a unique id they have to ignore it instead
if "ENTITY_ID" in upper_query and "STATES_META" not in upper_query:
_LOGGER.error(
"The query `%s` contains the keyword `entity_id` but does not "
"reference the `states_meta` table. This will cause a full table "
"scan and database instability. Please check the documentation and use "
"`states_meta.entity_id` instead",
redacted_query,
)
ir.async_create_issue(
hass,
DOMAIN,
f"entity_id_query_does_full_table_scan_{issue_key}",
translation_key="entity_id_query_does_full_table_scan",
translation_placeholders={"query": redacted_query},
is_fixable=False,
severity=ir.IssueSeverity.ERROR,
)
raise ValueError(
"Query contains entity_id but does not reference states_meta"
)
ir.async_delete_issue(
hass, DOMAIN, f"entity_id_query_does_full_table_scan_{issue_key}"
)
# MSSQL uses TOP and not LIMIT
if not ("LIMIT" in query_str.upper() or "SELECT TOP" in query_str.upper()):
if not ("LIMIT" in upper_query or "SELECT TOP" in upper_query):
if "mssql" in db_url:
query_str = query_str.upper().replace("SELECT", "SELECT TOP 1")
query_str = upper_query.replace("SELECT", "SELECT TOP 1")
else:
query_str = query_str.replace(";", "") + " LIMIT 1;"

View file

@ -53,5 +53,11 @@
"db_url_invalid": "[%key:component::sql::config::error::db_url_invalid%]",
"query_invalid": "[%key:component::sql::config::error::query_invalid%]"
}
},
"issues": {
"entity_id_query_does_full_table_scan": {
"title": "SQL query does full table scan",
"description": "The query `{query}` contains the keyword `entity_id` but does not reference the `states_meta` table. This will cause a full table scan and database instability. Please check the documentation and use `states_meta.entity_id` instead."
}
}
}

View file

@ -63,6 +63,24 @@ YAML_CONFIG = {
}
}
YAML_CONFIG_FULL_TABLE_SCAN = {
"sql": {
CONF_NAME: "Get entity_id",
CONF_QUERY: "SELECT entity_id from states",
CONF_COLUMN_NAME: "entity_id",
CONF_UNIQUE_ID: "entity_id_12345",
}
}
YAML_CONFIG_FULL_TABLE_SCAN_NO_UNIQUE_ID = {
"sql": {
CONF_NAME: "Get entity_id",
CONF_QUERY: "SELECT entity_id from states",
CONF_COLUMN_NAME: "entity_id",
}
}
YAML_CONFIG_BINARY = {
"sql": {
CONF_DB_URL: "sqlite://",

View file

@ -11,14 +11,21 @@ from sqlalchemy.exc import SQLAlchemyError
from homeassistant.components.recorder import Recorder
from homeassistant.components.sensor import SensorDeviceClass, SensorStateClass
from homeassistant.components.sql.const import DOMAIN
from homeassistant.components.sql.const import CONF_QUERY, DOMAIN
from homeassistant.config_entries import SOURCE_USER
from homeassistant.const import STATE_UNKNOWN
from homeassistant.const import CONF_UNIQUE_ID, STATE_UNKNOWN
from homeassistant.core import HomeAssistant
from homeassistant.helpers import issue_registry as ir
from homeassistant.setup import async_setup_component
from homeassistant.util import dt
from . import YAML_CONFIG, YAML_CONFIG_BINARY, init_integration
from . import (
YAML_CONFIG,
YAML_CONFIG_BINARY,
YAML_CONFIG_FULL_TABLE_SCAN,
YAML_CONFIG_FULL_TABLE_SCAN_NO_UNIQUE_ID,
init_integration,
)
from tests.common import MockConfigEntry, async_fire_time_changed
@ -322,3 +329,48 @@ async def test_binary_data_from_yaml_setup(
state = hass.states.get("sensor.get_binary_value")
assert state.state == "0xd34324324230392032"
assert state.attributes["test_attr"] == "0xd343aa"
async def test_issue_when_using_old_query(
recorder_mock: Recorder, hass: HomeAssistant, caplog: pytest.LogCaptureFixture
) -> None:
"""Test we create an issue for an old query that will do a full table scan."""
assert await async_setup_component(hass, DOMAIN, YAML_CONFIG_FULL_TABLE_SCAN)
await hass.async_block_till_done()
assert "Query contains entity_id but does not reference states_meta" in caplog.text
assert not hass.states.async_all()
issue_registry = ir.async_get(hass)
config = YAML_CONFIG_FULL_TABLE_SCAN["sql"]
unique_id = config[CONF_UNIQUE_ID]
issue = issue_registry.async_get_issue(
DOMAIN, f"entity_id_query_does_full_table_scan_{unique_id}"
)
assert issue.translation_placeholders == {"query": config[CONF_QUERY]}
async def test_issue_when_using_old_query_without_unique_id(
recorder_mock: Recorder, hass: HomeAssistant, caplog: pytest.LogCaptureFixture
) -> None:
"""Test we create an issue for an old query that will do a full table scan."""
assert await async_setup_component(
hass, DOMAIN, YAML_CONFIG_FULL_TABLE_SCAN_NO_UNIQUE_ID
)
await hass.async_block_till_done()
assert "Query contains entity_id but does not reference states_meta" in caplog.text
assert not hass.states.async_all()
issue_registry = ir.async_get(hass)
config = YAML_CONFIG_FULL_TABLE_SCAN_NO_UNIQUE_ID["sql"]
query = config[CONF_QUERY]
issue = issue_registry.async_get_issue(
DOMAIN, f"entity_id_query_does_full_table_scan_{query}"
)
assert issue.translation_placeholders == {"query": query}