Unexport unavailable metrics in Prometheus (#125492)

This commit is contained in:
Adam Goode 2024-10-29 12:56:54 -04:00 committed by GitHub
parent 45fb21e32d
commit f12ba5f7a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 72 additions and 21 deletions

View file

@ -76,6 +76,8 @@ from homeassistant.util.unit_conversion import TemperatureConverter
_LOGGER = logging.getLogger(__name__)
API_ENDPOINT = "/api/prometheus"
IGNORED_STATES = frozenset({STATE_UNAVAILABLE, STATE_UNKNOWN})
DOMAIN = "prometheus"
CONF_FILTER = "filter"
@ -211,14 +213,6 @@ class PrometheusMetrics:
"""Add/update a state in Prometheus."""
entity_id = state.entity_id
_LOGGER.debug("Handling state update for %s", entity_id)
domain, _ = hacore.split_entity_id(entity_id)
ignored_states = (STATE_UNAVAILABLE, STATE_UNKNOWN)
handler = f"_handle_{domain}"
if hasattr(self, handler) and state.state not in ignored_states:
getattr(self, handler)(state)
labels = self._labels(state)
state_change = self._metric(
@ -231,7 +225,7 @@ class PrometheusMetrics:
prometheus_client.Gauge,
"Entity is available (not in the unavailable or unknown state)",
)
entity_available.labels(**labels).set(float(state.state not in ignored_states))
entity_available.labels(**labels).set(float(state.state not in IGNORED_STATES))
last_updated_time_seconds = self._metric(
"last_updated_time_seconds",
@ -240,6 +234,18 @@ class PrometheusMetrics:
)
last_updated_time_seconds.labels(**labels).set(state.last_updated.timestamp())
if state.state in IGNORED_STATES:
self._remove_labelsets(
entity_id,
None,
{state_change, entity_available, last_updated_time_seconds},
)
else:
domain, _ = hacore.split_entity_id(entity_id)
handler = f"_handle_{domain}"
if hasattr(self, handler) and state.state:
getattr(self, handler)(state)
def handle_entity_registry_updated(
self, event: Event[EventEntityRegistryUpdatedData]
) -> None:
@ -266,10 +272,17 @@ class PrometheusMetrics:
self._remove_labelsets(metrics_entity_id)
def _remove_labelsets(
self, entity_id: str, friendly_name: str | None = None
self,
entity_id: str,
friendly_name: str | None = None,
ignored_metrics: set[MetricWrapperBase] | None = None,
) -> None:
"""Remove labelsets matching the given entity id from all metrics."""
"""Remove labelsets matching the given entity id from all non-ignored metrics."""
if ignored_metrics is None:
ignored_metrics = set()
for metric in list(self._metrics.values()):
if metric in ignored_metrics:
continue
for sample in cast(list[prometheus_client.Metric], metric.collect())[
0
].samples:
@ -663,7 +676,7 @@ class PrometheusMetrics:
def _sensor_override_component_metric(
self, state: State, unit: str | None
) -> str | None:
"""Get metric from override in component confioguration."""
"""Get metric from override in component configuration."""
return self._component_config.get(state.entity_id).get(CONF_OVERRIDE_METRIC)
@staticmethod

View file

@ -74,6 +74,7 @@ from homeassistant.const import (
STATE_OPEN,
STATE_OPENING,
STATE_UNAVAILABLE,
STATE_UNKNOWN,
UnitOfEnergy,
UnitOfTemperature,
)
@ -1666,13 +1667,15 @@ async def test_disabling_entity(
@pytest.mark.parametrize("namespace", [""])
async def test_entity_becomes_unavailable_with_export(
@pytest.mark.parametrize("unavailable_state", [STATE_UNAVAILABLE, STATE_UNKNOWN])
async def test_entity_becomes_unavailable(
hass: HomeAssistant,
entity_registry: er.EntityRegistry,
client: ClientSessionGenerator,
sensor_entities: dict[str, er.RegistryEntry],
unavailable_state: str,
) -> None:
"""Test an entity that becomes unavailable is still exported."""
"""Test an entity that becomes unavailable/unknown is no longer exported."""
data = {**sensor_entities}
await hass.async_block_till_done()
@ -1699,6 +1702,20 @@ async def test_entity_becomes_unavailable_with_export(
entity="sensor.outside_temperature",
).withValue(1).assert_in_metrics(body)
EntityMetric(
metric_name="last_updated_time_seconds",
domain="sensor",
friendly_name="Outside Temperature",
entity="sensor.outside_temperature",
).assert_in_metrics(body)
EntityMetric(
metric_name="battery_level_percent",
domain="sensor",
friendly_name="Outside Temperature",
entity="sensor.outside_temperature",
).withValue(12.0).assert_in_metrics(body)
EntityMetric(
metric_name="sensor_humidity_percent",
domain="sensor",
@ -1720,21 +1737,28 @@ async def test_entity_becomes_unavailable_with_export(
entity="sensor.outside_humidity",
).withValue(1).assert_in_metrics(body)
# Make sensor_1 unavailable.
# Make sensor_1 unavailable/unknown.
set_state_with_entry(
hass, data["sensor_1"], STATE_UNAVAILABLE, data["sensor_1_attributes"]
hass, data["sensor_1"], unavailable_state, data["sensor_1_attributes"]
)
await hass.async_block_till_done()
body = await generate_latest_metrics(client)
# Check that only the availability changed on sensor_1.
# Check that the availability changed on sensor_1 and the metric with the value is gone.
EntityMetric(
metric_name="sensor_temperature_celsius",
domain="sensor",
friendly_name="Outside Temperature",
entity="sensor.outside_temperature",
).withValue(15.6).assert_in_metrics(body)
).assert_not_in_metrics(body)
EntityMetric(
metric_name="battery_level_percent",
domain="sensor",
friendly_name="Outside Temperature",
entity="sensor.outside_temperature",
).assert_not_in_metrics(body)
EntityMetric(
metric_name="state_change_total",
@ -1750,6 +1774,13 @@ async def test_entity_becomes_unavailable_with_export(
entity="sensor.outside_temperature",
).withValue(0.0).assert_in_metrics(body)
EntityMetric(
metric_name="last_updated_time_seconds",
domain="sensor",
friendly_name="Outside Temperature",
entity="sensor.outside_temperature",
).assert_in_metrics(body)
# The other sensor should be unchanged.
EntityMetric(
metric_name="sensor_humidity_percent",
@ -1772,8 +1803,8 @@ async def test_entity_becomes_unavailable_with_export(
entity="sensor.outside_humidity",
).withValue(1).assert_in_metrics(body)
# Bring sensor_1 back and check that it is correct.
set_state_with_entry(hass, data["sensor_1"], 200.0, data["sensor_1_attributes"])
# Bring sensor_1 back and check that it returned.
set_state_with_entry(hass, data["sensor_1"], 201.0, data["sensor_1_attributes"])
await hass.async_block_till_done()
body = await generate_latest_metrics(client)
@ -1783,7 +1814,14 @@ async def test_entity_becomes_unavailable_with_export(
domain="sensor",
friendly_name="Outside Temperature",
entity="sensor.outside_temperature",
).withValue(200.0).assert_in_metrics(body)
).withValue(201.0).assert_in_metrics(body)
EntityMetric(
metric_name="battery_level_percent",
domain="sensor",
friendly_name="Outside Temperature",
entity="sensor.outside_temperature",
).withValue(12.0).assert_in_metrics(body)
EntityMetric(
metric_name="state_change_total",