Add object source logger to profiler (#90650)
* Add object source logger to profiler
* fixes
* cleanup
* tweaks
* logging
* logging
* too intensive
* adjust
* Update homeassistant/bootstrap.py
* fixes
* fixes
* coverage
parent 2229a63acd
commit 51ff027fce

3 changed files with 314 additions and 30 deletions

homeassistant/components/profiler/__init__.py
@@ -17,7 +17,7 @@ import voluptuous as vol
 from homeassistant.components import persistent_notification
 from homeassistant.config_entries import ConfigEntry
 from homeassistant.const import CONF_SCAN_INTERVAL, CONF_TYPE
-from homeassistant.core import HomeAssistant, ServiceCall
+from homeassistant.core import HomeAssistant, ServiceCall, callback
 from homeassistant.exceptions import HomeAssistantError
 import homeassistant.helpers.config_validation as cv
 from homeassistant.helpers.event import async_track_time_interval
@@ -29,6 +29,8 @@ SERVICE_START = "start"
 SERVICE_MEMORY = "memory"
 SERVICE_START_LOG_OBJECTS = "start_log_objects"
 SERVICE_STOP_LOG_OBJECTS = "stop_log_objects"
+SERVICE_START_LOG_OBJECT_SOURCES = "start_log_object_sources"
+SERVICE_STOP_LOG_OBJECT_SOURCES = "stop_log_object_sources"
 SERVICE_DUMP_LOG_OBJECTS = "dump_log_objects"
 SERVICE_LRU_STATS = "lru_stats"
 SERVICE_LOG_THREAD_FRAMES = "log_thread_frames"
@@ -60,7 +62,10 @@ SERVICES = (
 
 DEFAULT_SCAN_INTERVAL = timedelta(seconds=30)
+
+DEFAULT_MAX_OBJECTS = 5
 
 CONF_SECONDS = "seconds"
+CONF_MAX_OBJECTS = "max_objects"
 
 LOG_INTERVAL_SUB = "log_interval_subscription"
 
@@ -85,7 +90,7 @@ async def async_setup_entry(  # noqa: C901
 
     async def _async_start_log_objects(call: ServiceCall) -> None:
         if LOG_INTERVAL_SUB in domain_data:
-            domain_data[LOG_INTERVAL_SUB]()
+            raise HomeAssistantError("Object logging already started")
 
         persistent_notification.async_create(
            hass,
@@ -103,21 +108,53 @@ async def async_setup_entry(  # noqa: C901
 
     async def _async_stop_log_objects(call: ServiceCall) -> None:
         if LOG_INTERVAL_SUB not in domain_data:
-            return
+            raise HomeAssistantError("Object logging not running")
 
         persistent_notification.async_dismiss(hass, "profile_object_logging")
         domain_data.pop(LOG_INTERVAL_SUB)()
 
-    def _safe_repr(obj: Any) -> str:
-        """Get the repr of an object but keep going if there is an exception.
-
-        We wrap repr to ensure if one object cannot be serialized, we can
-        still get the rest.
-        """
-        try:
-            return repr(obj)
-        except Exception:  # pylint: disable=broad-except
-            return f"Failed to serialize {type(obj)}"
+    async def _async_start_object_sources(call: ServiceCall) -> None:
+        if LOG_INTERVAL_SUB in domain_data:
+            raise HomeAssistantError("Object logging already started")
+
+        persistent_notification.async_create(
+            hass,
+            (
+                "Object source logging has started. See [the logs](/config/logs) to"
+                " track the growth of new objects."
+            ),
+            title="Object source logging started",
+            notification_id="profile_object_source_logging",
+        )
+
+        last_ids: set[int] = set()
+        last_stats: dict[str, int] = {}
+
+        async def _log_object_sources_with_max(*_: Any) -> None:
+            await hass.async_add_executor_job(
+                _log_object_sources, call.data[CONF_MAX_OBJECTS], last_ids, last_stats
+            )
+
+        await _log_object_sources_with_max()
+        cancel_track = async_track_time_interval(
+            hass, _log_object_sources_with_max, call.data[CONF_SCAN_INTERVAL]
+        )
+
+        @callback
+        def _cancel():
+            cancel_track()
+            last_ids.clear()
+            last_stats.clear()
+
+        domain_data[LOG_INTERVAL_SUB] = _cancel
+
+    @callback
+    def _async_stop_object_sources(call: ServiceCall) -> None:
+        if LOG_INTERVAL_SUB not in domain_data:
+            raise HomeAssistantError("Object logging not running")
+
+        persistent_notification.async_dismiss(hass, "profile_object_source_logging")
+        domain_data.pop(LOG_INTERVAL_SUB)()
 
     def _dump_log_objects(call: ServiceCall) -> None:
         # Imports deferred to avoid loading modules
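Note: the new service pair mirrors the existing object logger's lifecycle. Both loggers share the single LOG_INTERVAL_SUB slot in domain_data, so starting one while either is running raises, and stopping pops the stored cancel callable and invokes it. The scan runs once immediately, is then rescheduled on the interval, and the blocking gc walk is offloaded via async_add_executor_job. A plain-asyncio sketch of the same start/stop pattern (PeriodicLogger and its method names are hypothetical, for illustration only, not Home Assistant APIs):

import asyncio
from collections.abc import Callable


class PeriodicLogger:
    """Hypothetical analogue of the start/stop pattern in the diff above."""

    def __init__(self) -> None:
        self._cancel: Callable[[], None] | None = None  # the LOG_INTERVAL_SUB slot

    def start(self, interval: float, job: Callable[[], None]) -> None:
        if self._cancel is not None:  # slot occupied -> refuse, as the service does
            raise RuntimeError("Object logging already started")
        task = asyncio.get_running_loop().create_task(self._run(interval, job))
        self._cancel = task.cancel

    async def _run(self, interval: float, job: Callable[[], None]) -> None:
        while True:
            # Run immediately, then wait; blocking work goes to a thread,
            # like hass.async_add_executor_job in the diff.
            await asyncio.to_thread(job)
            await asyncio.sleep(interval)

    def stop(self) -> None:
        if self._cancel is None:
            raise RuntimeError("Object logging not running")
        cancel, self._cancel = self._cancel, None
        cancel()  # pop-and-invoke, like domain_data.pop(LOG_INTERVAL_SUB)()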
@@ -143,15 +180,6 @@ async def async_setup_entry(  # noqa: C901
             notification_id="profile_object_dump",
         )
 
-    def _get_function_absfile(func: Any) -> str:
-        """Get the absolute file path of a function."""
-        import inspect  # pylint: disable=import-outside-toplevel
-
-        abs_file = "unknown"
-        with suppress(Exception):
-            abs_file = inspect.getabsfile(func)
-        return abs_file
-
     def _lru_stats(call: ServiceCall) -> None:
         """Log the stats of all lru caches."""
         # Imports deferred to avoid loading modules
@@ -164,7 +192,7 @@ async def async_setup_entry(  # noqa: C901
             _LOGGER.critical(
                 "Cache stats for lru_cache %s at %s: %s",
                 lru.__wrapped__,
-                _get_function_absfile(lru.__wrapped__),
+                _get_function_absfile(lru.__wrapped__) or "unknown",
                 lru.cache_info(),
             )
 
@@ -175,7 +203,7 @@ async def async_setup_entry(  # noqa: C901
                 _LOGGER.critical(
                     "Cache stats for LRU %s at %s: %s",
                     type(class_with_lru_attr),
-                    _get_function_absfile(class_with_lru_attr),
+                    _get_function_absfile(class_with_lru_attr) or "unknown",
                     maybe_lru.get_stats(),
                 )
 
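Note: these call sites change because _get_function_absfile moves to module level (see the @@ -404,3 hunk below) and now returns None on failure rather than the string "unknown", so each caller picks its own fallback with `or`. A standalone sketch of the relocated helper and the fallback idiom:

import inspect
from contextlib import suppress
from typing import Any


def get_function_absfile(func: Any) -> str | None:
    """Return the source file of func, or None if it cannot be determined."""
    abs_file: str | None = None
    with suppress(Exception):  # builtins and C functions raise TypeError
        abs_file = inspect.getabsfile(func)
    return abs_file


print(get_function_absfile(print) or "unknown")  # builtin -> no source -> "unknown"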
@@ -267,6 +295,30 @@ async def async_setup_entry(  # noqa: C901
         _async_stop_log_objects,
     )
 
+    async_register_admin_service(
+        hass,
+        DOMAIN,
+        SERVICE_START_LOG_OBJECT_SOURCES,
+        _async_start_object_sources,
+        schema=vol.Schema(
+            {
+                vol.Optional(
+                    CONF_SCAN_INTERVAL, default=DEFAULT_SCAN_INTERVAL
+                ): cv.time_period,
+                vol.Optional(CONF_MAX_OBJECTS, default=DEFAULT_MAX_OBJECTS): vol.Range(
+                    min=1, max=1024
+                ),
+            }
+        ),
+    )
+
+    async_register_admin_service(
+        hass,
+        DOMAIN,
+        SERVICE_STOP_LOG_OBJECT_SOURCES,
+        _async_stop_object_sources,
+    )
+
     async_register_admin_service(
         hass,
         DOMAIN,
@@ -404,3 +456,101 @@ def _log_objects(*_):
     import objgraph  # pylint: disable=import-outside-toplevel
 
     _LOGGER.critical("Memory Growth: %s", objgraph.growth(limit=1000))
+
+
+def _get_function_absfile(func: Any) -> str | None:
+    """Get the absolute file path of a function."""
+    import inspect  # pylint: disable=import-outside-toplevel
+
+    abs_file: str | None = None
+    with suppress(Exception):
+        abs_file = inspect.getabsfile(func)
+    return abs_file
+
+
+def _safe_repr(obj: Any) -> str:
+    """Get the repr of an object but keep going if there is an exception.
+
+    We wrap repr to ensure if one object cannot be serialized, we can
+    still get the rest.
+    """
+    try:
+        return repr(obj)
+    except Exception:  # pylint: disable=broad-except
+        return f"Failed to serialize {type(obj)}"
+
+
+def _find_backrefs_not_to_self(_object: Any) -> list[str]:
+    import objgraph  # pylint: disable=import-outside-toplevel
+
+    return [
+        _safe_repr(backref)
+        for backref in objgraph.find_backref_chain(
+            _object, lambda obj: obj is not _object
+        )
+    ]
+
+
+def _log_object_sources(
+    max_objects: int, last_ids: set[int], last_stats: dict[str, int]
+) -> None:
+    # Imports deferred to avoid loading modules
+    # in memory since usually only one part of this
+    # integration is used at a time
+    import gc  # pylint: disable=import-outside-toplevel
+
+    gc.collect()
+
+    objects = gc.get_objects()
+    new_objects: list[object] = []
+    new_objects_overflow: dict[str, int] = {}
+    current_ids = set()
+    new_stats: dict[str, int] = {}
+    had_new_object_growth = False
+    try:
+        for _object in objects:
+            object_type = type(_object).__name__
+            new_stats[object_type] = new_stats.get(object_type, 0) + 1
+
+        for _object in objects:
+            id_ = id(_object)
+            current_ids.add(id_)
+            if id_ in last_ids:
+                continue
+            object_type = type(_object).__name__
+            if last_stats.get(object_type, 0) < new_stats[object_type]:
+                if len(new_objects) < max_objects:
+                    new_objects.append(_object)
+                else:
+                    new_objects_overflow.setdefault(object_type, 0)
+                    new_objects_overflow[object_type] += 1
+
+        for _object in new_objects:
+            had_new_object_growth = True
+            object_type = type(_object).__name__
+            _LOGGER.critical(
+                "New object %s (%s/%s) at %s: %s",
+                object_type,
+                last_stats.get(object_type, 0),
+                new_stats[object_type],
+                _get_function_absfile(_object) or _find_backrefs_not_to_self(_object),
+                _safe_repr(_object),
+            )
+
+        for object_type, count in last_stats.items():
+            new_stats[object_type] = max(new_stats.get(object_type, 0), count)
+    finally:
+        # Break reference cycles
+        del objects
+        del new_objects
+        last_ids.clear()
+        last_ids.update(current_ids)
+        last_stats.clear()
+        last_stats.update(new_stats)
+        del new_stats
+        del current_ids
+
+    if new_objects_overflow:
+        _LOGGER.critical("New objects overflowed by %s", new_objects_overflow)
+    elif not had_new_object_growth:
+        _LOGGER.critical("No new object growth found")
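Note: _log_object_sources makes two passes over the live heap. The first pass tallies objects per type name; the second flags objects whose id was not seen last cycle and whose type population actually grew, stopping at max_objects and counting the rest as overflow. Because last_stats is carried forward as a per-type high-water mark (the max(...) update), a type that shrinks and regrows below its previous peak is not re-reported. A condensed standalone sketch of that detection logic (find_new_objects is a hypothetical name; the logging and overflow reporting from the diff are omitted):

import gc


def find_new_objects(
    last_ids: set[int], last_stats: dict[str, int], max_objects: int = 5
) -> list[object]:
    """Return up to max_objects objects that are new since the previous call."""
    gc.collect()
    objects = gc.get_objects()

    # Pass 1: count live objects per type name.
    new_stats: dict[str, int] = {}
    for obj in objects:
        name = type(obj).__name__
        new_stats[name] = new_stats.get(name, 0) + 1

    # Pass 2: keep unseen objects whose type count grew since the last cycle.
    new_objects: list[object] = []
    current_ids = {id(obj) for obj in objects}
    for obj in objects:
        if id(obj) in last_ids:
            continue
        name = type(obj).__name__
        if last_stats.get(name, 0) < new_stats[name] and len(new_objects) < max_objects:
            new_objects.append(obj)

    # Carry counts forward as a high-water mark, as the diff does.
    for name, count in last_stats.items():
        new_stats[name] = max(new_stats.get(name, 0), count)
    last_ids.clear()
    last_ids.update(current_ids)
    last_stats.clear()
    last_stats.update(new_stats)
    return new_objects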
homeassistant/components/profiler/services.yaml
@@ -25,7 +25,7 @@ memory:
           max: 3600
           unit_of_measurement: seconds
 start_log_objects:
-  name: Start log objects
+  name: Start logging objects
   description: Start logging growth of objects in memory
   fields:
     scan_interval:
@@ -38,7 +38,7 @@ start_log_objects:
           max: 3600
           unit_of_measurement: seconds
 stop_log_objects:
-  name: Stop log objects
+  name: Stop logging objects
   description: Stop logging growth of objects in memory.
 dump_log_objects:
   name: Dump log objects
@@ -51,6 +51,31 @@ dump_log_objects:
       example: State
       selector:
         text:
+start_log_object_sources:
+  name: Start logging object sources
+  description: Start logging sources of new objects in memory
+  fields:
+    scan_interval:
+      name: Scan interval
+      description: The number of seconds between logging objects.
+      default: 30.0
+      selector:
+        number:
+          min: 1
+          max: 3600
+          unit_of_measurement: seconds
+    max_objects:
+      name: Maximum objects
+      description: The maximum number of objects to log.
+      default: 5
+      selector:
+        number:
+          min: 1
+          max: 30
+          unit_of_measurement: objects
+stop_log_object_sources:
+  name: Stop logging object sources
+  description: Stop logging sources of new objects in memory.
 lru_stats:
   name: Log LRU stats
   description: Log the stats of all lru caches.
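Note: with this schema in place, the new service is invoked like any other profiler admin service. Mirroring the tests below, from a coroutine in a running Home Assistant instance (the data keys come from the schema registered in the @@ -267,6 hunk above):

await hass.services.async_call(
    "profiler",
    "start_log_object_sources",
    {"scan_interval": 10, "max_objects": 5},
    blocking=True,
)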
tests/components/profiler/test_init.py
@@ -19,7 +19,9 @@ from homeassistant.components.profiler import (
     SERVICE_LRU_STATS,
     SERVICE_MEMORY,
     SERVICE_START,
+    SERVICE_START_LOG_OBJECT_SOURCES,
     SERVICE_START_LOG_OBJECTS,
+    SERVICE_STOP_LOG_OBJECT_SOURCES,
     SERVICE_STOP_LOG_OBJECTS,
 )
 from homeassistant.components.profiler.const import DOMAIN
@@ -130,13 +132,20 @@ async def test_object_growth_logging(
     await hass.services.async_call(
         DOMAIN, SERVICE_START_LOG_OBJECTS, {CONF_SCAN_INTERVAL: 10}, blocking=True
     )
+    with pytest.raises(HomeAssistantError, match="Object logging already started"):
+        await hass.services.async_call(
+            DOMAIN,
+            SERVICE_START_LOG_OBJECTS,
+            {CONF_SCAN_INTERVAL: 10},
+            blocking=True,
+        )
 
     assert "Growth" in caplog.text
     caplog.clear()
 
     async_fire_time_changed(hass, dt_util.utcnow() + timedelta(seconds=11))
     await hass.async_block_till_done()
     assert "Growth" in caplog.text
 
     await hass.services.async_call(DOMAIN, SERVICE_STOP_LOG_OBJECTS, {}, blocking=True)
     caplog.clear()
@@ -145,6 +154,17 @@ async def test_object_growth_logging(
     await hass.async_block_till_done()
     assert "Growth" not in caplog.text
 
+    with pytest.raises(HomeAssistantError, match="Object logging not running"):
+        await hass.services.async_call(
+            DOMAIN, SERVICE_STOP_LOG_OBJECTS, {}, blocking=True
+        )
+
+    with patch("objgraph.growth"):
+        await hass.services.async_call(
+            DOMAIN, SERVICE_START_LOG_OBJECTS, {CONF_SCAN_INTERVAL: 10}, blocking=True
+        )
+    caplog.clear()
+
     assert await hass.config_entries.async_unload(entry.entry_id)
     await hass.async_block_till_done()
@@ -276,3 +296,92 @@ async def test_lru_stats(hass: HomeAssistant, caplog: pytest.LogCaptureFixture)
     assert "_dummy_test_lru_stats" in caplog.text
     assert "CacheInfo" in caplog.text
     assert "sqlalchemy_test" in caplog.text
+
+
+async def test_log_object_sources(
+    hass: HomeAssistant, caplog: pytest.LogCaptureFixture
+) -> None:
+    """Test we can set up the service and log sources of new objects."""
+    entry = MockConfigEntry(domain=DOMAIN)
+    entry.add_to_hass(hass)
+
+    assert await hass.config_entries.async_setup(entry.entry_id)
+    await hass.async_block_till_done()
+
+    assert hass.services.has_service(DOMAIN, SERVICE_START_LOG_OBJECT_SOURCES)
+    assert hass.services.has_service(DOMAIN, SERVICE_STOP_LOG_OBJECT_SOURCES)
+
+    class FakeObject:
+        """Fake object."""
+
+        def __repr__(self):
+            """Return a fake repr."""
+            return "<FakeObject>"
+
+    fake_object = FakeObject()
+
+    with patch("gc.collect"), patch("gc.get_objects", return_value=[fake_object]):
+        await hass.services.async_call(
+            DOMAIN,
+            SERVICE_START_LOG_OBJECT_SOURCES,
+            {CONF_SCAN_INTERVAL: 10},
+            blocking=True,
+        )
+        with pytest.raises(HomeAssistantError, match="Object logging already started"):
+            await hass.services.async_call(
+                DOMAIN,
+                SERVICE_START_LOG_OBJECT_SOURCES,
+                {CONF_SCAN_INTERVAL: 10},
+                blocking=True,
+            )
+
+        assert "New object FakeObject (0/1)" in caplog.text
+        caplog.clear()
+
+        async_fire_time_changed(hass, dt_util.utcnow() + timedelta(seconds=11))
+        await hass.async_block_till_done()
+        assert "No new object growth found" in caplog.text
+
+    fake_object2 = FakeObject()
+
+    with patch("gc.collect"), patch(
+        "gc.get_objects", return_value=[fake_object, fake_object2]
+    ):
+        caplog.clear()
+
+        async_fire_time_changed(hass, dt_util.utcnow() + timedelta(seconds=21))
+        await hass.async_block_till_done()
+        assert "New object FakeObject (1/2)" in caplog.text
+
+    many_objects = [FakeObject() for _ in range(30)]
+    with patch("gc.collect"), patch("gc.get_objects", return_value=many_objects):
+        caplog.clear()
+
+        async_fire_time_changed(hass, dt_util.utcnow() + timedelta(seconds=31))
+        await hass.async_block_till_done()
+        assert "New object FakeObject (2/30)" in caplog.text
+        assert "New objects overflowed by {'FakeObject': 25}" in caplog.text
+
+    await hass.services.async_call(
+        DOMAIN, SERVICE_STOP_LOG_OBJECT_SOURCES, {}, blocking=True
+    )
+    caplog.clear()
+
+    async_fire_time_changed(hass, dt_util.utcnow() + timedelta(seconds=41))
+    await hass.async_block_till_done()
+    assert "FakeObject" not in caplog.text
+    assert "No new object growth found" not in caplog.text
+
+    assert await hass.config_entries.async_unload(entry.entry_id)
+    await hass.async_block_till_done()
+
+    async_fire_time_changed(hass, dt_util.utcnow() + timedelta(seconds=51))
+    await hass.async_block_till_done()
+    assert "FakeObject" not in caplog.text
+    assert "No new object growth found" not in caplog.text
+
+    with pytest.raises(HomeAssistantError, match="Object logging not running"):
+        await hass.services.async_call(
+            DOMAIN, SERVICE_STOP_LOG_OBJECT_SOURCES, {}, blocking=True
+        )
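Note: the test pins down exactly what the scanner sees on each interval by patching gc.get_objects to return a known population (and gc.collect to a no-op). A standalone illustration of the same technique:

import gc
from unittest.mock import patch


class FakeObject:
    """Stand-in object with a stable repr."""

    def __repr__(self) -> str:
        return "<FakeObject>"


# Inside the with-block, any code that scans the heap sees only FakeObject.
with patch("gc.collect"), patch("gc.get_objects", return_value=[FakeObject()]):
    print(gc.get_objects())  # [<FakeObject>]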