Avoid calling extract_stack in system_log since it does blocking I/O (#100455)

2023-09-21 09:48:41 +02:00 · 2023-09-21 09:48:41 +02:00 · 59daceafd2
commit 59daceafd2
parent f2fc62138a
3 changed files with 103 additions and 46 deletions
--- a/homeassistant/components/system_log/init.py
+++ b/homeassistant/components/system_log/init.py
@ -4,6 +4,7 @@ from __future__ import annotations
 from collections import OrderedDict, deque
 import logging
 import re
+import sys
 import traceback
 from typing import Any, cast

@ -59,31 +60,65 @@ SERVICE_WRITE_SCHEMA = vol.Schema(


 def _figure_out_source(
-    record: logging.LogRecord, call_stack: list[tuple[str, int]], paths_re: re.Pattern
+    record: logging.LogRecord, paths_re: re.Pattern
 ) -> tuple[str, int]:
+    """Figure out where a log message came from."""
    # If a stack trace exists, extract file names from the entire call stack.
    # The other case is when a regular "log" is made (without an attached
    # exception). In that case, just use the file where the log was made from.
    if record.exc_info:
        stack = [(x[0], x[1]) for x in traceback.extract_tb(record.exc_info[2])]
-    else:
-        index = -1
-        for i, frame in enumerate(call_stack):
-            if frame[0] == record.pathname:
-                index = i
+        for i, (filename, _) in enumerate(stack):
+            # Slice the stack to the first frame that matches
+            # the record pathname.
+            if filename == record.pathname:
+                stack = stack[0 : i + 1]
                break
-        if index == -1:
-            # For some reason we couldn't find pathname in the stack.
-            stack = [(record.pathname, record.lineno)]
-        else:
-            stack = call_stack[0 : index + 1]
+        # Iterate through the stack call (in reverse) and find the last call from
+        # a file in Home Assistant. Try to figure out where error happened.
+        for path, line_number in reversed(stack):
+            # Try to match with a file within Home Assistant
+            if match := paths_re.match(path):
+                return (cast(str, match.group(1)), line_number)
+    else:
+        #
+        # We need to figure out where the log call came from if we
+        # don't have an exception.
+        #
+        # We do this by walking up the stack until we find the first
+        # frame match the record pathname so the code below
+        # can be used to reverse the remaining stack frames
+        # and find the first one that is from a file within Home Assistant.
+        #
+        # We do not call traceback.extract_stack() because it is
+        # it makes many stat() syscalls calls which do blocking I/O,
+        # and since this code is running in the event loop, we need to avoid
+        # blocking I/O.
+
+        frame = sys._getframe(4)  # pylint: disable=protected-access
+        #
+        # We use _getframe with 4 to skip the following frames:
+        #
+        # Jump 2 frames up to get to the actual caller
+        # since we are in a function, and always called from another function
+        # that are never the original source of the log message.
+        #
+        # Next try to skip any frames that are from the logging module
+        # We know that the logger module typically has 5 frames itself
+        # but it may change in the future so we are conservative and
+        # only skip 2.
+        #
+        # _getframe is cpython only but we are already using cpython specific
+        # code everywhere in HA so it's fine as its unlikely we will ever
+        # support other python implementations.
+        #
+        # Iterate through the stack call (in reverse) and find the last call from
+        # a file in Home Assistant. Try to figure out where error happened.
+        while back := frame.f_back:
+            if match := paths_re.match(frame.f_code.co_filename):
+                return (cast(str, match.group(1)), frame.f_lineno)
+            frame = back

-    # Iterate through the stack call (in reverse) and find the last call from
-    # a file in Home Assistant. Try to figure out where error happened.
-    for pathname in reversed(stack):
-        # Try to match with a file within Home Assistant
-        if match := paths_re.match(pathname[0]):
-            return (cast(str, match.group(1)), pathname[1])
    # Ok, we don't know what this is
    return (record.pathname, record.lineno)

@ -217,11 +252,7 @@ class LogErrorHandler(logging.Handler):
        default upper limit is set to 50 (older entries are discarded) but can
        be changed if needed.
        """
-        stack = []
-        if not record.exc_info:
-            stack = [(f[0], f[1]) for f in traceback.extract_stack()]
-
-        entry = LogEntry(record, _figure_out_source(record, stack, self.paths_re))
+        entry = LogEntry(record, _figure_out_source(record, self.paths_re))
        self.records.add_entry(entry)
        if self.fire_event:
            self.hass.bus.fire(EVENT_SYSTEM_LOG, entry.to_dict())
--- a/homeassistant/components/zha/core/gateway.py
+++ b/homeassistant/components/zha/core/gateway.py
@ -10,7 +10,6 @@ import itertools
 import logging
 import re
 import time
-import traceback
 from typing import TYPE_CHECKING, Any, NamedTuple

 from zigpy.application import ControllerApplication
@ -814,21 +813,20 @@ class LogRelayHandler(logging.Handler):
        super().__init__()
        self.hass = hass
        self.gateway = gateway
-
-    def emit(self, record: LogRecord) -> None:
-        """Relay log message via dispatcher."""
-        stack = []
-        if record.levelno >= logging.WARN and not record.exc_info:
-            stack = [f for f, _, _, _ in traceback.extract_stack()]
-
        hass_path: str = HOMEASSISTANT_PATH[0]
        config_dir = self.hass.config.config_dir
-        paths_re = re.compile(
+        self.paths_re = re.compile(
            r"(?:{})/(.*)".format(
                "|".join([re.escape(x) for x in (hass_path, config_dir)])
            )
        )
-        entry = LogEntry(record, _figure_out_source(record, stack, paths_re))
+
+    def emit(self, record: LogRecord) -> None:
+        """Relay log message via dispatcher."""
+        if record.levelno >= logging.WARN:
+            entry = LogEntry(record, _figure_out_source(record, self.paths_re))
+        else:
+            entry = LogEntry(record, (record.pathname, record.lineno))
        async_dispatcher_send(
            self.hass,
            ZHA_GW_MSG,
--- a/tests/components/system_log/test_init.py
+++ b/tests/components/system_log/test_init.py
@ -4,6 +4,7 @@ from __future__ import annotations
 import asyncio
 from collections.abc import Awaitable
 import logging
+import re
 import traceback
 from typing import Any
 from unittest.mock import MagicMock, patch
@ -87,11 +88,6 @@ class WatchLogErrorHandler(system_log.LogErrorHandler):
            self.watch_event.set()


-def get_frame(name):
-    """Get log stack frame."""
-    return (name, 5, None, None)
-
-
 async def async_setup_system_log(hass, config) -> WatchLogErrorHandler:
    """Set up the system_log component."""
    WatchLogErrorHandler.instances = []
@ -362,21 +358,28 @@ async def test_unknown_path(
    assert log["source"] == ["unknown_path", 0]


+def get_frame(path: str, previous_frame: MagicMock | None) -> MagicMock:
+    """Get log stack frame."""
+    return MagicMock(
+        f_back=previous_frame,
+        f_code=MagicMock(co_filename=path),
+        f_lineno=5,
+    )
+
+
 async def async_log_error_from_test_path(hass, path, watcher):
    """Log error while mocking the path."""
    call_path = "internal_path.py"
+    main_frame = get_frame("main_path/main.py", None)
+    path_frame = get_frame(path, main_frame)
+    call_path_frame = get_frame(call_path, path_frame)
+    logger_frame = get_frame("venv_path/logging/log.py", call_path_frame)
+
    with patch.object(
        _LOGGER, "findCaller", MagicMock(return_value=(call_path, 0, None, None))
    ), patch(
-        "traceback.extract_stack",
-        MagicMock(
-            return_value=[
-                get_frame("main_path/main.py"),
-                get_frame(path),
-                get_frame(call_path),
-                get_frame("venv_path/logging/log.py"),
-            ]
-        ),
+        "homeassistant.components.system_log.sys._getframe",
+        return_value=logger_frame,
    ):
        wait_empty = watcher.add_watcher("error message")
        _LOGGER.error("error message")
@ -441,3 +444,28 @@ async def test_raise_during_log_capture(
    log = find_log(await get_error_log(hass_ws_client), "ERROR")
    assert log is not None
    assert_log(log, "", "Bad logger message: repr error", "ERROR")
+
+
+async def test__figure_out_source(hass: HomeAssistant) -> None:
+    """Test that source is figured out correctly.
+
+    We have to test this directly for exception tracebacks since
+    we cannot generate a trackback from a Home Assistant component
+    in a test because the test is not a component.
+    """
+    try:
+        raise ValueError("test")
+    except ValueError as ex:
+        exc_info = (type(ex), ex, ex.__traceback__)
+    mock_record = MagicMock(
+        pathname="should not hit",
+        lineno=5,
+        exc_info=exc_info,
+    )
+    regex_str = f"({__file__})"
+    file, line_no = system_log._figure_out_source(
+        mock_record,
+        re.compile(regex_str),
+    )
+    assert file == __file__
+    assert line_no != 5