Mask sensitive data in google_assistant logs (#109366)

* Mask sensitive data in google_assistant logs

* Move common code to homeassistant/util/redact.py

* Move to helpers

* Add tests

* Tweak

* Redact additional logs

* Fix stale docstring

* Don't reveal the length of masked data

* Update test
This commit is contained in:
Erik Montnemery 2024-02-02 22:10:30 +01:00 committed by GitHub
parent ae5d4e183a
commit 09ba46ddb9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 232 additions and 13 deletions

View file

@ -0,0 +1,36 @@
"""Helpers to redact Google Assistant data when logging."""
from __future__ import annotations
from collections.abc import Callable
from typing import Any
from homeassistant.core import callback
from homeassistant.helpers.redact import async_redact_data, partial_redact
# Keys whose values are masked before a request message is logged. Each key
# maps to the function that produces the masked value.
REQUEST_MSG_TO_REDACT: dict[str, Callable[[str], str]] = {
    "agentUserId": partial_redact,
    "uuid": partial_redact,
    "webhookId": partial_redact,
}

# Response messages additionally mask the "id" field. The key has to be the
# string "id": the bare name `id` is the builtin function, which never equals
# a message key, so the field would silently be logged unmasked.
RESPONSE_MSG_TO_REDACT = REQUEST_MSG_TO_REDACT | {"id": partial_redact}

# Sync messages are masked with the same keys as request messages.
SYNC_MSG_TO_REDACT = REQUEST_MSG_TO_REDACT
@callback
def async_redact_request_msg(msg: dict[str, Any]) -> dict[str, Any]:
    """Return a request message with its sensitive values masked.

    The keys to mask and the masking functions come from
    REQUEST_MSG_TO_REDACT.
    """
    redacted: dict[str, Any] = async_redact_data(msg, REQUEST_MSG_TO_REDACT)
    return redacted
@callback
def async_redact_response_msg(msg: dict[str, Any]) -> dict[str, Any]:
    """Return a response message with its sensitive values masked.

    The keys to mask and the masking functions come from
    RESPONSE_MSG_TO_REDACT.
    """
    redacted: dict[str, Any] = async_redact_data(msg, RESPONSE_MSG_TO_REDACT)
    return redacted
@callback
def async_redact_sync_msg(msg: dict[str, Any]) -> dict[str, Any]:
    """Return a sync message with its sensitive values masked.

    The keys to mask and the masking functions come from
    SYNC_MSG_TO_REDACT.
    """
    redacted: dict[str, Any] = async_redact_data(msg, SYNC_MSG_TO_REDACT)
    return redacted

View file

@ -32,6 +32,7 @@ from homeassistant.helpers import (
)
from homeassistant.helpers.event import async_call_later
from homeassistant.helpers.network import get_url
from homeassistant.helpers.redact import partial_redact
from homeassistant.helpers.storage import Store
from homeassistant.util.dt import utcnow
@ -48,6 +49,7 @@ from .const import (
STORE_AGENT_USER_IDS,
STORE_GOOGLE_LOCAL_WEBHOOK_ID,
)
from .data_redaction import async_redact_request_msg, async_redact_response_msg
from .error import SmartHomeError
SYNC_DELAY = 15
@ -332,8 +334,8 @@ class AbstractConfig(ABC):
_LOGGER.debug(
"Register webhook handler %s for agent user id %s",
webhook_id,
user_agent_id,
partial_redact(webhook_id),
partial_redact(user_agent_id),
)
try:
webhook.async_register(
@ -348,8 +350,8 @@ class AbstractConfig(ABC):
except ValueError:
_LOGGER.warning(
"Webhook handler %s for agent user id %s is already defined!",
webhook_id,
user_agent_id,
partial_redact(webhook_id),
partial_redact(user_agent_id),
)
setup_successful = False
break
@ -374,8 +376,8 @@ class AbstractConfig(ABC):
webhook_id = self.get_local_webhook_id(agent_user_id)
_LOGGER.debug(
"Unregister webhook handler %s for agent user id %s",
webhook_id,
agent_user_id,
partial_redact(webhook_id),
partial_redact(agent_user_id),
)
webhook.async_unregister(self.hass, webhook_id)
@ -410,7 +412,7 @@ class AbstractConfig(ABC):
"Received local message from %s (JS %s):\n%s\n",
request.remote,
request.headers.get("HA-Cloud-Version", "unknown"),
pprint.pformat(payload),
pprint.pformat(async_redact_request_msg(payload)),
)
if (agent_user_id := self.get_local_agent_user_id(webhook_id)) is None:
@ -421,8 +423,8 @@ class AbstractConfig(ABC):
"Cannot process request for webhook %s as no linked agent user is"
" found:\n%s\n"
),
webhook_id,
pprint.pformat(payload),
partial_redact(webhook_id),
pprint.pformat(async_redact_request_msg(payload)),
)
webhook.async_unregister(self.hass, webhook_id)
return None
@ -441,7 +443,10 @@ class AbstractConfig(ABC):
)
if _LOGGER.isEnabledFor(logging.DEBUG):
_LOGGER.debug("Responding to local message:\n%s\n", pprint.pformat(result))
_LOGGER.debug(
"Responding to local message:\n%s\n",
pprint.pformat(async_redact_response_msg(result)),
)
return json_response(result)

View file

@ -18,6 +18,11 @@ from .const import (
EVENT_QUERY_RECEIVED,
EVENT_SYNC_RECEIVED,
)
from .data_redaction import (
async_redact_request_msg,
async_redact_response_msg,
async_redact_sync_msg,
)
from .error import SmartHomeError
from .helpers import GoogleEntity, RequestData, async_get_entities
@ -42,7 +47,11 @@ async def async_handle_message(hass, config, user_id, message, source):
response = await _process(hass, data, message)
if response and "errorCode" in response["payload"]:
_LOGGER.error("Error handling message %s: %s", message, response["payload"])
_LOGGER.error(
"Error handling message %s: %s",
async_redact_request_msg(message),
async_redact_response_msg(response["payload"]),
)
return response
@ -118,7 +127,7 @@ async def async_devices_sync(
devices = await async_devices_sync_response(hass, data.config, agent_user_id)
response = create_sync_response(agent_user_id, devices)
_LOGGER.debug("Syncing entities response: %s", response)
_LOGGER.debug("Syncing entities response: %s", async_redact_sync_msg(response))
return response

View file

@ -0,0 +1,75 @@
"""Helpers to redact sensitive data."""
from __future__ import annotations
from collections.abc import Callable, Iterable, Mapping
from typing import Any, TypeVar, cast, overload
from homeassistant.core import callback
# Placeholder that stands in for a value that is masked completely.
REDACTED = "**REDACTED**"

# Type variables used by the async_redact_data signatures: _T is the type of
# the data being redacted, _ValueT the type handled by a custom redact function.
_T = TypeVar("_T")
_ValueT = TypeVar("_ValueT")
def partial_redact(
    x: str | Any, unmasked_prefix: int = 4, unmasked_suffix: int = 4
) -> str:
    """Return x with everything but its first and last characters masked.

    The masked middle is always rendered as three asterisks, so the result
    does not reveal how long the original value was.

    REDACTED is returned instead when x is not a string, when x is shorter
    than twice the number of characters that would stay visible, or when
    both unmasked_prefix and unmasked_suffix are zero.
    """
    if not isinstance(x, str):
        return REDACTED

    visible = unmasked_prefix + unmasked_suffix
    if len(x) < 2 * visible or not (unmasked_prefix or unmasked_suffix):
        return REDACTED

    head = x[:unmasked_prefix]
    # x[-0:] would yield the whole string, so a zero-length suffix is spelled out.
    tail = x[-unmasked_suffix:] if unmasked_suffix else ""
    return head + "***" + tail
@overload
def async_redact_data(  # type: ignore[overload-overlap]
    data: Mapping, to_redact: Iterable[Any] | Mapping[Any, Callable[[_ValueT], _ValueT]]
) -> dict:
    ...


@overload
def async_redact_data(
    data: _T, to_redact: Iterable[Any] | Mapping[Any, Callable[[_ValueT], _ValueT]]
) -> _T:
    ...


@callback
def async_redact_data(
    data: _T, to_redact: Iterable[Any] | Mapping[Any, Callable[[_ValueT], _ValueT]]
) -> _T:
    """Return data with the values of the keys in to_redact masked.

    A list is processed element by element, a mapping is copied into a new
    dict and walked recursively, and any other object is returned as is.

    When to_redact is a mapping, the function stored under a matching key is
    called with the value and its result is used; otherwise the value is
    replaced with REDACTED. Values that are None or an empty string are
    always left untouched.
    """
    if isinstance(data, list):
        return cast(_T, [async_redact_data(entry, to_redact) for entry in data])
    if not isinstance(data, Mapping):
        return data

    result = dict(data)
    for key, value in result.items():
        # None and empty strings are kept as they are, even for matching keys.
        if value is None or (isinstance(value, str) and not value):
            continue
        if key in to_redact:
            result[key] = (
                to_redact[key](value) if isinstance(to_redact, Mapping) else REDACTED
            )
        elif isinstance(value, (Mapping, list)):
            # The recursive call handles both nested mappings and lists.
            result[key] = async_redact_data(value, to_redact)
    return cast(_T, result)

View file

@ -466,6 +466,6 @@ async def test_async_enable_local_sdk(
)
assert resp.status == HTTPStatus.OK
assert (
"Cannot process request for webhook mock_webhook_id as no linked agent user is found:"
"Cannot process request for webhook **REDACTED** as no linked agent user is found:"
in caplog.text
)

View file

@ -0,0 +1,94 @@
"""Test the data redaction helper."""
from homeassistant.helpers.redact import REDACTED, async_redact_data, partial_redact
def test_redact() -> None:
    """Check async_redact_data when to_redact is a plain set of keys."""
    original = {
        "key1": "value1",
        "key2": ["value2_a", "value2_b"],
        "key3": [["value_3a", "value_3b"], ["value_3c", "value_3d"]],
        "key4": {
            "key4_1": "value4_1",
            "key4_2": ["value4_2a", "value4_2b"],
            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
        },
        "key5": None,
        "key6": "",
        "key7": False,
    }
    keys_to_redact = {"key1", "key3", "key4_1", "key5", "key6", "key7"}

    # None and empty-string values stay as they are even though their keys match.
    expected = {
        "key1": REDACTED,
        "key2": ["value2_a", "value2_b"],
        "key3": REDACTED,
        "key4": {
            "key4_1": REDACTED,
            "key4_2": ["value4_2a", "value4_2b"],
            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
        },
        "key5": None,
        "key6": "",
        "key7": REDACTED,
    }

    assert async_redact_data(original, keys_to_redact) == expected
def test_redact_custom_redact_function() -> None:
    """Check async_redact_data when to_redact maps keys to redact functions."""
    original = {
        "key1": "val1val1val1val1",
        "key2": ["value2_a", "value2_b"],
        "key3": [
            ["val_3avalue_3avalue_3a", "value_3bvalue_3bvalue_3b"],
            ["value_3cvalue_3cvalue_3c", "value_3dvalue_3dvalue_3d"],
        ],
        "key4": {
            "key4_1": "val4_1val4_1val4_1val4_1",
            "key4_2": ["value4_2a", "value4_2b"],
            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
        },
        "key5": None,
        "key6": "",
        "key7": False,
    }
    redactors = {
        "key1": partial_redact,
        "key3": partial_redact,  # Value is a list, will default to REDACTED
        "key4_1": partial_redact,
        "key5": partial_redact,
        "key6": partial_redact,
        "key7": partial_redact,  # Value is False, will default to REDACTED
    }

    expected = {
        "key1": "val1***val1",
        "key2": ["value2_a", "value2_b"],
        "key3": REDACTED,
        "key4": {
            "key4_1": "val4***l4_1",
            "key4_2": ["value4_2a", "value4_2b"],
            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
        },
        "key5": None,
        "key6": "",
        "key7": REDACTED,
    }

    assert async_redact_data(original, redactors) == expected
def test_partial_redact() -> None:
    """Check partial_redact for non-string, too-short and long-enough input."""
    long_value = "long_enough_string"

    # A non-string value is masked completely.
    assert partial_redact(None, 0, 0) == REDACTED
    # A string too short for the default prefix and suffix is masked completely.
    assert partial_redact("short_string") == REDACTED
    # Long enough: only the requested prefix and suffix stay visible.
    assert partial_redact(long_value) == "long***ring"
    assert partial_redact(long_value, 2, 2) == "lo***ng"
    # Asking to show nothing masks the value completely.
    assert partial_redact(long_value, 0, 0) == REDACTED