Mask sensitive data in google_assistant logs (#109366)
* Mask sensitive data in google_assistant logs * Move common code to homeassistant/util/redact.py * Move to helpers * Add tests * Tweak * Redact additional logs * Fix stale docstring * Don't reveal the length of masked data * Update test
This commit is contained in:
parent
ae5d4e183a
commit
09ba46ddb9
6 changed files with 232 additions and 13 deletions
36
homeassistant/components/google_assistant/data_redaction.py
Normal file
36
homeassistant/components/google_assistant/data_redaction.py
Normal file
|
@ -0,0 +1,36 @@
|
|||
"""Helpers to redact Google Assistant data when logging."""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
from homeassistant.core import callback
|
||||
from homeassistant.helpers.redact import async_redact_data, partial_redact
|
||||
|
||||
# Keys whose values are partially masked before a request message is logged.
REQUEST_MSG_TO_REDACT: dict[str, Callable[[str], str]] = {
    "agentUserId": partial_redact,
    "uuid": partial_redact,
    "webhookId": partial_redact,
}

# Response messages mask the same keys plus "id". The key has to be the string
# "id": the bare name `id` is the builtin function, which can never equal a
# key of a message, so the field would silently stay unmasked.
RESPONSE_MSG_TO_REDACT = REQUEST_MSG_TO_REDACT | {"id": partial_redact}

# Sync messages use the very same table as request messages.
SYNC_MSG_TO_REDACT = REQUEST_MSG_TO_REDACT
|
||||
|
||||
|
||||
@callback
def async_redact_request_msg(msg: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of a request message that is safe to log.

    Values stored under the keys of REQUEST_MSG_TO_REDACT are masked.
    """
    redacted_msg = async_redact_data(msg, REQUEST_MSG_TO_REDACT)
    return redacted_msg
|
||||
|
||||
|
||||
@callback
def async_redact_response_msg(msg: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of a response message that is safe to log.

    Values stored under the keys of RESPONSE_MSG_TO_REDACT are masked.
    """
    redacted_msg = async_redact_data(msg, RESPONSE_MSG_TO_REDACT)
    return redacted_msg
|
||||
|
||||
|
||||
@callback
def async_redact_sync_msg(msg: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of a sync message that is safe to log.

    Values stored under the keys of SYNC_MSG_TO_REDACT are masked.
    """
    redacted_msg = async_redact_data(msg, SYNC_MSG_TO_REDACT)
    return redacted_msg
|
|
@ -32,6 +32,7 @@ from homeassistant.helpers import (
|
|||
)
|
||||
from homeassistant.helpers.event import async_call_later
|
||||
from homeassistant.helpers.network import get_url
|
||||
from homeassistant.helpers.redact import partial_redact
|
||||
from homeassistant.helpers.storage import Store
|
||||
from homeassistant.util.dt import utcnow
|
||||
|
||||
|
@ -48,6 +49,7 @@ from .const import (
|
|||
STORE_AGENT_USER_IDS,
|
||||
STORE_GOOGLE_LOCAL_WEBHOOK_ID,
|
||||
)
|
||||
from .data_redaction import async_redact_request_msg, async_redact_response_msg
|
||||
from .error import SmartHomeError
|
||||
|
||||
SYNC_DELAY = 15
|
||||
|
@ -332,8 +334,8 @@ class AbstractConfig(ABC):
|
|||
|
||||
_LOGGER.debug(
|
||||
"Register webhook handler %s for agent user id %s",
|
||||
webhook_id,
|
||||
user_agent_id,
|
||||
partial_redact(webhook_id),
|
||||
partial_redact(user_agent_id),
|
||||
)
|
||||
try:
|
||||
webhook.async_register(
|
||||
|
@ -348,8 +350,8 @@ class AbstractConfig(ABC):
|
|||
except ValueError:
|
||||
_LOGGER.warning(
|
||||
"Webhook handler %s for agent user id %s is already defined!",
|
||||
webhook_id,
|
||||
user_agent_id,
|
||||
partial_redact(webhook_id),
|
||||
partial_redact(user_agent_id),
|
||||
)
|
||||
setup_successful = False
|
||||
break
|
||||
|
@ -374,8 +376,8 @@ class AbstractConfig(ABC):
|
|||
webhook_id = self.get_local_webhook_id(agent_user_id)
|
||||
_LOGGER.debug(
|
||||
"Unregister webhook handler %s for agent user id %s",
|
||||
webhook_id,
|
||||
agent_user_id,
|
||||
partial_redact(webhook_id),
|
||||
partial_redact(agent_user_id),
|
||||
)
|
||||
webhook.async_unregister(self.hass, webhook_id)
|
||||
|
||||
|
@ -410,7 +412,7 @@ class AbstractConfig(ABC):
|
|||
"Received local message from %s (JS %s):\n%s\n",
|
||||
request.remote,
|
||||
request.headers.get("HA-Cloud-Version", "unknown"),
|
||||
pprint.pformat(payload),
|
||||
pprint.pformat(async_redact_request_msg(payload)),
|
||||
)
|
||||
|
||||
if (agent_user_id := self.get_local_agent_user_id(webhook_id)) is None:
|
||||
|
@ -421,8 +423,8 @@ class AbstractConfig(ABC):
|
|||
"Cannot process request for webhook %s as no linked agent user is"
|
||||
" found:\n%s\n"
|
||||
),
|
||||
webhook_id,
|
||||
pprint.pformat(payload),
|
||||
partial_redact(webhook_id),
|
||||
pprint.pformat(async_redact_request_msg(payload)),
|
||||
)
|
||||
webhook.async_unregister(self.hass, webhook_id)
|
||||
return None
|
||||
|
@ -441,7 +443,10 @@ class AbstractConfig(ABC):
|
|||
)
|
||||
|
||||
if _LOGGER.isEnabledFor(logging.DEBUG):
|
||||
_LOGGER.debug("Responding to local message:\n%s\n", pprint.pformat(result))
|
||||
_LOGGER.debug(
|
||||
"Responding to local message:\n%s\n",
|
||||
pprint.pformat(async_redact_response_msg(result)),
|
||||
)
|
||||
|
||||
return json_response(result)
|
||||
|
||||
|
|
|
@ -18,6 +18,11 @@ from .const import (
|
|||
EVENT_QUERY_RECEIVED,
|
||||
EVENT_SYNC_RECEIVED,
|
||||
)
|
||||
from .data_redaction import (
|
||||
async_redact_request_msg,
|
||||
async_redact_response_msg,
|
||||
async_redact_sync_msg,
|
||||
)
|
||||
from .error import SmartHomeError
|
||||
from .helpers import GoogleEntity, RequestData, async_get_entities
|
||||
|
||||
|
@ -42,7 +47,11 @@ async def async_handle_message(hass, config, user_id, message, source):
|
|||
response = await _process(hass, data, message)
|
||||
|
||||
if response and "errorCode" in response["payload"]:
|
||||
_LOGGER.error("Error handling message %s: %s", message, response["payload"])
|
||||
_LOGGER.error(
|
||||
"Error handling message %s: %s",
|
||||
async_redact_request_msg(message),
|
||||
async_redact_response_msg(response["payload"]),
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
|
@ -118,7 +127,7 @@ async def async_devices_sync(
|
|||
devices = await async_devices_sync_response(hass, data.config, agent_user_id)
|
||||
response = create_sync_response(agent_user_id, devices)
|
||||
|
||||
_LOGGER.debug("Syncing entities response: %s", response)
|
||||
_LOGGER.debug("Syncing entities response: %s", async_redact_sync_msg(response))
|
||||
|
||||
return response
|
||||
|
||||
|
|
75
homeassistant/helpers/redact.py
Normal file
75
homeassistant/helpers/redact.py
Normal file
|
@ -0,0 +1,75 @@
|
|||
"""Helpers to redact sensitive data."""
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable, Iterable, Mapping
|
||||
from typing import Any, TypeVar, cast, overload
|
||||
|
||||
from homeassistant.core import callback
|
||||
|
||||
# Placeholder that replaces a value which must not be shown at all.
REDACTED = "**REDACTED**"

_T = TypeVar("_T")
_ValueT = TypeVar("_ValueT")


def partial_redact(
    x: str | Any, unmasked_prefix: int = 4, unmasked_suffix: int = 4
) -> str:
    """Mask the middle of a string, keeping a short prefix and suffix readable.

    The hidden part is always rendered as ``***`` regardless of how many
    characters it replaces, so the result does not reveal the value's length.

    Args:
        x: Value to mask. Anything that is not a ``str`` is fully redacted.
        unmasked_prefix: Number of leading characters to keep readable.
        unmasked_suffix: Number of trailing characters to keep readable.

    Returns:
        ``<prefix>***<suffix>``, or ``REDACTED`` when ``x`` is not a string,
        when a count is negative, when both counts are zero, or when ``x`` is
        shorter than twice the number of characters that would stay readable.
    """
    if not isinstance(x, str):
        return REDACTED

    # A negative count would turn the slices below into "everything except the
    # last N characters" and expose nearly the whole value; redact it instead.
    if unmasked_prefix < 0 or unmasked_suffix < 0:
        return REDACTED

    unmasked = unmasked_prefix + unmasked_suffix
    # Either nothing may be shown, or the value is so short that the readable
    # part would make up more than half of it.
    if not unmasked or len(x) < unmasked * 2:
        return REDACTED

    # x[-0:] is the whole string, so a zero-length suffix needs its own case.
    suffix = x[-unmasked_suffix:] if unmasked_suffix else ""
    return f"{x[:unmasked_prefix]}***{suffix}"
|
||||
|
||||
|
||||
@overload
def async_redact_data(  # type: ignore[overload-overlap]
    data: Mapping, to_redact: Iterable[Any] | Mapping[Any, Callable[[_ValueT], _ValueT]]
) -> dict:
    ...


@overload
def async_redact_data(
    data: _T, to_redact: Iterable[Any] | Mapping[Any, Callable[[_ValueT], _ValueT]]
) -> _T:
    ...


@callback
def async_redact_data(
    data: _T, to_redact: Iterable[Any] | Mapping[Any, Callable[[_ValueT], _ValueT]]
) -> _T:
    """Return a copy of data with the values of sensitive keys redacted.

    Lists are processed element by element and mappings are copied into a new
    dict; any other kind of value is returned unchanged.

    to_redact is either a collection of keys, whose values are replaced with
    REDACTED, or a mapping from key to a function that produces the
    replacement for that key's value. Values that are None or an empty string
    are never replaced. Values of keys that are not redacted are searched
    recursively when they are mappings or lists.
    """
    if isinstance(data, list):
        return cast(_T, [async_redact_data(entry, to_redact) for entry in data])

    if not isinstance(data, Mapping):
        return data

    result = {**data}

    for name, current in result.items():
        # None and empty strings carry nothing worth hiding; keep them as-is.
        if current is None or (isinstance(current, str) and not current):
            continue

        if name not in to_redact:
            # Not sensitive itself, but it may contain sensitive keys.
            if isinstance(current, Mapping):
                result[name] = async_redact_data(current, to_redact)
            elif isinstance(current, list):
                result[name] = [
                    async_redact_data(entry, to_redact) for entry in current
                ]
            continue

        if isinstance(to_redact, Mapping):
            result[name] = to_redact[name](current)
        else:
            result[name] = REDACTED

    return cast(_T, result)
|
|
@ -466,6 +466,6 @@ async def test_async_enable_local_sdk(
|
|||
)
|
||||
assert resp.status == HTTPStatus.OK
|
||||
assert (
|
||||
"Cannot process request for webhook mock_webhook_id as no linked agent user is found:"
|
||||
"Cannot process request for webhook **REDACTED** as no linked agent user is found:"
|
||||
in caplog.text
|
||||
)
|
||||
|
|
94
tests/helpers/test_redact.py
Normal file
94
tests/helpers/test_redact.py
Normal file
|
@ -0,0 +1,94 @@
|
|||
"""Test the data redaction helper."""
|
||||
from homeassistant.helpers.redact import REDACTED, async_redact_data, partial_redact
|
||||
|
||||
|
||||
def test_redact() -> None:
    """Test async_redact_data with a plain set of keys to redact."""
    source = {
        "key1": "value1",
        "key2": ["value2_a", "value2_b"],
        "key3": [["value_3a", "value_3b"], ["value_3c", "value_3d"]],
        "key4": {
            "key4_1": "value4_1",
            "key4_2": ["value4_2a", "value4_2b"],
            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
        },
        "key5": None,
        "key6": "",
        "key7": False,
    }
    sensitive_keys = {"key1", "key3", "key4_1", "key5", "key6", "key7"}

    # None and "" stay untouched even though their keys are listed.
    expected = {
        "key1": REDACTED,
        "key2": ["value2_a", "value2_b"],
        "key3": REDACTED,
        "key4": {
            "key4_1": REDACTED,
            "key4_2": ["value4_2a", "value4_2b"],
            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
        },
        "key5": None,
        "key6": "",
        "key7": REDACTED,
    }

    outcome = async_redact_data(source, sensitive_keys)
    assert outcome == expected
|
||||
|
||||
|
||||
def test_redact_custom_redact_function() -> None:
    """Test async_redact_data with a per-key redact function."""
    source = {
        "key1": "val1val1val1val1",
        "key2": ["value2_a", "value2_b"],
        "key3": [
            ["val_3avalue_3avalue_3a", "value_3bvalue_3bvalue_3b"],
            ["value_3cvalue_3cvalue_3c", "value_3dvalue_3dvalue_3d"],
        ],
        "key4": {
            "key4_1": "val4_1val4_1val4_1val4_1",
            "key4_2": ["value4_2a", "value4_2b"],
            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
        },
        "key5": None,
        "key6": "",
        "key7": False,
    }
    redactors = {
        "key1": partial_redact,
        "key3": partial_redact,  # Value is a list, will default to REDACTED
        "key4_1": partial_redact,
        "key5": partial_redact,
        "key6": partial_redact,
        "key7": partial_redact,  # Value is False, will default to REDACTED
    }

    expected = {
        "key1": "val1***val1",
        "key2": ["value2_a", "value2_b"],
        "key3": REDACTED,
        "key4": {
            "key4_1": "val4***l4_1",
            "key4_2": ["value4_2a", "value4_2b"],
            "key4_3": [["value4_3a", "value4_3b"], ["value4_3c", "value4_3d"]],
        },
        "key5": None,
        "key6": "",
        "key7": REDACTED,
    }

    outcome = async_redact_data(source, redactors)
    assert outcome == expected
|
||||
|
||||
|
||||
def test_partial_redact() -> None:
    """Test partial_redact for each kind of input it distinguishes."""
    # (positional arguments, expected result), checked in order.
    cases = (
        ((None, 0, 0), REDACTED),
        (("short_string",), REDACTED),
        (("long_enough_string",), "long***ring"),
        (("long_enough_string", 2, 2), "lo***ng"),
        (("long_enough_string", 0, 0), REDACTED),
    )
    for arguments, expected in cases:
        assert partial_redact(*arguments) == expected
|
Loading…
Add table
Add a link
Reference in a new issue