From b770ca319e640e2af24221dfe96d9af530c85083 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 1 May 2022 21:04:05 -0500 Subject: [PATCH] Improve scrape performance by using lxml parser (#71087) * Improve scrape performance by using lxml parser * load it * tweak * tweak * ensure libxml2 is installed in dev container --- Dockerfile.dev | 1 + homeassistant/components/scrape/manifest.json | 2 +- homeassistant/components/scrape/sensor.py | 2 +- requirements_all.txt | 3 +++ requirements_test_all.txt | 3 +++ 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Dockerfile.dev b/Dockerfile.dev index dc04efe56fb..322c63f53dd 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -18,6 +18,7 @@ RUN \ libavfilter-dev \ libpcap-dev \ libturbojpeg0 \ + libxml2 \ git \ cmake \ && apt-get clean \ diff --git a/homeassistant/components/scrape/manifest.json b/homeassistant/components/scrape/manifest.json index bf5865206e4..b1ccbb354a9 100644 --- a/homeassistant/components/scrape/manifest.json +++ b/homeassistant/components/scrape/manifest.json @@ -2,7 +2,7 @@ "domain": "scrape", "name": "Scrape", "documentation": "https://www.home-assistant.io/integrations/scrape", - "requirements": ["beautifulsoup4==4.11.1"], + "requirements": ["beautifulsoup4==4.11.1", "lxml==4.8.0"], "after_dependencies": ["rest"], "codeowners": ["@fabaff"], "iot_class": "cloud_polling" diff --git a/homeassistant/components/scrape/sensor.py b/homeassistant/components/scrape/sensor.py index 8f2a672ef06..e15f7c5ba97 100644 --- a/homeassistant/components/scrape/sensor.py +++ b/homeassistant/components/scrape/sensor.py @@ -154,7 +154,7 @@ class ScrapeSensor(SensorEntity): def _extract_value(self) -> Any: """Parse the html extraction in the executor.""" - raw_data = BeautifulSoup(self.rest.data, "html.parser") + raw_data = BeautifulSoup(self.rest.data, "lxml") _LOGGER.debug(raw_data) try: diff --git a/requirements_all.txt b/requirements_all.txt index a331efde6c2..dd808efa0bd 100644 --- 
a/requirements_all.txt +++ b/requirements_all.txt @@ -974,6 +974,9 @@ lupupy==0.0.24 # homeassistant.components.lw12wifi lw12==0.9.2 +# homeassistant.components.scrape +lxml==4.8.0 + # homeassistant.components.nmap_tracker mac-vendor-lookup==0.1.11 diff --git a/requirements_test_all.txt b/requirements_test_all.txt index 67bba2141d5..26f1c86d1af 100644 --- a/requirements_test_all.txt +++ b/requirements_test_all.txt @@ -663,6 +663,9 @@ lru-dict==1.1.7 # homeassistant.components.luftdaten luftdaten==0.7.2 +# homeassistant.components.scrape +lxml==4.8.0 + # homeassistant.components.nmap_tracker mac-vendor-lookup==0.1.11