Improve scrape performance by using lxml parser (#71087)
* Improve scrape performance by using lxml parser * load it * tweak * tweak * ensure libxml2 is installed in dev container
This commit is contained in:
parent
c23866e5e5
commit
b770ca319e
5 changed files with 9 additions and 2 deletions
|
@ -18,6 +18,7 @@ RUN \
|
|||
libavfilter-dev \
|
||||
libpcap-dev \
|
||||
libturbojpeg0 \
|
||||
libxml2 \
|
||||
git \
|
||||
cmake \
|
||||
&& apt-get clean \
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"domain": "scrape",
|
||||
"name": "Scrape",
|
||||
"documentation": "https://www.home-assistant.io/integrations/scrape",
|
||||
"requirements": ["beautifulsoup4==4.11.1"],
|
||||
"requirements": ["beautifulsoup4==4.11.1", "lxml==4.8.0"],
|
||||
"after_dependencies": ["rest"],
|
||||
"codeowners": ["@fabaff"],
|
||||
"iot_class": "cloud_polling"
|
||||
|
|
|
@ -154,7 +154,7 @@ class ScrapeSensor(SensorEntity):
|
|||
|
||||
def _extract_value(self) -> Any:
|
||||
"""Parse the html extraction in the executor."""
|
||||
raw_data = BeautifulSoup(self.rest.data, "html.parser")
|
||||
raw_data = BeautifulSoup(self.rest.data, "lxml")
|
||||
_LOGGER.debug(raw_data)
|
||||
|
||||
try:
|
||||
|
|
|
@ -974,6 +974,9 @@ lupupy==0.0.24
|
|||
# homeassistant.components.lw12wifi
|
||||
lw12==0.9.2
|
||||
|
||||
# homeassistant.components.scrape
|
||||
lxml==4.8.0
|
||||
|
||||
# homeassistant.components.nmap_tracker
|
||||
mac-vendor-lookup==0.1.11
|
||||
|
||||
|
|
|
@ -663,6 +663,9 @@ lru-dict==1.1.7
|
|||
# homeassistant.components.luftdaten
|
||||
luftdaten==0.7.2
|
||||
|
||||
# homeassistant.components.scrape
|
||||
lxml==4.8.0
|
||||
|
||||
# homeassistant.components.nmap_tracker
|
||||
mac-vendor-lookup==0.1.11
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue