* Add index parameter to scrape sensor. The scrape sensor selects the first element of the list returned by BeautifulSoup. This commit adds an optional index parameter to allow the selection of a different element from the list of results. To make this a non-breaking change, if no index value is configured, the sensor defaults to the previous behaviour of returning the first element. * Set default value for index to avoid later checks.
139 lines
4.5 KiB
Python
139 lines
4.5 KiB
Python
"""
|
|
Support for getting data from websites with scraping.
|
|
|
|
For more details about this platform, please refer to the documentation at
|
|
https://home-assistant.io/components/sensor.scrape/
|
|
"""
|
|
import logging
|
|
|
|
import voluptuous as vol
|
|
from requests.auth import HTTPBasicAuth, HTTPDigestAuth
|
|
|
|
from homeassistant.components.sensor import PLATFORM_SCHEMA
|
|
from homeassistant.components.sensor.rest import RestData
|
|
from homeassistant.const import (
|
|
CONF_NAME, CONF_RESOURCE, CONF_UNIT_OF_MEASUREMENT,
|
|
CONF_VALUE_TEMPLATE, CONF_VERIFY_SSL, CONF_USERNAME, CONF_HEADERS,
|
|
CONF_PASSWORD, CONF_AUTHENTICATION, HTTP_BASIC_AUTHENTICATION,
|
|
HTTP_DIGEST_AUTHENTICATION)
|
|
from homeassistant.helpers.entity import Entity
|
|
from homeassistant.exceptions import PlatformNotReady
|
|
import homeassistant.helpers.config_validation as cv
|
|
|
|
# Pinned BeautifulSoup release; the package itself is imported lazily inside
# ScrapeSensor.update rather than at module import time.
REQUIREMENTS = ['beautifulsoup4==4.7.1']

_LOGGER = logging.getLogger(__name__)

# Configuration keys specific to this platform.
CONF_ATTR = 'attribute'
CONF_SELECT = 'select'
CONF_INDEX = 'index'

# Fallbacks applied by the schema when the option is omitted.
DEFAULT_NAME = 'Web scrape'
DEFAULT_VERIFY_SSL = True

# Values accepted for the optional authentication setting.
_AUTH_SCHEMES = [HTTP_BASIC_AUTHENTICATION, HTTP_DIGEST_AUTHENTICATION]

# Options layered on top of the base sensor platform schema.  Only the
# resource URL and the selector are mandatory.
_SCRAPE_OPTIONS = {
    vol.Required(CONF_RESOURCE): cv.string,
    vol.Required(CONF_SELECT): cv.string,
    vol.Optional(CONF_ATTR): cv.string,
    # Index 0 keeps the "first match" behaviour when nothing is configured.
    vol.Optional(CONF_INDEX, default=0): cv.positive_int,
    vol.Optional(CONF_AUTHENTICATION): vol.In(_AUTH_SCHEMES),
    vol.Optional(CONF_HEADERS): vol.Schema({cv.string: cv.string}),
    vol.Optional(CONF_NAME, default=DEFAULT_NAME): cv.string,
    vol.Optional(CONF_PASSWORD): cv.string,
    vol.Optional(CONF_UNIT_OF_MEASUREMENT): cv.string,
    vol.Optional(CONF_USERNAME): cv.string,
    vol.Optional(CONF_VALUE_TEMPLATE): cv.template,
    vol.Optional(CONF_VERIFY_SSL, default=DEFAULT_VERIFY_SSL): cv.boolean,
}

PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(_SCRAPE_OPTIONS)
|
|
|
|
|
|
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the Web scrape sensor.

    Reads the platform options from ``config``, performs an initial GET of
    the configured resource and hands a single ``ScrapeSensor`` to
    ``add_entities``.

    Raises ``PlatformNotReady`` when the initial fetch leaves ``rest.data``
    set to ``None``.  ``discovery_info`` is accepted but not used here.
    """
    # Attach hass to the optional template before it is ever rendered.
    template = config.get(CONF_VALUE_TEMPLATE)
    if template is not None:
        template.hass = hass

    # Credentials are only used when both parts are present.  Basic auth is
    # the fallback unless digest is explicitly requested.
    user = config.get(CONF_USERNAME)
    secret = config.get(CONF_PASSWORD)
    auth = None
    if user and secret:
        wants_digest = (
            config.get(CONF_AUTHENTICATION) == HTTP_DIGEST_AUTHENTICATION)
        auth_factory = HTTPDigestAuth if wants_digest else HTTPBasicAuth
        auth = auth_factory(user, secret)

    rest = RestData(
        'GET',
        config.get(CONF_RESOURCE),
        auth,
        config.get(CONF_HEADERS),
        None,  # no request payload
        config.get(CONF_VERIFY_SSL),
    )
    rest.update()
    if rest.data is None:
        raise PlatformNotReady

    sensor = ScrapeSensor(
        rest,
        config.get(CONF_NAME),
        config.get(CONF_SELECT),
        config.get(CONF_ATTR),
        config.get(CONF_INDEX),
        template,
        config.get(CONF_UNIT_OF_MEASUREMENT),
    )
    # NOTE(review): the trailing True is presumably the "update before add"
    # flag of add_entities -- confirm against the Home Assistant API.
    add_entities([sensor], True)
|
|
|
|
|
|
class ScrapeSensor(Entity):
    """Representation of a web scrape sensor.

    On every update the sensor refreshes its ``rest`` fetcher, parses the
    returned markup with BeautifulSoup, picks one element matched by the
    configured selector and exposes either its text or one of its attributes
    as the state.
    """

    def __init__(self, rest, name, select, attr, index, value_template, unit):
        """Initialize a web scrape sensor.

        rest: fetcher exposing ``update()`` and a ``data`` attribute.
        name: name reported for the sensor.
        select: selector string passed to BeautifulSoup's ``select()``.
        attr: attribute to read from the matched element, or ``None`` to use
            the element's text.
        index: position of the wanted element in the list of matches.
        value_template: optional template used to post-process the value.
        unit: unit of measurement reported for the sensor.
        """
        self.rest = rest
        self._name = name
        self._state = None
        self._select = select
        self._attr = attr
        self._index = index
        self._value_template = value_template
        self._unit_of_measurement = unit

    @property
    def name(self):
        """Return the name of the sensor."""
        return self._name

    @property
    def unit_of_measurement(self):
        """Return the unit the value is expressed in."""
        return self._unit_of_measurement

    @property
    def state(self):
        """Return the state of the device."""
        return self._state

    def update(self):
        """Get the latest data from the source and update the state.

        If the fetch yields no data, or the configured element or attribute
        cannot be found, an error is logged and the previous state is kept.
        """
        self.rest.update()

        # A failed fetch leaves no data; feeding None to BeautifulSoup would
        # raise TypeError, so bail out and keep the last known state.
        if self.rest.data is None:
            _LOGGER.error("Unable to retrieve data from resource")
            return

        from bs4 import BeautifulSoup

        raw_data = BeautifulSoup(self.rest.data, 'html.parser')
        _LOGGER.debug(raw_data)

        try:
            element = raw_data.select(self._select)[self._index]
            if self._attr is not None:
                value = element[self._attr]
            else:
                value = element.text
        except (IndexError, KeyError):
            # IndexError: fewer matches than the requested index.
            # KeyError: the matched element lacks the requested attribute.
            _LOGGER.error("Unable to extract data from HTML")
            return
        _LOGGER.debug(value)

        if self._value_template is not None:
            self._state = self._value_template.render_with_possible_json_value(
                value, None)
        else:
            self._state = value
|