Make Feedreader component more extendable (#14342)

* moved regular updates definition to own method to be able to override behaviour in subclass

* moved filter by max entries to own method to be able to override behaviour in subclass

* event type used when firing events to the bus now based on variable to be able to override behaviour in subclass

* feed id introduced instead of url for storing meta-data about the feed to be able to fetch the same feed from different configs with different filtering rules applied

* keep the status of the last update; continue processing the entries retrieved even if a recoverable error was detected while fetching the feed

* added test cases for feedreader component

* better explanation around breaking change

* fixing lint issues and hound violations

* fixing lint issue

* using assert_called_once_with instead of assert_called_once to make it compatible with python 3.5
This commit is contained in:
Malte Franken 2018-05-16 04:43:27 +10:00 committed by Sebastian Muszynski
parent de50d5d9c1
commit e49e0b5a13
7 changed files with 362 additions and 19 deletions

View file

@@ -421,7 +421,6 @@ omit =
homeassistant/components/emoncms_history.py
homeassistant/components/emulated_hue/upnp.py
homeassistant/components/fan/mqtt.py
homeassistant/components/feedreader.py
homeassistant/components/folder_watcher.py
homeassistant/components/foursquare.py
homeassistant/components/goalfeed.py

View file

@@ -55,16 +55,28 @@ class FeedManager(object):
self._firstrun = True
self._storage = storage
self._last_entry_timestamp = None
self._last_update_successful = False
self._has_published_parsed = False
self._event_type = EVENT_FEEDREADER
self._feed_id = url
hass.bus.listen_once(
EVENT_HOMEASSISTANT_START, lambda _: self._update())
track_utc_time_change(
hass, lambda now: self._update(), minute=0, second=0)
self._init_regular_updates(hass)
def _log_no_entries(self):
    """Log at debug level that the feed yielded nothing new to publish."""
    # Not an error: with etag/modified handling an unchanged feed
    # legitimately produces an empty entry list.
    feed_url = self._url
    _LOGGER.debug("No new entries to be published in feed %s", feed_url)
def _init_regular_updates(self, hass):
    """Schedule regular updates at the top of the clock.

    Split out into its own method so subclasses can override the
    update cadence.
    """
    def _scheduled_refresh(now):
        # The fired timestamp is irrelevant; _update re-fetches the feed.
        self._update()

    # Fires hourly at minute 0, second 0 (UTC).
    track_utc_time_change(hass, _scheduled_refresh, minute=0, second=0)
@property
def last_update_successful(self):
"""Return True if the last feed update was successful."""
return self._last_update_successful
def _update(self):
"""Update the feed and publish new entries to the event bus."""
import feedparser
@@ -76,26 +88,39 @@ class FeedManager(object):
else self._feed.get('modified'))
if not self._feed:
_LOGGER.error("Error fetching feed data from %s", self._url)
self._last_update_successful = False
else:
# The 'bozo' flag really only indicates that there was an issue
# during the initial parsing of the XML, but it doesn't indicate
# whether this is an unrecoverable error. In this case the
# feedparser lib is trying a less strict parsing approach.
# If an error is detected here, log error message but continue
# processing the feed entries if present.
if self._feed.bozo != 0:
_LOGGER.error("Error parsing feed %s", self._url)
_LOGGER.error("Error parsing feed %s: %s", self._url,
self._feed.bozo_exception)
# Using etag and modified, if there's no new data available,
# the entries list will be empty
elif self._feed.entries:
if self._feed.entries:
_LOGGER.debug("%s entri(es) available in feed %s",
len(self._feed.entries), self._url)
if len(self._feed.entries) > MAX_ENTRIES:
_LOGGER.debug("Processing only the first %s entries "
"in feed %s", MAX_ENTRIES, self._url)
self._feed.entries = self._feed.entries[0:MAX_ENTRIES]
self._filter_entries()
self._publish_new_entries()
if self._has_published_parsed:
self._storage.put_timestamp(
self._url, self._last_entry_timestamp)
self._feed_id, self._last_entry_timestamp)
else:
self._log_no_entries()
self._last_update_successful = True
_LOGGER.info("Fetch from feed %s completed", self._url)
def _filter_entries(self):
    """Cap the fetched entry list at MAX_ENTRIES items.

    Split out into its own method so subclasses can apply their own
    filtering rules.
    """
    entry_count = len(self._feed.entries)
    if entry_count > MAX_ENTRIES:
        _LOGGER.debug("Processing only the first %s entries "
                      "in feed %s", MAX_ENTRIES, self._url)
        # Keep only the leading MAX_ENTRIES entries, in feed order.
        self._feed.entries = self._feed.entries[:MAX_ENTRIES]
def _update_and_fire_entry(self, entry):
"""Update last_entry_timestamp and fire entry."""
# We are lucky, `published_parsed` data available, let's make use of
@@ -109,12 +134,12 @@ class FeedManager(object):
_LOGGER.debug("No published_parsed info available for entry %s",
entry.title)
entry.update({'feed_url': self._url})
self._hass.bus.fire(EVENT_FEEDREADER, entry)
self._hass.bus.fire(self._event_type, entry)
def _publish_new_entries(self):
"""Publish new entries to the event bus."""
new_entries = False
self._last_entry_timestamp = self._storage.get_timestamp(self._url)
self._last_entry_timestamp = self._storage.get_timestamp(self._feed_id)
if self._last_entry_timestamp:
self._firstrun = False
else:
@@ -157,18 +182,18 @@ class StoredData(object):
_LOGGER.error("Error loading data from pickled file %s",
self._data_file)
def get_timestamp(self, url):
"""Return stored timestamp for given url."""
def get_timestamp(self, feed_id):
"""Return stored timestamp for given feed id (usually the url)."""
self._fetch_data()
return self._data.get(url)
return self._data.get(feed_id)
def put_timestamp(self, url, timestamp):
"""Update timestamp for given URL."""
def put_timestamp(self, feed_id, timestamp):
"""Update timestamp for given feed id (usually the url)."""
self._fetch_data()
with self._lock, open(self._data_file, 'wb') as myfile:
self._data.update({url: timestamp})
self._data.update({feed_id: timestamp})
_LOGGER.debug("Overwriting feed %s timestamp in storage file %s",
url, self._data_file)
feed_id, self._data_file)
try:
pickle.dump(self._data, myfile)
except: # noqa: E722 # pylint: disable=bare-except

View file

@@ -0,0 +1,149 @@
"""The tests for the feedreader component."""
import time
from datetime import datetime
import unittest
from genericpath import exists
from logging import getLogger
from os import remove
from unittest import mock
from unittest.mock import patch
from homeassistant.components import feedreader
from homeassistant.components.feedreader import CONF_URLS, FeedManager, \
StoredData, EVENT_FEEDREADER
from homeassistant.const import EVENT_HOMEASSISTANT_START
from homeassistant.core import callback
from homeassistant.setup import setup_component
from tests.common import get_test_home_assistant, assert_setup_component, \
load_fixture
# Module-level logger for the tests (unused directly but kept for parity
# with the component module's logging setup).
_LOGGER = getLogger(__name__)

# Feed URL referenced by the test configurations below.
URL = 'http://some.rss.local/rss_feed.xml'

# Minimal valid feedreader configuration with a single feed URL.
VALID_CONFIG_1 = {
    feedreader.DOMAIN: {
        CONF_URLS: [URL]
    }
}
class TestFeedreaderComponent(unittest.TestCase):
    """Test the feedreader component."""

    def setUp(self):
        """Initialize values for this testcase class."""
        self.hass = get_test_home_assistant()
        # Delete any previously stored data so each test starts from a
        # clean pickle file.
        data_file = self.hass.config.path("{}.pickle".format('feedreader'))
        if exists(data_file):
            remove(data_file)

    def tearDown(self):
        """Stop everything that was started."""
        self.hass.stop()

    def test_setup_one_feed(self):
        """Test the general setup of this component."""
        with assert_setup_component(1, 'feedreader'):
            self.assertTrue(setup_component(self.hass, feedreader.DOMAIN,
                                            VALID_CONFIG_1))

    def setup_manager(self, feed_data):
        """Set up a FeedManager for *feed_data* and collect fired events.

        Returns a (manager, events) tuple where events holds every
        EVENT_FEEDREADER event the update produced.
        """
        events = []

        @callback
        def record_event(event):
            """Add recorded event to set."""
            events.append(event)

        self.hass.bus.listen(EVENT_FEEDREADER, record_event)

        # Loading raw data from fixture and plug in to data object as URL
        # works since the third-party feedparser library accepts a URL
        # as well as the actual data.
        data_file = self.hass.config.path("{}.pickle".format(
            feedreader.DOMAIN))
        storage = StoredData(data_file)
        with patch("homeassistant.components.feedreader."
                   "track_utc_time_change") as track_method:
            manager = FeedManager(feed_data, self.hass, storage)
            # Can't use 'assert_called_once' here because it's not available
            # in Python 3.5 yet.
            track_method.assert_called_once_with(self.hass, mock.ANY,
                                                 minute=0, second=0)
        # Artificially trigger update.
        self.hass.bus.fire(EVENT_HOMEASSISTANT_START)
        # Collect events.
        self.hass.block_till_done()
        return manager, events

    def test_feed(self):
        """Test simple feed with valid data."""
        feed_data = load_fixture('feedreader.xml')
        manager, events = self.setup_manager(feed_data)
        assert len(events) == 1
        assert events[0].data.title == "Title 1"
        assert events[0].data.description == "Description 1"
        assert events[0].data.link == "http://www.example.com/link/1"
        assert events[0].data.id == "GUID 1"
        assert datetime.fromtimestamp(
            time.mktime(events[0].data.published_parsed)) == \
            datetime(2018, 4, 30, 5, 10, 0)
        assert manager.last_update_successful is True

    def test_feed_updates(self):
        """Test feed updates."""
        # 1. Run
        feed_data = load_fixture('feedreader.xml')
        manager, events = self.setup_manager(feed_data)
        assert len(events) == 1
        # 2. Run
        feed_data2 = load_fixture('feedreader1.xml')
        # Must patch 'get_timestamp' method because the timestamp is stored
        # with the URL which in these tests is the raw XML data.
        with patch("homeassistant.components.feedreader.StoredData."
                   "get_timestamp", return_value=time.struct_time(
                       (2018, 4, 30, 5, 10, 0, 0, 120, 0))):
            manager2, events2 = self.setup_manager(feed_data2)
            assert len(events2) == 1
        # 3. Run
        feed_data3 = load_fixture('feedreader1.xml')
        with patch("homeassistant.components.feedreader.StoredData."
                   "get_timestamp", return_value=time.struct_time(
                       (2018, 4, 30, 5, 11, 0, 0, 120, 0))):
            manager3, events3 = self.setup_manager(feed_data3)
            assert len(events3) == 0

    def test_feed_max_length(self):
        """Test long feed beyond the 20 entry limit."""
        feed_data = load_fixture('feedreader2.xml')
        manager, events = self.setup_manager(feed_data)
        assert len(events) == 20

    def test_feed_without_publication_date(self):
        """Test simple feed with entry without publication date."""
        feed_data = load_fixture('feedreader3.xml')
        manager, events = self.setup_manager(feed_data)
        assert len(events) == 2

    def test_feed_invalid_data(self):
        """Test feed with invalid data."""
        feed_data = "INVALID DATA"
        manager, events = self.setup_manager(feed_data)
        assert len(events) == 0
        # Recoverable parse errors (bozo flag) still count as a
        # successful update.
        assert manager.last_update_successful is True

    @mock.patch('feedparser.parse', return_value=None)
    def test_feed_parsing_failed(self, mock_parse):
        """Test feed where parsing fails."""
        data_file = self.hass.config.path("{}.pickle".format(
            feedreader.DOMAIN))
        storage = StoredData(data_file)
        manager = FeedManager("FEED DATA", self.hass, storage)
        # Artificially trigger update.
        self.hass.bus.fire(EVENT_HOMEASSISTANT_START)
        # Collect events.
        self.hass.block_till_done()
        assert manager.last_update_successful is False

20
tests/fixtures/feedreader.xml vendored Normal file
View file

@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>RSS Sample</title>
<description>This is an example of an RSS feed</description>
<link>http://www.example.com/main.html</link>
<lastBuildDate>Mon, 30 Apr 2018 12:00:00 +1000 </lastBuildDate>
<pubDate>Mon, 30 Apr 2018 15:00:00 +1000</pubDate>
<ttl>1800</ttl>
<item>
<title>Title 1</title>
<description>Description 1</description>
<link>http://www.example.com/link/1</link>
<guid isPermaLink="false">GUID 1</guid>
<pubDate>Mon, 30 Apr 2018 15:10:00 +1000</pubDate>
</item>
</channel>
</rss>

27
tests/fixtures/feedreader1.xml vendored Normal file
View file

@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>RSS Sample</title>
<description>This is an example of an RSS feed</description>
<link>http://www.example.com/main.html</link>
<lastBuildDate>Mon, 30 Apr 2018 12:00:00 +1000 </lastBuildDate>
<pubDate>Mon, 30 Apr 2018 15:00:00 +1000</pubDate>
<ttl>1800</ttl>
<item>
<title>Title 1</title>
<description>Description 1</description>
<link>http://www.example.com/link/1</link>
<guid isPermaLink="false">GUID 1</guid>
<pubDate>Mon, 30 Apr 2018 15:10:00 +1000</pubDate>
</item>
<item>
<title>Title 2</title>
<description>Description 2</description>
<link>http://www.example.com/link/2</link>
<guid isPermaLink="false">GUID 2</guid>
<pubDate>Mon, 30 Apr 2018 15:11:00 +1000</pubDate>
</item>
</channel>
</rss>

97
tests/fixtures/feedreader2.xml vendored Normal file
View file

@@ -0,0 +1,97 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>RSS Sample</title>
<description>This is an example of an RSS feed</description>
<link>http://www.example.com/main.html</link>
<lastBuildDate>Mon, 30 Apr 2018 12:00:00 +1000 </lastBuildDate>
<pubDate>Mon, 30 Apr 2018 15:00:00 +1000</pubDate>
<ttl>1800</ttl>
<item>
<title>Title 1</title>
<pubDate>Mon, 30 Apr 2018 15:00:00 +1000</pubDate>
</item>
<item>
<title>Title 2</title>
<pubDate>Mon, 30 Apr 2018 15:01:00 +1000</pubDate>
</item>
<item>
<title>Title 3</title>
<pubDate>Mon, 30 Apr 2018 15:02:00 +1000</pubDate>
</item>
<item>
<title>Title 4</title>
<pubDate>Mon, 30 Apr 2018 15:03:00 +1000</pubDate>
</item>
<item>
<title>Title 5</title>
<pubDate>Mon, 30 Apr 2018 15:04:00 +1000</pubDate>
</item>
<item>
<title>Title 6</title>
<pubDate>Mon, 30 Apr 2018 15:05:00 +1000</pubDate>
</item>
<item>
<title>Title 7</title>
<pubDate>Mon, 30 Apr 2018 15:06:00 +1000</pubDate>
</item>
<item>
<title>Title 8</title>
<pubDate>Mon, 30 Apr 2018 15:07:00 +1000</pubDate>
</item>
<item>
<title>Title 9</title>
<pubDate>Mon, 30 Apr 2018 15:08:00 +1000</pubDate>
</item>
<item>
<title>Title 10</title>
<pubDate>Mon, 30 Apr 2018 15:09:00 +1000</pubDate>
</item>
<item>
<title>Title 11</title>
<pubDate>Mon, 30 Apr 2018 15:10:00 +1000</pubDate>
</item>
<item>
<title>Title 12</title>
<pubDate>Mon, 30 Apr 2018 15:11:00 +1000</pubDate>
</item>
<item>
<title>Title 13</title>
<pubDate>Mon, 30 Apr 2018 15:12:00 +1000</pubDate>
</item>
<item>
<title>Title 14</title>
<pubDate>Mon, 30 Apr 2018 15:13:00 +1000</pubDate>
</item>
<item>
<title>Title 15</title>
<pubDate>Mon, 30 Apr 2018 15:14:00 +1000</pubDate>
</item>
<item>
<title>Title 16</title>
<pubDate>Mon, 30 Apr 2018 15:15:00 +1000</pubDate>
</item>
<item>
<title>Title 17</title>
<pubDate>Mon, 30 Apr 2018 15:16:00 +1000</pubDate>
</item>
<item>
<title>Title 18</title>
<pubDate>Mon, 30 Apr 2018 15:17:00 +1000</pubDate>
</item>
<item>
<title>Title 19</title>
<pubDate>Mon, 30 Apr 2018 15:18:00 +1000</pubDate>
</item>
<item>
<title>Title 20</title>
<pubDate>Mon, 30 Apr 2018 15:19:00 +1000</pubDate>
</item>
<item>
<title>Title 21</title>
<pubDate>Mon, 30 Apr 2018 15:20:00 +1000</pubDate>
</item>
</channel>
</rss>

26
tests/fixtures/feedreader3.xml vendored Normal file
View file

@@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0">
<channel>
<title>RSS Sample</title>
<description>This is an example of an RSS feed</description>
<link>http://www.example.com/main.html</link>
<lastBuildDate>Mon, 30 Apr 2018 12:00:00 +1000 </lastBuildDate>
<pubDate>Mon, 30 Apr 2018 15:00:00 +1000</pubDate>
<ttl>1800</ttl>
<item>
<title>Title 1</title>
<description>Description 1</description>
<link>http://www.example.com/link/1</link>
<guid isPermaLink="false">GUID 1</guid>
<pubDate>Mon, 30 Apr 2018 15:10:00 +1000</pubDate>
</item>
<item>
<title>Title 2</title>
<description>Description 2</description>
<link>http://www.example.com/link/2</link>
<guid isPermaLink="false">GUID 2</guid>
</item>
</channel>
</rss>