Move worker pool monitoring to be time based instead of add_job based. (#3439)

* Move worker pool monitoring to be time based instead of add_job based.

* Stub out worker pool monitor during tests

* Add test for monitor worker pool.

* Improve naming

* Test stop_monitor coroutine

* Add async_create_timer test

* Finish rename create_timer
This commit is contained in:
Paulus Schoutsen 2016-09-19 23:39:49 -07:00 committed by GitHub
parent d31f6bc3f0
commit be68fe0d85
7 changed files with 167 additions and 79 deletions

View file

@ -56,6 +56,9 @@ MIN_WORKER_THREAD = 2
# Pattern for validating entity IDs (format: <domain>.<entity>)
ENTITY_ID_PATTERN = re.compile(r"^(\w+)\.(\w+)$")
# Interval at which we check if the pool is getting busy
MONITOR_POOL_INTERVAL = 30
_LOGGER = logging.getLogger(__name__)
@ -190,7 +193,8 @@ class HomeAssistant(object):
This method is a coroutine.
"""
create_timer(self)
async_create_timer(self)
async_monitor_worker_pool(self)
self.bus.async_fire(EVENT_HOMEASSISTANT_START)
yield from self.loop.run_in_executor(None, self.pool.block_till_done)
self.state = CoreState.running
@ -1075,13 +1079,8 @@ class Config(object):
}
def create_timer(hass, interval=TIMER_INTERVAL):
def async_create_timer(hass, interval=TIMER_INTERVAL):
"""Create a timer that will start on HOMEASSISTANT_START."""
# We want to be able to fire every time a minute starts (seconds=0).
# We want this so other modules can use that to make sure they fire
# every minute.
assert 60 % interval == 0, "60 % TIMER_INTERVAL should be 0!"
stop_event = asyncio.Event(loop=hass.loop)
# Setting the Event inside the loop by marking it as a coroutine
@ -1160,14 +1159,48 @@ def create_worker_pool(worker_count=None):
# We do not want to crash our ThreadPool
_LOGGER.exception("BusHandler:Exception doing job")
def busy_callback(worker_count, current_jobs, pending_jobs_count):
"""Callback to be called when the pool queue gets too big."""
return util.ThreadPool(job_handler, worker_count)
def async_monitor_worker_pool(hass):
"""Create a monitor for the thread pool to check if pool is misbehaving."""
busy_threshold = hass.pool.worker_count * 3
handle = None
def schedule():
"""Schedule the monitor."""
nonlocal handle
handle = hass.loop.call_later(MONITOR_POOL_INTERVAL,
check_pool_threshold)
def check_pool_threshold():
"""Check pool size."""
nonlocal busy_threshold
pending_jobs = hass.pool.queue_size
if pending_jobs < busy_threshold:
schedule()
return
_LOGGER.warning(
"WorkerPool:All %d threads are busy and %d jobs pending",
worker_count, pending_jobs_count)
hass.pool.worker_count, pending_jobs)
for start, job in current_jobs:
_LOGGER.warning("WorkerPool:Current job from %s: %s",
for start, job in hass.pool.current_jobs:
_LOGGER.warning("WorkerPool:Current job started at %s: %s",
dt_util.as_local(start).isoformat(), job)
return util.ThreadPool(job_handler, worker_count, busy_callback)
busy_threshold *= 2
schedule()
schedule()
@asyncio.coroutine
def stop_monitor(event):
"""Stop the monitor."""
handle.cancel()
hass.bus.async_listen_once(EVENT_HOMEASSISTANT_STOP, stop_monitor)

View file

@ -143,7 +143,7 @@ class HomeAssistant(ha.HomeAssistant):
'Unable to setup local API to receive events')
self.state = ha.CoreState.starting
ha.create_timer(self)
ha.async_create_timer(self)
self.bus.fire(ha.EVENT_HOMEASSISTANT_START,
origin=ha.EventOrigin.remote)

View file

@ -308,7 +308,7 @@ class ThreadPool(object):
"""A priority queue-based thread pool."""
# pylint: disable=too-many-instance-attributes
def __init__(self, job_handler, worker_count=0, busy_callback=None):
def __init__(self, job_handler, worker_count=0):
"""Initialize the pool.
job_handler: method to be called from worker thread to handle job
@ -318,13 +318,10 @@ class ThreadPool(object):
pending_jobs_count
"""
self._job_handler = job_handler
self._busy_callback = busy_callback
self.worker_count = 0
self.busy_warning_limit = 0
self._work_queue = queue.PriorityQueue()
self.current_jobs = []
self._lock = threading.RLock()
self._quit_task = object()
self.running = True
@ -332,70 +329,45 @@ class ThreadPool(object):
for _ in range(worker_count):
self.add_worker()
@property
def queue_size(self):
"""Return estimated number of jobs that are waiting to be processed."""
return self._work_queue.qsize()
def add_worker(self):
"""Add worker to the thread pool and reset warning limit."""
with self._lock:
if not self.running:
raise RuntimeError("ThreadPool not running")
if not self.running:
raise RuntimeError("ThreadPool not running")
worker = threading.Thread(
target=self._worker,
name='ThreadPool Worker {}'.format(self.worker_count))
worker.daemon = True
worker.start()
threading.Thread(
target=self._worker, daemon=True,
name='ThreadPool Worker {}'.format(self.worker_count)).start()
self.worker_count += 1
self.busy_warning_limit = self.worker_count * 3
self.worker_count += 1
def remove_worker(self):
"""Remove worker from the thread pool and reset warning limit."""
with self._lock:
if not self.running:
raise RuntimeError("ThreadPool not running")
if not self.running:
raise RuntimeError("ThreadPool not running")
self._work_queue.put(PriorityQueueItem(0, self._quit_task))
self._work_queue.put(PriorityQueueItem(0, self._quit_task))
self.worker_count -= 1
self.busy_warning_limit = self.worker_count * 3
self.worker_count -= 1
def add_job(self, priority, job):
"""Add a job to the queue."""
with self._lock:
if not self.running:
raise RuntimeError("ThreadPool not running")
if not self.running:
raise RuntimeError("ThreadPool not running")
self._work_queue.put(PriorityQueueItem(priority, job))
# Check if our queue is getting too big.
if self._work_queue.qsize() > self.busy_warning_limit \
and self._busy_callback is not None:
# Increase limit we will issue next warning.
self.busy_warning_limit *= 2
self._busy_callback(
self.worker_count, self.current_jobs,
self._work_queue.qsize())
self._work_queue.put(PriorityQueueItem(priority, job))
def add_many_jobs(self, jobs):
"""Add a list of jobs to the queue."""
with self._lock:
if not self.running:
raise RuntimeError("ThreadPool not running")
if not self.running:
raise RuntimeError("ThreadPool not running")
for priority, job in jobs:
self._work_queue.put(PriorityQueueItem(priority, job))
# Check if our queue is getting too big.
if self._work_queue.qsize() > self.busy_warning_limit \
and self._busy_callback is not None:
# Increase limit we will issue next warning.
self.busy_warning_limit *= 2
self._busy_callback(
self.worker_count, self.current_jobs,
self._work_queue.qsize())
for priority, job in jobs:
self._work_queue.put(PriorityQueueItem(priority, job))
def block_till_done(self):
"""Block till current work is done."""
@ -405,18 +377,17 @@ class ThreadPool(object):
"""Finish all the jobs and stops all the threads."""
self.block_till_done()
with self._lock:
if not self.running:
return
if not self.running:
return
# Tell the workers to quit
for _ in range(self.worker_count):
self.remove_worker()
# Tell the workers to quit
for _ in range(self.worker_count):
self.remove_worker()
self.running = False
self.running = False
# Wait till all workers have quit
self.block_till_done()
# Wait till all workers have quit
self.block_till_done()
def _worker(self):
"""Handle jobs for the thread pool."""

View file

@ -2,6 +2,7 @@ flake8>=3.0.4
pylint>=1.5.6
coveralls>=1.1
pytest>=2.9.2
pytest-asyncio>=0.5.0
pytest-cov>=2.3.1
pytest-timeout>=1.0.0
pytest-catchlog>=1.2.2

View file

@ -75,9 +75,11 @@ def get_test_home_assistant(num_threads=None):
"""Helper to start hass."""
with patch.object(hass.loop, 'run_forever', return_value=None):
with patch.object(hass, 'async_stop', return_value=fake_stop()):
with patch.object(ha, 'create_timer', return_value=None):
orig_start()
hass.block_till_done()
with patch.object(ha, 'async_create_timer', return_value=None):
with patch.object(ha, 'async_monitor_worker_pool',
return_value=None):
orig_start()
hass.block_till_done()
def stop_hass():
orig_stop()

View file

@ -4,7 +4,7 @@
import os
import signal
import unittest
from unittest.mock import patch
from unittest.mock import patch, MagicMock
from datetime import datetime, timedelta
import pytz
@ -459,3 +459,84 @@ class TestWorkerPool(unittest.TestCase):
pool.add_job(ha.JobPriority.EVENT_DEFAULT, (register_call, None))
pool.block_till_done()
self.assertEqual(1, len(calls))
class TestWorkerPoolMonitor(object):
"""Test monitor_worker_pool."""
@patch('homeassistant.core._LOGGER.warning')
def test_worker_pool_monitor(self, mock_warning, event_loop):
"""Test we log an error and increase threshold."""
hass = MagicMock()
hass.pool.worker_count = 3
schedule_handle = MagicMock()
hass.loop.call_later.return_value = schedule_handle
ha.async_monitor_worker_pool(hass)
assert hass.loop.call_later.called
assert hass.bus.async_listen_once.called
assert not schedule_handle.called
check_threshold = hass.loop.call_later.mock_calls[0][1][1]
hass.pool.queue_size = 8
check_threshold()
assert not mock_warning.called
hass.pool.queue_size = 9
check_threshold()
assert mock_warning.called
mock_warning.reset_mock()
assert not mock_warning.called
check_threshold()
assert not mock_warning.called
hass.pool.queue_size = 17
check_threshold()
assert not mock_warning.called
hass.pool.queue_size = 18
check_threshold()
assert mock_warning.called
event_loop.run_until_complete(
hass.bus.async_listen_once.mock_calls[0][1][1](None))
assert schedule_handle.cancel.called
class TestAsyncCreateTimer(object):
"""Test create timer."""
@patch('homeassistant.core.asyncio.Event')
@patch('homeassistant.core.dt_util.utcnow')
def test_create_timer(self, mock_utcnow, mock_event, event_loop):
"""Test create timer fires correctly."""
hass = MagicMock()
now = mock_utcnow()
event = mock_event()
now.second = 1
mock_utcnow.reset_mock()
ha.async_create_timer(hass)
assert len(hass.bus.async_listen_once.mock_calls) == 2
start_timer = hass.bus.async_listen_once.mock_calls[1][1][1]
event_loop.run_until_complete(start_timer(None))
assert hass.loop.create_task.called
timer = hass.loop.create_task.mock_calls[0][1][0]
event.is_set.side_effect = False, False, True
event_loop.run_until_complete(timer)
assert len(mock_utcnow.mock_calls) == 1
assert hass.loop.call_soon.called
event_type, event_data = hass.loop.call_soon.mock_calls[0][1][1:]
assert ha.EVENT_TIME_CHANGED == event_type
assert {ha.ATTR_NOW: now} == event_data
stop_timer = hass.bus.async_listen_once.mock_calls[0][1][1]
event_loop.run_until_complete(stop_timer(None))
assert event.set.called

View file

@ -69,7 +69,7 @@ def setUpModule(): # pylint: disable=invalid-name
{http.DOMAIN: {http.CONF_API_PASSWORD: API_PASSWORD,
http.CONF_SERVER_PORT: SLAVE_PORT}})
with patch.object(ha, 'create_timer', return_value=None):
with patch.object(ha, 'async_create_timer', return_value=None):
slave.start()