From 19def8547f5c33ed256fc092d89938b11298ff3b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Wed, 7 Oct 2020 07:52:48 -0500 Subject: [PATCH] Reduce orm overhead by grouping object expiration While #40982 solved the performance overhead of expiring every time we commit the event session, it caused a regression which was fixed in #41349. Ideally we could avoid the overhead of expiring objects on commit since we are never going to use them again by using expunge after the commit to remove the objects we no longer need. Unfortunately this causes sqlalchemy to spend quite a bit of time sorting out state when adding new objects to the event session after an expunge_all so it wasn't a viable option. As not expiring the objects causes unexpected side effects, we can limit the impact of having to expire them by only doing the expire every 120 commits (by default every 2 minutes) instead of every commit (by default every second). This works because the expire operation itself is expensive regardless of the number of objects it is expiring. 
To verify this approach does not leak memory, a scale test integration was created (https://github.com/bdraco/scaletest) and run for hours along with an objgraph dump setup every 30 seconds to verify States and Events were being properly disposed of (https://mg.pov.lt/objgraph/objgraph.html#locating-and-filtering-objects) In testing this took a machine running the scaletest integration from 100% cpu to 4% cpu --- homeassistant/components/recorder/__init__.py | 36 ++++++++++++++----- tests/components/recorder/common.py | 8 ++--- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/homeassistant/components/recorder/__init__.py b/homeassistant/components/recorder/__init__.py index 8e6a251f660d47..3736ecaaf29d89 100644 --- a/homeassistant/components/recorder/__init__.py +++ b/homeassistant/components/recorder/__init__.py @@ -58,8 +58,13 @@ DEFAULT_DB_INTEGRITY_CHECK = True DEFAULT_DB_MAX_RETRIES = 10 DEFAULT_DB_RETRY_WAIT = 3 +DEFAULT_COMMIT_INTERVAL = 1 KEEPALIVE_TIME = 30 +# Controls how often we clean up +# States and Events objects +EXPIRE_AFTER_COMMITS = 120 + CONF_AUTO_PURGE = "auto_purge" CONF_DB_URL = "db_url" CONF_DB_MAX_RETRIES = "db_max_retries" @@ -91,9 +96,9 @@ vol.Coerce(int), vol.Range(min=0) ), vol.Optional(CONF_DB_URL): cv.string, - vol.Optional(CONF_COMMIT_INTERVAL, default=1): vol.All( - vol.Coerce(int), vol.Range(min=0) - ), + vol.Optional( + CONF_COMMIT_INTERVAL, default=DEFAULT_COMMIT_INTERVAL + ): vol.All(vol.Coerce(int), vol.Range(min=0)), vol.Optional( CONF_DB_MAX_RETRIES, default=DEFAULT_DB_MAX_RETRIES ): cv.positive_int, @@ -238,6 +243,7 @@ def __init__( self.exclude_t = exclude_t self._timechanges_seen = 0 + self._commits_without_expire = 0 self._keepalive_count = 0 self._old_states = {} self._pending_expunge = [] @@ -345,6 +351,7 @@ def async_purge(now): ) self.event_session = self.get_session() + self.event_session.expire_on_commit = False # Use a session for the event read loop # with a commit every time the event time # has 
changed. This reduces the disk io. @@ -485,24 +492,35 @@ def _reopen_event_session(self): try: self.event_session = self.get_session() + self.event_session.expire_on_commit = False except Exception as err: # pylint: disable=broad-except # Must catch the exception to prevent the loop from collapsing _LOGGER.exception("Error while creating new event session: %s", err) def _commit_event_session(self): + self._commits_without_expire += 1 + try: - self.event_session.flush() - for dbstate in self._pending_expunge: - # Expunge the state so its not expired - # until we use it later for dbstate.old_state - self.event_session.expunge(dbstate) - self._pending_expunge = [] + if self._pending_expunge: + self.event_session.flush() + for dbstate in self._pending_expunge: + # Expunge the state so its not expired + # until we use it later for dbstate.old_state + self.event_session.expunge(dbstate) + self._pending_expunge = [] self.event_session.commit() except Exception as err: _LOGGER.error("Error executing query: %s", err) self.event_session.rollback() raise + # Expire is an expensive operation (frequently more expensive + # than the flush and commit itself) so we only + # do it after EXPIRE_AFTER_COMMITS commits + if self._commits_without_expire == EXPIRE_AFTER_COMMITS: + self._commits_without_expire = 0 + self.event_session.expire_all() + @callback def event_listener(self, event): """Listen for new events and put them in the process queue.""" diff --git a/tests/components/recorder/common.py b/tests/components/recorder/common.py index 4c25771398c46c..1d0e6dbbfa00e5 100644 --- a/tests/components/recorder/common.py +++ b/tests/components/recorder/common.py @@ -1,12 +1,12 @@ """Common test utils for working with recorder.""" +from datetime import timedelta + from homeassistant.components import recorder from homeassistant.util import dt as dt_util from tests.common import fire_time_changed -DB_COMMIT_INTERVAL = 50 - def wait_recording_done(hass): """Block till recording is done.""" 
@@ -18,6 +18,6 @@ def wait_recording_done(hass): def trigger_db_commit(hass): """Force the recorder to commit.""" - for _ in range(DB_COMMIT_INTERVAL): + for _ in range(recorder.DEFAULT_COMMIT_INTERVAL): # We only commit on time change - fire_time_changed(hass, dt_util.utcnow()) + fire_time_changed(hass, dt_util.utcnow() + timedelta(seconds=1))