From 02a952ed3b1e9e6fed25c5f59c1cfef509193e56 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 8 Jan 2021 08:54:58 -1000 Subject: [PATCH 01/24] Adjust for MS SQL --- homeassistant/components/recorder/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/homeassistant/components/recorder/models.py b/homeassistant/components/recorder/models.py index 9481e954bde8ff..69e2115ce34026 100644 --- a/homeassistant/components/recorder/models.py +++ b/homeassistant/components/recorder/models.py @@ -117,7 +117,7 @@ class States(Base): # type: ignore last_updated = Column(DateTime(timezone=True), default=dt_util.utcnow, index=True) created = Column(DateTime(timezone=True), default=dt_util.utcnow) old_state_id = Column( - Integer, ForeignKey("states.state_id", ondelete="SET NULL"), index=True + Integer, ForeignKey("states.state_id", ondelete="NO ACTION"), index=True ) event = relationship("Events", uselist=False) old_state = relationship("States", remote_side=[state_id]) From 6512bba127837bfbfd322e7df161fd4a5cb98880 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 8 Jan 2021 08:55:51 -1000 Subject: [PATCH 02/24] Adjust for MS SQL --- homeassistant/components/recorder/purge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 43e84785f7d4f2..dd55fcdf873bdd 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -44,6 +44,7 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: _LOGGER.debug("Purging states and events before %s", batch_purge_before) + # TODO: update old_state_id to NULL before deleting to accommodate MSSQL deleted_rows = ( session.query(States) .filter(States.last_updated < batch_purge_before) From 712231bff411f93b920537393840bf86f2554fe5 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 16 Feb 2021 17:56:40 -1000 Subject: [PATCH 03/24] Adjust tests --- homeassistant/components/recorder/purge.py | 18 +++++++++++++++++- tests/components/recorder/test_purge.py | 2 +- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index dd55fcdf873bdd..6b11986b17f654 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -44,7 +44,23 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: _LOGGER.debug("Purging states and events before %s", batch_purge_before) - # TODO: update old_state_id to NULL before deleting to accommodate MSSQL + # Update old_state_id to NULL before deleting to ensure + # the delete does not fail due to a foreign key constraint + # since some databases (MSSQL) cannot do the ON DELETE CASCADE + # for us. + disconnected_rows = ( + session.query(States) + .filter( + States.old_state_id.in_( + session.query(States.state_id) + .filter(States.last_updated < batch_purge_before) + .subquery() + ) + ) + .update({"old_state_id": None}, synchronize_session=False) + ) + _LOGGER.debug("Updated %s states to remove old_state_id", disconnected_rows) + deleted_rows = ( session.query(States) .filter(States.last_updated < batch_purge_before) diff --git a/tests/components/recorder/test_purge.py b/tests/components/recorder/test_purge.py index 791bd84b11b603..54246d05c8bfdf 100644 --- a/tests/components/recorder/test_purge.py +++ b/tests/components/recorder/test_purge.py @@ -138,7 +138,7 @@ def test_purge_method(hass, hass_recorder): hass.data[DATA_INSTANCE].block_till_done() wait_recording_done(hass) assert ( - mock_logger.debug.mock_calls[5][1][0] + mock_logger.debug.mock_calls[6][1][0] == "Vacuuming SQL DB to free space" ) From 78c4d2f5d03d2bc234c184d5201dcc8751fd3214 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 16 Feb 2021 20:41:17 -1000 Subject: [PATCH 04/24] Update homeassistant/components/recorder/purge.py Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com> --- homeassistant/components/recorder/purge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 6b11986b17f654..c2a1d14b55b85f 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -46,7 +46,7 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: # Update old_state_id to NULL before deleting to ensure # the delete does not fail due to a foreign key constraint - # since some databases (MSSQL) cannot do the ON DELETE CASCADE + # since some databases (MSSQL) cannot do the ON DELETE SET NULL # for us. disconnected_rows = ( session.query(States) From c6f36a9a541cf6504f9eed51b05f951a68f815e1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 19 Feb 2021 07:26:01 -1000 Subject: [PATCH 05/24] Update homeassistant/components/recorder/purge.py Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com> --- homeassistant/components/recorder/purge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index c2a1d14b55b85f..2de565053e7420 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -50,6 +50,7 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: # for us. disconnected_rows = ( session.query(States) + .filter(States.last_updated >= batch_purge_before) .filter( States.old_state_id.in_( session.query(States.state_id) From 0c6170a2d515ec1fd970f7a7fd8eb30deb6cd584 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 19 Feb 2021 08:08:11 -1000 Subject: [PATCH 06/24] Revert "Update homeassistant/components/recorder/purge.py" This reverts commit eaba6a8117b7fc122faa6522b795084dab198223. --- homeassistant/components/recorder/purge.py | 1 - 1 file changed, 1 deletion(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 2de565053e7420..c2a1d14b55b85f 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -50,7 +50,6 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: # for us. disconnected_rows = ( session.query(States) - .filter(States.last_updated >= batch_purge_before) .filter( States.old_state_id.in_( session.query(States.state_id) From 41ff99b44eb5489de11aa6e547d2226c51aab65c Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 28 Feb 2021 12:20:50 -1000 Subject: [PATCH 07/24] switch purge method to be a batch of 5000 events at a time --- homeassistant/components/recorder/purge.py | 170 ++++++++++++--------- tests/components/recorder/test_purge.py | 43 +++--- 2 files changed, 117 insertions(+), 96 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index c2a1d14b55b85f..a4fb9a4a87d1d7 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -8,10 +8,12 @@ import homeassistant.util.dt as dt_util from .models import Events, RecorderRuns, States -from .util import execute, session_scope +from .util import session_scope _LOGGER = logging.getLogger(__name__) +MAX_ROWS_TO_PURGE = 5000 + def purge_old_data(instance, purge_days: int, repack: bool) -> bool: """Purge events and states older than purge_days ago. @@ -23,83 +25,21 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: try: with session_scope(session=instance.get_session()) as session: - # Purge a max of 1 hour, based on the oldest states or events record - batch_purge_before = purge_before - - query = session.query(States).order_by(States.last_updated.asc()).limit(1) - states = execute(query, to_native=True, validate_entity_ids=False) - if states: - batch_purge_before = min( - batch_purge_before, - states[0].last_updated + timedelta(hours=1), - ) - - query = session.query(Events).order_by(Events.time_fired.asc()).limit(1) - events = execute(query, to_native=True) - if events: - batch_purge_before = min( - batch_purge_before, - events[0].time_fired + timedelta(hours=1), - ) - - _LOGGER.debug("Purging states and events before %s", batch_purge_before) - - # Update old_state_id to NULL before deleting to ensure - # the delete does not fail due to a foreign key constraint - # since some databases (MSSQL) cannot do the ON DELETE SET NULL - # for us. - disconnected_rows = ( - session.query(States) - .filter( - States.old_state_id.in_( - session.query(States.state_id) - .filter(States.last_updated < batch_purge_before) - .subquery() - ) - ) - .update({"old_state_id": None}, synchronize_session=False) - ) - _LOGGER.debug("Updated %s states to remove old_state_id", disconnected_rows) - - deleted_rows = ( - session.query(States) - .filter(States.last_updated < batch_purge_before) - .delete(synchronize_session=False) - ) - _LOGGER.debug("Deleted %s states", deleted_rows) - - deleted_rows = ( - session.query(Events) - .filter(Events.time_fired < batch_purge_before) - .delete(synchronize_session=False) - ) - _LOGGER.debug("Deleted %s events", deleted_rows) - - # If states or events purging isn't processing the purge_before yet, - # return false, as we are not done yet. - if batch_purge_before != purge_before: + # Purge a max of MAX_ROWS_TO_PURGE, based on the oldest states or events record + event_ids = _select_event_ids_to_purge(session, purge_before) + state_ids = _select_state_ids_to_purge(session, event_ids) + if state_ids: + _disconnect_states_about_to_be_purge(session, state_ids) + _purge_state_ids(session, state_ids) + if event_ids: + _purge_event_ids(session, event_ids) + # If states or events purging isn't processing the purge_before yet, + # return false, as we are not done yet. _LOGGER.debug("Purging hasn't fully completed yet") return False - - # Recorder runs is small, no need to batch run it - deleted_rows = ( - session.query(RecorderRuns) - .filter(RecorderRuns.start < purge_before) - .filter(RecorderRuns.run_id != instance.run_info.run_id) - .delete(synchronize_session=False) - ) - _LOGGER.debug("Deleted %s recorder_runs", deleted_rows) - + _purge_old_recorder_runs(instance, session, purge_before) if repack: - # Execute sqlite or postgresql vacuum command to free up space on disk - if instance.engine.driver in ("pysqlite", "postgresql"): - _LOGGER.debug("Vacuuming SQL DB to free space") - instance.engine.execute("VACUUM") - # Optimize mysql / mariadb tables to free up space on disk - elif instance.engine.driver in ("mysqldb", "pymysql"): - _LOGGER.debug("Optimizing SQL DB to free space") - instance.engine.execute("OPTIMIZE TABLE states, events, recorder_runs") - + _repack_database(instance) except OperationalError as err: # Retry when one of the following MySQL errors occurred: # 1205: Lock wait timeout exceeded; try restarting transaction @@ -118,3 +58,83 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: except SQLAlchemyError as err: _LOGGER.warning("Error purging history: %s", err) return True + + +def _select_event_ids_to_purge(session, purge_before): + """Return a list of event ids to purge.""" + events = ( + session.query(Events.event_id) + .filter(Events.time_fired < purge_before) + .limit(MAX_ROWS_TO_PURGE) + .all() + ) + _LOGGER.debug("Selected %s event ids to remove", len(events)) + return [event.event_id for event in events] + + +def _select_state_ids_to_purge(session, event_ids): + """Return a list of state ids to purge.""" + states = session.query(States.state_id).filter(States.event_id.in_(event_ids)).all() + _LOGGER.debug("Selected %s state ids to remove", len(states)) + return [state.state_id for state in states] + + +def _disconnect_states_about_to_be_purge(session, state_ids): + # Update old_state_id to NULL before deleting to ensure + # the delete does not fail due to a foreign key constraint + # since some databases (MSSQL) cannot do the ON DELETE SET NULL + # for us. + disconnected_rows = ( + session.query(States) + .filter(States.old_state_id.in_(state_ids)) + .update({"old_state_id": None}, synchronize_session=False) + ) + _LOGGER.debug("Updated %s states to remove old_state_id", disconnected_rows) + + +def _purge_state_ids(session, state_ids): + """Delete by state id.""" + deleted_rows = ( + session.query(States) + .filter(States.state_id.in_(state_ids)) + .delete(synchronize_session=False) + ) + _LOGGER.debug("Deleted %s states", deleted_rows) + + +def _purge_event_ids(session, event_ids): + """Delete by event id.""" + deleted_rows = ( + session.query(Events) + .filter(Events.event_id.in_(event_ids)) + .delete(synchronize_session=False) + ) + _LOGGER.debug("Deleted %s events", deleted_rows) + + +def _purge_old_recorder_runs(instance, session, purge_before): + """Purge all old recorder runs.""" + # Recorder runs is small, no need to batch run it + deleted_rows = ( + session.query(RecorderRuns) + .filter(RecorderRuns.start < purge_before) + .filter(RecorderRuns.run_id != instance.run_info.run_id) + .delete(synchronize_session=False) + ) + _LOGGER.debug("Deleted %s recorder_runs", deleted_rows) + + +def _repack_database(instance): + """Repack based on engine type.""" + + # Execute sqlite or postgresql vacuum command to free up space on disk + if instance.engine.driver in ("pysqlite", "postgresql"): + _LOGGER.debug("Vacuuming SQL DB to free space") + instance.engine.execute("VACUUM") + return + + # Optimize mysql / mariadb tables to free up space on disk + if instance.engine.driver in ("mysqldb", "pymysql"): + _LOGGER.debug("Optimizing SQL DB to free space") + instance.engine.execute("OPTIMIZE TABLE states, events, recorder_runs") + return diff --git a/tests/components/recorder/test_purge.py b/tests/components/recorder/test_purge.py index 54246d05c8bfdf..fb06ed164ce439 100644 --- a/tests/components/recorder/test_purge.py +++ b/tests/components/recorder/test_purge.py @@ -1,7 +1,6 @@ """Test data purging.""" from datetime import datetime, timedelta import json -from unittest.mock import patch from homeassistant.components import recorder from homeassistant.components.recorder.const import DATA_INSTANCE @@ -22,12 +21,10 @@ def test_purge_old_states(hass, hass_recorder): with session_scope(hass=hass) as session: states = session.query(States) assert states.count() == 6 + events = session.query(Events).filter(Events.event_type == "state_changed") + assert events.count() == 6 # run purge_old_data() - finished = purge_old_data(hass.data[DATA_INSTANCE], 4, repack=False) - assert not finished - assert states.count() == 4 - finished = purge_old_data(hass.data[DATA_INSTANCE], 4, repack=False) assert not finished assert states.count() == 2 @@ -47,10 +44,6 @@ def test_purge_old_events(hass, hass_recorder): assert events.count() == 6 # run purge_old_data() - finished = purge_old_data(hass.data[DATA_INSTANCE], 4, repack=False) - assert not finished - assert events.count() == 4 - finished = purge_old_data(hass.data[DATA_INSTANCE], 4, repack=False) assert not finished assert events.count() == 2 @@ -72,12 +65,15 @@ def test_purge_old_recorder_runs(hass, hass_recorder): assert recorder_runs.count() == 7 # run purge_old_data() + finished = purge_old_data(hass.data[DATA_INSTANCE], 0, repack=False) + assert not finished + finished = purge_old_data(hass.data[DATA_INSTANCE], 0, repack=False) assert finished assert recorder_runs.count() == 1 -def test_purge_method(hass, hass_recorder): +def test_purge_method(hass, hass_recorder, caplog): """Test purge method.""" hass = hass_recorder() service_data = {"keep_days": 4} @@ -131,16 +127,12 @@ def test_purge_method(hass, hass_recorder): assert not ("EVENT_TEST_PURGE" in (event.event_type for event in events.all())) # run purge method - correct service data, with repack - with patch("homeassistant.components.recorder.purge._LOGGER") as mock_logger: - service_data["repack"] = True - hass.services.call("recorder", "purge", service_data=service_data) - hass.block_till_done() - hass.data[DATA_INSTANCE].block_till_done() - wait_recording_done(hass) - assert ( - mock_logger.debug.mock_calls[6][1][0] - == "Vacuuming SQL DB to free space" - ) + service_data["repack"] = True + hass.services.call("recorder", "purge", service_data=service_data) + hass.block_till_done() + hass.data[DATA_INSTANCE].block_till_done() + wait_recording_done(hass) + assert "Vacuuming SQL DB to free space" in caplog.text def _add_test_states(hass): @@ -166,6 +158,15 @@ def _add_test_states(hass): timestamp = now state = "dontpurgeme" + event = Events( + event_type="state_changed", + event_data="{}", + origin="LOCAL", + created=timestamp, + time_fired=timestamp, + ) + session.add(event) + session.flush() session.add( States( entity_id="test.recorder2", @@ -175,7 +176,7 @@ def _add_test_states(hass): last_changed=timestamp, last_updated=timestamp, created=timestamp, - event_id=event_id + 1000, + event_id=event.event_id, ) ) From e4e3f7f8f696d07a38a4bc270b93c0b9a6289329 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 28 Feb 2021 12:30:54 -1000 Subject: [PATCH 08/24] augment coverage --- tests/components/recorder/test_purge.py | 32 ++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tests/components/recorder/test_purge.py b/tests/components/recorder/test_purge.py index fb06ed164ce439..c04aa38fd15d5b 100644 --- a/tests/components/recorder/test_purge.py +++ b/tests/components/recorder/test_purge.py @@ -21,6 +21,9 @@ def test_purge_old_states(hass, hass_recorder): with session_scope(hass=hass) as session: states = session.query(States) assert states.count() == 6 + assert states[0].old_state_id is None + assert states[-1].old_state_id == states[-2].state_id + events = session.query(Events).filter(Events.event_type == "state_changed") assert events.count() == 6 @@ -29,6 +32,10 @@ def test_purge_old_states(hass, hass_recorder): assert not finished assert states.count() == 2 + states_after_purge = session.query(States) + assert states_after_purge[1].old_state_id == states_after_purge[0].state_id + assert states_after_purge[0].old_state_id is None + finished = purge_old_data(hass.data[DATA_INSTANCE], 4, repack=False) assert finished assert states.count() == 2 @@ -147,6 +154,7 @@ def _add_test_states(hass): wait_recording_done(hass) with recorder.session_scope(hass=hass) as session: + old_state_id = None for event_id in range(6): if event_id < 2: timestamp = eleven_days_ago @@ -167,18 +175,20 @@ def _add_test_states(hass): ) session.add(event) session.flush() - session.add( - States( - entity_id="test.recorder2", - domain="sensor", - state=state, - attributes=json.dumps(attributes), - last_changed=timestamp, - last_updated=timestamp, - created=timestamp, - event_id=event.event_id, - ) + state = States( + entity_id="test.recorder2", + domain="sensor", + state=state, + attributes=json.dumps(attributes), + last_changed=timestamp, + last_updated=timestamp, + created=timestamp, + event_id=event.event_id, + old_state_id=old_state_id, ) + session.add(state) + session.flush() + old_state_id = state.state_id def _add_test_events(hass): From c21493b35ef29bad85a485c2bfa21050b5338eda Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 28 Feb 2021 12:33:24 -1000 Subject: [PATCH 09/24] naming --- homeassistant/components/recorder/purge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index a4fb9a4a87d1d7..b125a8a73c1209 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -29,7 +29,7 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: event_ids = _select_event_ids_to_purge(session, purge_before) state_ids = _select_state_ids_to_purge(session, event_ids) if state_ids: - _disconnect_states_about_to_be_purge(session, state_ids) + _disconnect_states_about_to_be_purged(session, state_ids) _purge_state_ids(session, state_ids) if event_ids: _purge_event_ids(session, event_ids) @@ -79,7 +79,7 @@ def _select_state_ids_to_purge(session, event_ids): return [state.state_id for state in states] -def _disconnect_states_about_to_be_purge(session, state_ids): +def _disconnect_states_about_to_be_purged(session, state_ids): # Update old_state_id to NULL before deleting to ensure # the delete does not fail due to a foreign key constraint # since some databases (MSSQL) cannot do the ON DELETE SET NULL From dad57b02e8803f11c0cc91ea6faf1a5ee8e53e73 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 28 Feb 2021 12:38:54 -1000 Subject: [PATCH 10/24] use name --- homeassistant/components/recorder/purge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index b125a8a73c1209..617493ae6d0925 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -128,13 +128,13 @@ def _repack_database(instance): """Repack based on engine type.""" # Execute sqlite or postgresql vacuum command to free up space on disk - if instance.engine.driver in ("pysqlite", "postgresql"): + if instance.engine.dialect.name in ("postgresql", "sqlite"): _LOGGER.debug("Vacuuming SQL DB to free space") instance.engine.execute("VACUUM") return # Optimize mysql / mariadb tables to free up space on disk - if instance.engine.driver in ("mysqldb", "pymysql"): + if instance.engine.dialect.name == "mysql": _LOGGER.debug("Optimizing SQL DB to free space") instance.engine.execute("OPTIMIZE TABLE states, events, recorder_runs") return From 92e502d6c631fb63235863f16f81224cd39bd21b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 28 Feb 2021 12:55:36 -1000 Subject: [PATCH 11/24] lower rows to purge at once --- homeassistant/components/recorder/purge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 617493ae6d0925..04fb8a91716834 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -12,7 +12,7 @@ _LOGGER = logging.getLogger(__name__) -MAX_ROWS_TO_PURGE = 5000 +MAX_ROWS_TO_PURGE = 1000 def purge_old_data(instance, purge_days: int, repack: bool) -> bool: From 43d6a42a99aac699db4c73f85941b7442c973605 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 28 Feb 2021 13:15:26 -1000 Subject: [PATCH 12/24] fix postgres vacuum --- homeassistant/components/recorder/purge.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 04fb8a91716834..b418b71df8feb3 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -127,12 +127,21 @@ def _purge_old_recorder_runs(instance, session, purge_before): def _repack_database(instance): """Repack based on engine type.""" - # Execute sqlite or postgresql vacuum command to free up space on disk - if instance.engine.dialect.name in ("postgresql", "sqlite"): + # Execute sqlite command to free up space on disk + if instance.engine.dialect.name == "sqlite": _LOGGER.debug("Vacuuming SQL DB to free space") instance.engine.execute("VACUUM") return + # Execute postgresql vacuum command to free up space on disk + if instance.engine.dialect.name == "postgresql": + _LOGGER.debug("Vacuuming SQL DB to free space") + with instance.engine.connect().execution_options( + isolation_level="AUTOCOMMIT" + ) as conn: + conn.execute("VACUUM") + return + # Optimize mysql / mariadb tables to free up space on disk if instance.engine.dialect.name == "mysql": _LOGGER.debug("Optimizing SQL DB to free space") From 1f67bf848d585bf48faad6f3ceecfbf6f36a533d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 28 Feb 2021 13:32:42 -1000 Subject: [PATCH 13/24] isolate repack code --- .coveragerc | 1 + homeassistant/components/recorder/purge.py | 28 ++----------------- homeassistant/components/recorder/repack.py | 30 +++++++++++++++++++++ 3 files changed, 33 insertions(+), 26 deletions(-) create mode 100644 homeassistant/components/recorder/repack.py diff --git a/.coveragerc b/.coveragerc index 2347ee1902eca0..db940ed642b450 100644 --- a/.coveragerc +++ b/.coveragerc @@ -785,6 +785,7 @@ omit = homeassistant/components/raspyrfm/* homeassistant/components/recollect_waste/__init__.py homeassistant/components/recollect_waste/sensor.py + homeassistant/components/recorder/repack.py homeassistant/components/recswitch/switch.py homeassistant/components/reddit/* homeassistant/components/rejseplanen/sensor.py diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index b418b71df8feb3..72db8a44f893c4 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -8,6 +8,7 @@ import homeassistant.util.dt as dt_util from .models import Events, RecorderRuns, States +from .repack import repack_database from .util import session_scope _LOGGER = logging.getLogger(__name__) @@ -39,7 +40,7 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: return False _purge_old_recorder_runs(instance, session, purge_before) if repack: - _repack_database(instance) + repack_database(instance) except OperationalError as err: # Retry when one of the following MySQL errors occurred: # 1205: Lock wait timeout exceeded; try restarting transaction @@ -122,28 +123,3 @@ def _purge_old_recorder_runs(instance, session, purge_before): .delete(synchronize_session=False) ) _LOGGER.debug("Deleted %s recorder_runs", deleted_rows) - - -def _repack_database(instance): - """Repack based on engine type.""" - - # Execute sqlite command to free up space on disk - if instance.engine.dialect.name == "sqlite": - _LOGGER.debug("Vacuuming SQL DB to free space") - instance.engine.execute("VACUUM") - return - - # Execute postgresql vacuum command to free up space on disk - if instance.engine.dialect.name == "postgresql": - _LOGGER.debug("Vacuuming SQL DB to free space") - with instance.engine.connect().execution_options( - isolation_level="AUTOCOMMIT" - ) as conn: - conn.execute("VACUUM") - return - - # Optimize mysql / mariadb tables to free up space on disk - if instance.engine.dialect.name == "mysql": - _LOGGER.debug("Optimizing SQL DB to free space") - instance.engine.execute("OPTIMIZE TABLE states, events, recorder_runs") - return diff --git a/homeassistant/components/recorder/repack.py b/homeassistant/components/recorder/repack.py new file mode 100644 index 00000000000000..72f3cd9a04c985 --- /dev/null +++ b/homeassistant/components/recorder/repack.py @@ -0,0 +1,30 @@ +"""Purge repack helper.""" + +import logging + +_LOGGER = logging.getLogger(__name__) + + +def repack_database(instance): + """Repack based on engine type.""" + + # Execute sqlite command to free up space on disk + if instance.engine.dialect.name == "sqlite": + _LOGGER.debug("Vacuuming SQL DB to free space") + instance.engine.execute("VACUUM") + return + + # Execute postgresql vacuum command to free up space on disk + if instance.engine.dialect.name == "postgresql": + _LOGGER.debug("Vacuuming SQL DB to free space") + with instance.engine.connect().execution_options( + isolation_level="AUTOCOMMIT" + ) as conn: + conn.execute("VACUUM") + return + + # Optimize mysql / mariadb tables to free up space on disk + if instance.engine.dialect.name == "mysql": + _LOGGER.debug("Optimizing SQL DB to free space") + instance.engine.execute("OPTIMIZE TABLE states, events, recorder_runs") + return From 5894532db75644276100b886c575230919bef8a1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 8 Mar 2021 13:13:35 -1000 Subject: [PATCH 14/24] Update homeassistant/components/recorder/purge.py Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com> --- homeassistant/components/recorder/purge.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 72db8a44f893c4..0c6b8107f3dd8b 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -75,6 +75,8 @@ def _select_event_ids_to_purge(session, purge_before): def _select_state_ids_to_purge(session, event_ids): """Return a list of state ids to purge.""" + if not event_ids: + return [] states = session.query(States.state_id).filter(States.event_id.in_(event_ids)).all() _LOGGER.debug("Selected %s state ids to remove", len(states)) return [state.state_id for state in states] From 84ffc3810cdeae7a9ba9ceacbb765eec8dd4d275 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 8 Mar 2021 13:23:20 -1000 Subject: [PATCH 15/24] typing --- homeassistant/components/recorder/purge.py | 23 ++++++++++++--------- homeassistant/components/recorder/repack.py | 4 +++- setup.cfg | 2 +- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 0c6b8107f3dd8b..48ed55f6a5b5c4 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -1,12 +1,14 @@ """Purge old data helper.""" -from datetime import timedelta +from datetime import datetime, timedelta import logging import time from sqlalchemy.exc import OperationalError, SQLAlchemyError +from sqlalchemy.orm.session import Session import homeassistant.util.dt as dt_util +from . import Recorder from .models import Events, RecorderRuns, States from .repack import repack_database from .util import session_scope @@ -16,16 +18,15 @@ MAX_ROWS_TO_PURGE = 1000 -def purge_old_data(instance, purge_days: int, repack: bool) -> bool: +def purge_old_data(instance: Recorder, purge_days: int, repack: bool) -> bool: """Purge events and states older than purge_days ago. Cleans up an timeframe of an hour, based on the oldest record. """ purge_before = dt_util.utcnow() - timedelta(days=purge_days) _LOGGER.debug("Purging states and events before target %s", purge_before) - try: - with session_scope(session=instance.get_session()) as session: + with session_scope(session=instance.get_session()) as session: # type: ignore # Purge a max of MAX_ROWS_TO_PURGE, based on the oldest states or events record event_ids = _select_event_ids_to_purge(session, purge_before) state_ids = _select_state_ids_to_purge(session, event_ids) @@ -61,7 +62,7 @@ def purge_old_data(instance, purge_days: int, repack: bool) -> bool: return True -def _select_event_ids_to_purge(session, purge_before): +def _select_event_ids_to_purge(session: Session, purge_before: datetime) -> list: """Return a list of event ids to purge.""" events = ( session.query(Events.event_id) @@ -73,7 +74,7 @@ def _select_event_ids_to_purge(session, purge_before): return [event.event_id for event in events] -def _select_state_ids_to_purge(session, event_ids): +def _select_state_ids_to_purge(session: Session, event_ids: list) -> list: """Return a list of state ids to purge.""" if not event_ids: return [] @@ -82,7 +83,7 @@ def _select_state_ids_to_purge(session, event_ids): return [state.state_id for state in states] -def _disconnect_states_about_to_be_purged(session, state_ids): +def _disconnect_states_about_to_be_purged(session: Session, state_ids: list) -> None: # Update old_state_id to NULL before deleting to ensure # the delete does not fail due to a foreign key constraint # since some databases (MSSQL) cannot do the ON DELETE SET NULL @@ -95,7 +96,7 @@ def _disconnect_states_about_to_be_purged(session, state_ids): _LOGGER.debug("Updated %s states to remove old_state_id", disconnected_rows) -def _purge_state_ids(session, state_ids): +def _purge_state_ids(session: Session, state_ids: list) -> None: """Delete by state id.""" deleted_rows = ( session.query(States) @@ -105,7 +106,7 @@ def _purge_state_ids(session, state_ids): _LOGGER.debug("Deleted %s states", deleted_rows) -def _purge_event_ids(session, event_ids): +def _purge_event_ids(session: Session, event_ids: list) -> None: """Delete by event id.""" deleted_rows = ( session.query(Events) @@ -115,7 +116,9 @@ def _purge_event_ids(session, event_ids): _LOGGER.debug("Deleted %s events", deleted_rows) -def _purge_old_recorder_runs(instance, session, purge_before): +def _purge_old_recorder_runs( + instance: Recorder, session: Session, purge_before: datetime +) -> None: """Purge all old recorder runs.""" # Recorder runs is small, no need to batch run it deleted_rows = ( diff --git a/homeassistant/components/recorder/repack.py b/homeassistant/components/recorder/repack.py index 72f3cd9a04c985..1878e8a3d92d70 100644 --- a/homeassistant/components/recorder/repack.py +++ b/homeassistant/components/recorder/repack.py @@ -2,10 +2,12 @@ import logging +from . import Recorder + _LOGGER = logging.getLogger(__name__) -def repack_database(instance): +def repack_database(instance: Recorder): """Repack based on engine type.""" # Execute sqlite command to free up space on disk diff --git a/setup.cfg b/setup.cfg index 98a01278838bbe..dbdb61b5fcf9e2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,7 +43,7 @@ warn_redundant_casts = true warn_unused_configs = true -[mypy-homeassistant.block_async_io,homeassistant.bootstrap,homeassistant.components,homeassistant.config_entries,homeassistant.config,homeassistant.const,homeassistant.core,homeassistant.data_entry_flow,homeassistant.exceptions,homeassistant.__init__,homeassistant.loader,homeassistant.__main__,homeassistant.requirements,homeassistant.runner,homeassistant.setup,homeassistant.util,homeassistant.auth.*,homeassistant.components.automation.*,homeassistant.components.binary_sensor.*,homeassistant.components.bond.*,homeassistant.components.calendar.*,homeassistant.components.cover.*,homeassistant.components.device_automation.*,homeassistant.components.frontend.*,homeassistant.components.geo_location.*,homeassistant.components.group.*,homeassistant.components.history.*,homeassistant.components.http.*,homeassistant.components.huawei_lte.*,homeassistant.components.hyperion.*,homeassistant.components.image_processing.*,homeassistant.components.integration.*,homeassistant.components.light.*,homeassistant.components.lock.*,homeassistant.components.mailbox.*,homeassistant.components.media_player.*,homeassistant.components.notify.*,homeassistant.components.number.*,homeassistant.components.persistent_notification.*,homeassistant.components.proximity.*,homeassistant.components.remote.*,homeassistant.components.scene.*,homeassistant.components.sensor.*,homeassistant.components.slack.*,homeassistant.components.sun.*,homeassistant.components.switch.*,homeassistant.components.systemmonitor.*,homeassistant.components.tts.*,homeassistant.components.vacuum.*,homeassistant.components.water_heater.*,homeassistant.components.weather.*,homeassistant.components.websocket_api.*,homeassistant.components.zone.*,homeassistant.components.zwave_js.*,homeassistant.helpers.*,homeassistant.scripts.*,homeassistant.util.*,tests.components.hyperion.*] +[mypy-homeassistant.block_async_io,homeassistant.bootstrap,homeassistant.components,homeassistant.config_entries,homeassistant.config,homeassistant.const,homeassistant.core,homeassistant.data_entry_flow,homeassistant.exceptions,homeassistant.__init__,homeassistant.loader,homeassistant.__main__,homeassistant.requirements,homeassistant.runner,homeassistant.setup,homeassistant.util,homeassistant.auth.*,homeassistant.components.automation.*,homeassistant.components.binary_sensor.*,homeassistant.components.bond.*,homeassistant.components.calendar.*,homeassistant.components.cover.*,homeassistant.components.device_automation.*,homeassistant.components.frontend.*,homeassistant.components.geo_location.*,homeassistant.components.group.*,homeassistant.components.history.*,homeassistant.components.http.*,homeassistant.components.huawei_lte.*,homeassistant.components.hyperion.*,homeassistant.components.image_processing.*,homeassistant.components.integration.*,homeassistant.components.light.*,homeassistant.components.lock.*,homeassistant.components.mailbox.*,homeassistant.components.media_player.*,homeassistant.components.notify.*,homeassistant.components.number.*,homeassistant.components.persistent_notification.*,homeassistant.components.proximity.*,homeassistant.components.recorder.purge,homeassistant.components.recorder.repack,homeassistant.components.remote.*,homeassistant.components.scene.*,homeassistant.components.sensor.*,homeassistant.components.slack.*,homeassistant.components.sun.*,homeassistant.components.switch.*,homeassistant.components.systemmonitor.*,homeassistant.components.tts.*,homeassistant.components.vacuum.*,homeassistant.components.water_heater.*,homeassistant.components.weather.*,homeassistant.components.websocket_api.*,homeassistant.components.zone.*,homeassistant.components.zwave_js.*,homeassistant.helpers.*,homeassistant.scripts.*,homeassistant.util.*,tests.components.hyperion.*] strict = true ignore_errors = false warn_unreachable = true From 190ad66dfc97356e2874830f52f3c9fe0a7ca74a Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 8 Mar 2021 13:30:11 -1000 Subject: [PATCH 16/24] fix typing --- homeassistant/components/recorder/purge.py | 9 ++++++--- homeassistant/components/recorder/repack.py | 6 ++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 48ed55f6a5b5c4..967904d5b8ffb5 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -2,23 +2,26 @@ from datetime import datetime, timedelta import logging import time +from typing import TYPE_CHECKING from sqlalchemy.exc import OperationalError, SQLAlchemyError from sqlalchemy.orm.session import Session import homeassistant.util.dt as dt_util -from . import Recorder from .models import Events, RecorderRuns, States from .repack import repack_database from .util import session_scope +if TYPE_CHECKING: + from . import Recorder + _LOGGER = logging.getLogger(__name__) MAX_ROWS_TO_PURGE = 1000 -def purge_old_data(instance: Recorder, purge_days: int, repack: bool) -> bool: +def purge_old_data(instance: "Recorder", purge_days: int, repack: bool) -> bool: """Purge events and states older than purge_days ago. Cleans up an timeframe of an hour, based on the oldest record. @@ -117,7 +120,7 @@ def _purge_event_ids(session: Session, event_ids: list) -> None: def _purge_old_recorder_runs( - instance: Recorder, session: Session, purge_before: datetime + instance: "Recorder", session: Session, purge_before: datetime ) -> None: """Purge all old recorder runs.""" # Recorder runs is small, no need to batch run it diff --git a/homeassistant/components/recorder/repack.py b/homeassistant/components/recorder/repack.py index 1878e8a3d92d70..547b0c77daa5ea 100644 --- a/homeassistant/components/recorder/repack.py +++ b/homeassistant/components/recorder/repack.py @@ -1,13 +1,15 @@ """Purge repack helper.""" import logging +from typing import TYPE_CHECKING -from . import Recorder +if TYPE_CHECKING: + from . import Recorder _LOGGER = logging.getLogger(__name__) -def repack_database(instance: Recorder): +def repack_database(instance: "Recorder"): """Repack based on engine type.""" # Execute sqlite command to free up space on disk From 75efce6a4d8e3db414f4f9d525a57640dfc46600 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 8 Mar 2021 13:38:56 -1000 Subject: [PATCH 17/24] typing --- homeassistant/components/recorder/repack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/homeassistant/components/recorder/repack.py b/homeassistant/components/recorder/repack.py index 547b0c77daa5ea..7b3e17b2adb2e2 100644 --- a/homeassistant/components/recorder/repack.py +++ b/homeassistant/components/recorder/repack.py @@ -9,7 +9,7 @@ _LOGGER = logging.getLogger(__name__) -def repack_database(instance: "Recorder"): +def repack_database(instance: "Recorder") -> None: """Repack based on engine type.""" # Execute sqlite command to free up space on disk From 90aae2a55809831330a68c4481dd6491fca95288 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 8 Mar 2021 13:41:23 -1000 Subject: [PATCH 18/24] remove quotes --- homeassistant/components/recorder/purge.py | 6 ++++-- homeassistant/components/recorder/repack.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 967904d5b8ffb5..b1f2556cc72ada 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -1,4 +1,6 @@ """Purge old data helper.""" +from __future__ import annotations + from datetime import datetime, timedelta import logging import time @@ -21,7 +23,7 @@ MAX_ROWS_TO_PURGE = 1000 -def purge_old_data(instance: "Recorder", purge_days: int, repack: bool) -> bool: +def purge_old_data(instance: Recorder, purge_days: int, repack: bool) -> bool: """Purge events and states older than purge_days ago. Cleans up an timeframe of an hour, based on the oldest record. @@ -120,7 +122,7 @@ def _purge_event_ids(session: Session, event_ids: list) -> None: def _purge_old_recorder_runs( - instance: "Recorder", session: Session, purge_before: datetime + instance: Recorder, session: Session, purge_before: datetime ) -> None: """Purge all old recorder runs.""" # Recorder runs is small, no need to batch run it diff --git a/homeassistant/components/recorder/repack.py b/homeassistant/components/recorder/repack.py index 7b3e17b2adb2e2..68d7d5954c92b6 100644 --- a/homeassistant/components/recorder/repack.py +++ b/homeassistant/components/recorder/repack.py @@ -1,4 +1,5 @@ """Purge repack helper.""" +from __future__ import annotations import logging from typing import TYPE_CHECKING @@ -9,7 +10,7 @@ _LOGGER = logging.getLogger(__name__) -def repack_database(instance: "Recorder") -> None: +def repack_database(instance: Recorder) -> None: """Repack based on engine type.""" # Execute sqlite command to free up space on disk From d26b40cbe41903cd9bcdf5f04aaeb628e4d59b19 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 8 Mar 2021 17:39:08 -1000 Subject: [PATCH 19/24] switch tests to utcnow --- tests/components/recorder/test_purge.py | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/components/recorder/test_purge.py b/tests/components/recorder/test_purge.py index c04aa38fd15d5b..aaf5300086531b 100644 --- a/tests/components/recorder/test_purge.py +++ b/tests/components/recorder/test_purge.py @@ -1,5 +1,5 @@ """Test data purging.""" -from datetime import datetime, timedelta +from datetime import timedelta import json from homeassistant.components import recorder @@ -144,9 +144,9 @@ def test_purge_method(hass, hass_recorder, caplog): def _add_test_states(hass): """Add multiple states to the db for testing.""" - now = datetime.now() - five_days_ago = now - timedelta(days=5) - eleven_days_ago = now - timedelta(days=11) + utcnow = dt_util.utcnow() + five_days_ago = utcnow - timedelta(days=5) + eleven_days_ago = utcnow - timedelta(days=11) attributes = {"test_attr": 5, "test_attr_10": "nice"} hass.block_till_done() @@ -163,7 +163,7 @@ def _add_test_states(hass): timestamp = five_days_ago state = "purgeme" else: - timestamp = now + timestamp = utcnow state = "dontpurgeme" event = Events( @@ -193,9 +193,9 @@ def _add_test_states(hass): def _add_test_events(hass): """Add a few events for testing.""" - now = datetime.now() - five_days_ago = now - timedelta(days=5) - eleven_days_ago = now - timedelta(days=11) + utcnow = dt_util.utcnow() + five_days_ago = utcnow - timedelta(days=5) + eleven_days_ago = utcnow - timedelta(days=11) event_data = {"test_attr": 5, "test_attr_10": "nice"} hass.block_till_done() @@ -211,7 +211,7 @@ def _add_test_events(hass): timestamp = five_days_ago event_type = "EVENT_TEST_PURGE" else: - timestamp = now + timestamp = utcnow event_type = "EVENT_TEST" session.add( @@ -227,9 +227,9 @@ def _add_test_events(hass): def _add_test_recorder_runs(hass): """Add a few recorder_runs for testing.""" - now = datetime.now() - five_days_ago = now - timedelta(days=5) - eleven_days_ago = now - timedelta(days=11) + utcnow = dt_util.utcnow() + five_days_ago = utcnow - timedelta(days=5) + eleven_days_ago = utcnow - timedelta(days=11) hass.block_till_done() hass.data[DATA_INSTANCE].block_till_done() @@ -242,7 +242,7 @@ def _add_test_recorder_runs(hass): elif rec_id < 4: timestamp = five_days_ago else: - timestamp = now + timestamp = utcnow session.add( RecorderRuns( From 85ffc0011f3319d91d60f4ebf45080d85b298267 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 9 Mar 2021 07:24:52 -1000 Subject: [PATCH 20/24] add last_updated --- homeassistant/components/recorder/purge.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index b1f2556cc72ada..e1cd0184713010 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -34,7 +34,7 @@ def purge_old_data(instance: Recorder, purge_days: int, repack: bool) -> bool: with session_scope(session=instance.get_session()) as session: # type: ignore # Purge a max of MAX_ROWS_TO_PURGE, based on the oldest states or events record event_ids = _select_event_ids_to_purge(session, purge_before) - state_ids = _select_state_ids_to_purge(session, event_ids) + state_ids = _select_state_ids_to_purge(session, purge_before, event_ids) if state_ids: _disconnect_states_about_to_be_purged(session, state_ids) _purge_state_ids(session, state_ids) @@ -79,11 +79,18 @@ def _select_event_ids_to_purge(session: Session, purge_before: datetime) -> list return [event.event_id for event in events] -def _select_state_ids_to_purge(session: Session, event_ids: list) -> list: +def _select_state_ids_to_purge( + session: Session, purge_before: datetime, event_ids: list +) -> list: """Return a list of state ids to purge.""" if not event_ids: return [] - states = session.query(States.state_id).filter(States.event_id.in_(event_ids)).all() + states = ( + session.query(States.state_id) + .filter(States.last_updated < purge_before) + .filter(States.event_id.in_(event_ids)) + .all() + ) _LOGGER.debug("Selected %s state ids to remove", len(states)) return [state.state_id for state in states] From 811562f33b5f4aebce69f4d7242984da7086b753 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 9 Mar 2021 08:23:50 -1000 Subject: [PATCH 21/24] Update homeassistant/components/recorder/purge.py Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com> --- homeassistant/components/recorder/purge.py | 1 + 1 file changed, 1 insertion(+) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index e1cd0184713010..e22172bec47455 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -102,6 +102,7 @@ def _disconnect_states_about_to_be_purged(session: Session, state_ids: list) -> # for us. disconnected_rows = ( session.query(States) + .filter(States.last_updated < purge_before) .filter(States.old_state_id.in_(state_ids)) .update({"old_state_id": None}, synchronize_session=False) ) From ce4eae227c7e3ac9271094ebcf8e213fb4a18957 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 9 Mar 2021 08:51:08 -1000 Subject: [PATCH 22/24] pass purge before --- homeassistant/components/recorder/purge.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index e22172bec47455..8fe77c7513b3ba 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -36,7 +36,7 @@ def purge_old_data(instance: Recorder, purge_days: int, repack: bool) -> bool: event_ids = _select_event_ids_to_purge(session, purge_before) state_ids = _select_state_ids_to_purge(session, purge_before, event_ids) if state_ids: - _disconnect_states_about_to_be_purged(session, state_ids) + _disconnect_states_about_to_be_purged(session, purge_before, state_ids) _purge_state_ids(session, state_ids) if event_ids: _purge_event_ids(session, event_ids) @@ -95,7 +95,9 @@ def _select_state_ids_to_purge( return [state.state_id for state in states] -def _disconnect_states_about_to_be_purged(session: Session, state_ids: list) -> None: +def _disconnect_states_about_to_be_purged( + session: Session, purge_before: datetime, state_ids: list +) -> None: # Update old_state_id to NULL before deleting to ensure # the delete does not fail due to a foreign key constraint # since some databases (MSSQL) cannot do the ON DELETE SET NULL From febbef3453452b01b1aec12dbf9f63030955b383 Mon Sep 17 00:00:00 2001 From: Marc Mueller <30130371+cdce8p@users.noreply.github.com> Date: Tue, 9 Mar 2021 19:58:24 +0100 Subject: [PATCH 23/24] Revert last optimization --- homeassistant/components/recorder/purge.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index 8fe77c7513b3ba..e1cd0184713010 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -36,7 +36,7 @@ def purge_old_data(instance: Recorder, purge_days: int, repack: bool) -> bool: event_ids = _select_event_ids_to_purge(session, purge_before) state_ids = _select_state_ids_to_purge(session, purge_before, event_ids) if state_ids: - _disconnect_states_about_to_be_purged(session, purge_before, state_ids) + _disconnect_states_about_to_be_purged(session, state_ids) _purge_state_ids(session, state_ids) if event_ids: _purge_event_ids(session, event_ids) @@ -95,16 +95,13 @@ def _select_state_ids_to_purge( return [state.state_id for state in states] -def _disconnect_states_about_to_be_purged( - session: Session, purge_before: datetime, state_ids: list -) -> None: +def _disconnect_states_about_to_be_purged(session: Session, state_ids: list) -> None: # Update old_state_id to NULL before deleting to ensure # the delete does not fail due to a foreign key constraint # since some databases (MSSQL) cannot do the ON DELETE SET NULL # for us. disconnected_rows = ( session.query(States) - .filter(States.last_updated < purge_before) .filter(States.old_state_id.in_(state_ids)) .update({"old_state_id": None}, synchronize_session=False) ) From cbb5173ad9cd325a55661671d4592d77b2766e80 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Tue, 9 Mar 2021 09:39:43 -1000 Subject: [PATCH 24/24] move const --- homeassistant/components/recorder/const.py | 3 +++ homeassistant/components/recorder/purge.py | 3 +-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/homeassistant/components/recorder/const.py b/homeassistant/components/recorder/const.py index a2b5ffc6f2a786..026628a32dfc0b 100644 --- a/homeassistant/components/recorder/const.py +++ b/homeassistant/components/recorder/const.py @@ -5,3 +5,6 @@ DOMAIN = "recorder" CONF_DB_INTEGRITY_CHECK = "db_integrity_check" + +# The maximum number of rows (events) we purge in one delete statement +MAX_ROWS_TO_PURGE = 1000 diff --git a/homeassistant/components/recorder/purge.py b/homeassistant/components/recorder/purge.py index e1cd0184713010..ac10dadc227e58 100644 --- a/homeassistant/components/recorder/purge.py +++ b/homeassistant/components/recorder/purge.py @@ -11,6 +11,7 @@ import homeassistant.util.dt as dt_util +from .const import MAX_ROWS_TO_PURGE from .models import Events, RecorderRuns, States from .repack import repack_database from .util import session_scope @@ -20,8 +21,6 @@ _LOGGER = logging.getLogger(__name__) -MAX_ROWS_TO_PURGE = 1000 - def purge_old_data(instance: Recorder, purge_days: int, repack: bool) -> bool: """Purge events and states older than purge_days ago.