From bbc35152c59ef4febc8c898a217e18676bb3ca75 Mon Sep 17 00:00:00 2001 From: Allen Porter Date: Wed, 10 Dec 2025 16:12:58 +0000 Subject: [PATCH 1/3] Suppress roborock failures under some unavailability threshold We aggressively refresh roborock devices local channel (every 30 seconds) and there is a known issue where devices go unavailable around 3am every day for a period of ~1 minute which causes log spam during a non-critical background refresh. We instead will suppress refresh failures until a minimum unavailability threshold has passed. --- .../components/roborock/coordinator.py | 71 +++++++++++------ tests/components/roborock/test_init.py | 77 ++++++++++++++++++- 2 files changed, 124 insertions(+), 24 deletions(-) diff --git a/homeassistant/components/roborock/coordinator.py b/homeassistant/components/roborock/coordinator.py index 54911795cd80f..8c24fac4827fe 100644 --- a/homeassistant/components/roborock/coordinator.py +++ b/homeassistant/components/roborock/coordinator.py @@ -43,6 +43,12 @@ SCAN_INTERVAL = timedelta(seconds=30) +# Roborock devices have a known issue where they go offline for a short period +# around 3AM local time for ~1 minute and reset both the local connection +# and MQTT connection. To avoid log spam, we will avoid failures refreshing +# data until this duration has passed. +MIN_UNAVAILABLE_DURATION = timedelta(minutes=2) + _LOGGER = logging.getLogger(__name__) @@ -102,6 +108,9 @@ def __init__( # Keep track of last attempt to refresh maps/rooms to know when to try again. self._last_home_update_attempt: datetime self.last_home_update: datetime | None = None + # Tracks the last successful update to control when we report failure + # to the base class. This is reset on successful data update. + self._last_update_success_time: datetime | None = None @cached_property def dock_device_info(self) -> DeviceInfo: @@ -161,7 +170,7 @@ async def update_map(self) -> None: await self.properties_api.home.discover_home() await self.properties_api.home.refresh() except RoborockException as ex: - raise HomeAssistantError( + raise UpdateFailed( translation_domain=DOMAIN, translation_key="map_failure", ) from ex @@ -169,7 +178,7 @@ async def update_map(self) -> None: self.last_home_update = dt_util.utcnow() async def _verify_api(self) -> None: - """Verify that the api is reachable. If it is not, switch clients.""" + """Verify that the api is reachable.""" if self._device.is_connected: if self._device.is_local_connected: async_delete_issue( @@ -217,26 +226,25 @@ async def _async_update_data(self) -> DeviceState: try: # Update device props and standard api information await self._update_device_prop() - - # If the vacuum is currently cleaning and it has been IMAGE_CACHE_INTERVAL - # since the last map update, you can update the map. - new_status = self.properties_api.status - if ( - new_status.in_cleaning - and (dt_util.utcnow() - self._last_home_update_attempt) - > IMAGE_CACHE_INTERVAL - ) or self.last_update_state != new_status.state_name: - self._last_home_update_attempt = dt_util.utcnow() - try: - await self.update_map() - except HomeAssistantError as err: - _LOGGER.debug("Failed to update map: %s", err) - except RoborockException as ex: - _LOGGER.debug("Failed to update data: %s", ex) - raise UpdateFailed( - translation_domain=DOMAIN, - translation_key="update_data_fail", - ) from ex + except UpdateFailed: + if self._should_suppress_update_failure(): + _LOGGER.debug( + "Suppressing update failure until unavailable duration passed" + ) + return self.data + raise + + # If the vacuum is currently cleaning and it has been IMAGE_CACHE_INTERVAL + # since the last map update, you can update the map. + new_status = self.properties_api.status + if ( + new_status.in_cleaning + and (dt_util.utcnow() - self._last_home_update_attempt) + > IMAGE_CACHE_INTERVAL + ) or self.last_update_state != new_status.state_name: + self._last_home_update_attempt = dt_util.utcnow() + # Will raise UpdateFailed on failure + await self.update_map() if self.properties_api.status.in_cleaning: if self._device.is_local_connected: @@ -248,6 +256,8 @@ async def _async_update_data(self) -> DeviceState: else: self.update_interval = V1_CLOUD_NOT_CLEANING_INTERVAL self.last_update_state = self.properties_api.status.state_name + self._last_update_success_time = dt_util.utcnow() + _LOGGER.debug("Data update successful %s", self._last_update_success_time) return DeviceState( status=self.properties_api.status, dnd_timer=self.properties_api.dnd, @@ -255,6 +265,23 @@ async def _async_update_data(self) -> DeviceState: clean_summary=self.properties_api.clean_summary, ) + def _should_suppress_update_failure(self) -> bool: + """Determine if we should suppress update failure reporting. + + We suppress reporting update failures until a minimum duration has + passed since the last successful update. This is used to avoid reporting + the device as unavailable for short periods, a known issue. + + The intent is to apply to routine background state refreshes and not + other failures such as the first update or map updates. + """ + if self._last_update_success_time is None: + # Never had a successful update, do not suppress + return False + failure_duration = dt_util.utcnow() - self._last_update_success_time + _LOGGER.debug("Update failure duration: %s", failure_duration) + return failure_duration < MIN_UNAVAILABLE_DURATION + async def get_routines(self) -> list[HomeDataScene]: """Get routines.""" try: diff --git a/tests/components/roborock/test_init.py b/tests/components/roborock/test_init.py index 4773badbe14ff..66c0f20993fdf 100644 --- a/tests/components/roborock/test_init.py +++ b/tests/components/roborock/test_init.py @@ -1,19 +1,26 @@ """Test for Roborock init.""" +import datetime import pathlib from typing import Any from unittest.mock import AsyncMock, patch +from freezegun.api import FrozenDateTimeFactory import pytest from roborock import ( RoborockInvalidCredentials, RoborockInvalidUserAgreement, RoborockNoUserAgreement, ) +from roborock.exceptions import RoborockException +from homeassistant.components.homeassistant import ( + DOMAIN as HA_DOMAIN, + SERVICE_UPDATE_ENTITY, +) from homeassistant.components.roborock.const import DOMAIN from homeassistant.config_entries import ConfigEntryState -from homeassistant.const import Platform +from homeassistant.const import ATTR_ENTITY_ID, Platform from homeassistant.core import HomeAssistant from homeassistant.helpers import issue_registry as ir from homeassistant.helpers.device_registry import DeviceRegistry @@ -22,7 +29,7 @@ from .conftest import FakeDevice from .mock_data import ROBOROCK_RRUID, USER_EMAIL -from tests.common import MockConfigEntry +from tests.common import MockConfigEntry, async_fire_time_changed from tests.typing import ClientSessionGenerator @@ -294,6 +301,72 @@ async def test_migrate_config_entry_unique_id( assert config_entry.unique_id == ROBOROCK_RRUID +@pytest.mark.parametrize("platforms", [[Platform.SENSOR]]) +async def test_update_unavailability_threshold( + hass: HomeAssistant, + freezer: FrozenDateTimeFactory, + setup_entry: MockConfigEntry, + fake_vacuum: FakeDevice, +) -> None: + """Test that a small number of updates failures are suppressed before marking a device unavailable.""" + await async_setup_component(hass, HA_DOMAIN, {}) + assert setup_entry.state is ConfigEntryState.LOADED + + # We pick an arbitrary sensor to test for availability + sensor_entity_id = "sensor.roborock_s7_maxv_battery" + expected_state = "100" + state = hass.states.get(sensor_entity_id) + assert state is not None + assert state.state == expected_state + + # Simulate a few update failures below the threshold + assert fake_vacuum.v1_properties is not None + fake_vacuum.v1_properties.status.refresh.side_effect = RoborockException( + "Simulated update failure" + ) + + # Move forward in time less than the threshold + freezer.tick(datetime.timedelta(seconds=90)) + async_fire_time_changed(hass) + await hass.async_block_till_done() + + # Force a coordinator refresh. + await hass.services.async_call( + HA_DOMAIN, + SERVICE_UPDATE_ENTITY, + {ATTR_ENTITY_ID: sensor_entity_id}, + blocking=True, + ) + await hass.async_block_till_done() + + # Verify that the entity is still available + state = hass.states.get(sensor_entity_id) + assert state is not None + assert state.state == expected_state + + # Move forward in time to exceed the threshold + freezer.tick(datetime.timedelta(minutes=3)) + async_fire_time_changed(hass) + await hass.async_block_till_done() + + # Verify that the entity is now unavailable + state = hass.states.get(sensor_entity_id) + assert state is not None + assert state.state == "unavailable" + + # Now restore normal update behavior and refresh. + fake_vacuum.v1_properties.status.refresh.side_effect = None + + freezer.tick(datetime.timedelta(seconds=45)) + async_fire_time_changed(hass) + await hass.async_block_till_done() + + # Verify that the entity recovers and is available again + state = hass.states.get(sensor_entity_id) + assert state is not None + assert state.state == expected_state + + async def test_cloud_api_repair( hass: HomeAssistant, mock_roborock_entry: MockConfigEntry, From 1a465ebd339648252f0536c2d48a5f5e40a6dab7 Mon Sep 17 00:00:00 2001 From: Allen Porter Date: Thu, 11 Dec 2025 03:22:50 +0000 Subject: [PATCH 2/3] Revert unnecessary changes to map updating logic --- homeassistant/components/roborock/coordinator.py | 8 +++++--- tests/components/roborock/test_init.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/homeassistant/components/roborock/coordinator.py b/homeassistant/components/roborock/coordinator.py index 8c24fac4827fe..dd7d54f5a61df 100644 --- a/homeassistant/components/roborock/coordinator.py +++ b/homeassistant/components/roborock/coordinator.py @@ -170,7 +170,7 @@ async def update_map(self) -> None: await self.properties_api.home.discover_home() await self.properties_api.home.refresh() except RoborockException as ex: - raise UpdateFailed( + raise HomeAssistantError( translation_domain=DOMAIN, translation_key="map_failure", ) from ex @@ -243,8 +243,10 @@ async def _async_update_data(self) -> DeviceState: > IMAGE_CACHE_INTERVAL ) or self.last_update_state != new_status.state_name: self._last_home_update_attempt = dt_util.utcnow() - # Will raise UpdateFailed on failure - await self.update_map() + try: + await self.update_map() + except HomeAssistantError as err: + _LOGGER.debug("Failed to update map: %s", err) if self.properties_api.status.in_cleaning: if self._device.is_local_connected: diff --git a/tests/components/roborock/test_init.py b/tests/components/roborock/test_init.py index 66c0f20993fdf..96e5f9dac3b83 100644 --- a/tests/components/roborock/test_init.py +++ b/tests/components/roborock/test_init.py @@ -308,7 +308,7 @@ async def test_update_unavailability_threshold( setup_entry: MockConfigEntry, fake_vacuum: FakeDevice, ) -> None: - """Test that a small number of updates failures are suppressed before marking a device unavailable.""" + """Test that a small number of update failures are suppressed before marking a device unavailable.""" await async_setup_component(hass, HA_DOMAIN, {}) assert setup_entry.state is ConfigEntryState.LOADED From c36c797fcfd71b44cf9de5de37b59753ba5c708b Mon Sep 17 00:00:00 2001 From: Joost Lekkerkerker Date: Sat, 13 Dec 2025 22:25:05 +0100 Subject: [PATCH 3/3] Update homeassistant/components/roborock/coordinator.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- homeassistant/components/roborock/coordinator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/homeassistant/components/roborock/coordinator.py b/homeassistant/components/roborock/coordinator.py index dd7d54f5a61df..bda782f2e4610 100644 --- a/homeassistant/components/roborock/coordinator.py +++ b/homeassistant/components/roborock/coordinator.py @@ -45,7 +45,7 @@ # Roborock devices have a known issue where they go offline for a short period # around 3AM local time for ~1 minute and reset both the local connection -# and MQTT connection. To avoid log spam, we will avoid failures refreshing +# and MQTT connection. To avoid log spam, we will avoid reporting failures refreshing # data until this duration has passed. MIN_UNAVAILABLE_DURATION = timedelta(minutes=2)