Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 50 additions & 21 deletions homeassistant/components/roborock/coordinator.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@

SCAN_INTERVAL = timedelta(seconds=30)

# Roborock devices have a known issue where they go offline for about a minute
# around 3AM local time, resetting both the local connection and the MQTT
# connection. To avoid log spam, failures to refresh data are not reported
# until at least this much time has passed since the last successful update.
MIN_UNAVAILABLE_DURATION = timedelta(minutes=2)

_LOGGER = logging.getLogger(__name__)


Expand Down Expand Up @@ -102,6 +108,9 @@ def __init__(
# Keep track of last attempt to refresh maps/rooms to know when to try again.
self._last_home_update_attempt: datetime
self.last_home_update: datetime | None = None
# Tracks the last successful update to control when we report failure
# to the base class. This is reset on successful data update.
self._last_update_success_time: datetime | None = None

@cached_property
def dock_device_info(self) -> DeviceInfo:
Expand Down Expand Up @@ -169,7 +178,7 @@ async def update_map(self) -> None:
self.last_home_update = dt_util.utcnow()

async def _verify_api(self) -> None:
"""Verify that the api is reachable. If it is not, switch clients."""
"""Verify that the api is reachable."""
if self._device.is_connected:
if self._device.is_local_connected:
async_delete_issue(
Expand Down Expand Up @@ -217,26 +226,27 @@ async def _async_update_data(self) -> DeviceState:
try:
# Update device props and standard api information
await self._update_device_prop()

# If the vacuum is currently cleaning and it has been IMAGE_CACHE_INTERVAL
# since the last map update, you can update the map.
new_status = self.properties_api.status
if (
new_status.in_cleaning
and (dt_util.utcnow() - self._last_home_update_attempt)
> IMAGE_CACHE_INTERVAL
) or self.last_update_state != new_status.state_name:
self._last_home_update_attempt = dt_util.utcnow()
try:
await self.update_map()
except HomeAssistantError as err:
_LOGGER.debug("Failed to update map: %s", err)
except RoborockException as ex:
_LOGGER.debug("Failed to update data: %s", ex)
raise UpdateFailed(
translation_domain=DOMAIN,
translation_key="update_data_fail",
) from ex
except UpdateFailed:
if self._should_suppress_update_failure():
_LOGGER.debug(
"Suppressing update failure until unavailable duration passed"
)
return self.data
raise

# If the vacuum is currently cleaning and it has been IMAGE_CACHE_INTERVAL
# since the last map update, you can update the map.
new_status = self.properties_api.status
if (
new_status.in_cleaning
and (dt_util.utcnow() - self._last_home_update_attempt)
> IMAGE_CACHE_INTERVAL
) or self.last_update_state != new_status.state_name:
self._last_home_update_attempt = dt_util.utcnow()
try:
await self.update_map()
except HomeAssistantError as err:
_LOGGER.debug("Failed to update map: %s", err)

if self.properties_api.status.in_cleaning:
if self._device.is_local_connected:
Expand All @@ -248,13 +258,32 @@ async def _async_update_data(self) -> DeviceState:
else:
self.update_interval = V1_CLOUD_NOT_CLEANING_INTERVAL
self.last_update_state = self.properties_api.status.state_name
self._last_update_success_time = dt_util.utcnow()
_LOGGER.debug("Data update successful %s", self._last_update_success_time)
return DeviceState(
status=self.properties_api.status,
dnd_timer=self.properties_api.dnd,
consumable=self.properties_api.consumables,
clean_summary=self.properties_api.clean_summary,
)

def _should_suppress_update_failure(self) -> bool:
    """Return True while a refresh failure is still too recent to report.

    A failure is hidden from the base class as long as less than
    MIN_UNAVAILABLE_DURATION has elapsed since the last successful update,
    so that a brief, known connectivity drop does not mark the device as
    unavailable.

    This is meant for routine background state refreshes only; the very
    first update (no prior success recorded) is never suppressed.
    """
    last_success = self._last_update_success_time
    if last_success is not None:
        elapsed = dt_util.utcnow() - last_success
        _LOGGER.debug("Update failure duration: %s", elapsed)
        return elapsed < MIN_UNAVAILABLE_DURATION
    # No successful update has been recorded yet, so there is nothing to
    # fall back on: let the failure propagate.
    return False

async def get_routines(self) -> list[HomeDataScene]:
"""Get routines."""
try:
Expand Down
77 changes: 75 additions & 2 deletions tests/components/roborock/test_init.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
"""Test for Roborock init."""

import datetime
import pathlib
from typing import Any
from unittest.mock import AsyncMock, patch

from freezegun.api import FrozenDateTimeFactory
import pytest
from roborock import (
RoborockInvalidCredentials,
RoborockInvalidUserAgreement,
RoborockNoUserAgreement,
)
from roborock.exceptions import RoborockException

from homeassistant.components.homeassistant import (
DOMAIN as HA_DOMAIN,
SERVICE_UPDATE_ENTITY,
)
from homeassistant.components.roborock.const import DOMAIN
from homeassistant.config_entries import ConfigEntryState
from homeassistant.const import Platform
from homeassistant.const import ATTR_ENTITY_ID, Platform
from homeassistant.core import HomeAssistant
from homeassistant.helpers import issue_registry as ir
from homeassistant.helpers.device_registry import DeviceRegistry
Expand All @@ -22,7 +29,7 @@
from .conftest import FakeDevice
from .mock_data import ROBOROCK_RRUID, USER_EMAIL

from tests.common import MockConfigEntry
from tests.common import MockConfigEntry, async_fire_time_changed
from tests.typing import ClientSessionGenerator


Expand Down Expand Up @@ -294,6 +301,72 @@ async def test_migrate_config_entry_unique_id(
assert config_entry.unique_id == ROBOROCK_RRUID


@pytest.mark.parametrize("platforms", [[Platform.SENSOR]])
async def test_update_unavailability_threshold(
    hass: HomeAssistant,
    freezer: FrozenDateTimeFactory,
    setup_entry: MockConfigEntry,
    fake_vacuum: FakeDevice,
) -> None:
    """Test that a small number of update failures are suppressed before marking a device unavailable.

    The test runs in three phases against a single sensor entity:

    1. Status refreshes start failing and time advances by 90 seconds; the
       entity must keep reporting its last known state.
    2. Time advances a further 3 minutes while refreshes still fail; the
       entity must become "unavailable".
    3. Refreshes succeed again and time advances by 45 seconds; the entity
       must return to its original state.
    """
    # The homeassistant component provides the update_entity service used below.
    await async_setup_component(hass, HA_DOMAIN, {})
    assert setup_entry.state is ConfigEntryState.LOADED

    # We pick an arbitrary sensor to test for availability
    sensor_entity_id = "sensor.roborock_s7_maxv_battery"
    expected_state = "100"
    state = hass.states.get(sensor_entity_id)
    assert state is not None
    assert state.state == expected_state

    # Simulate a few update failures below the threshold: from here on, every
    # status refresh on the fake device raises RoborockException.
    assert fake_vacuum.v1_properties is not None
    fake_vacuum.v1_properties.status.refresh.side_effect = RoborockException(
        "Simulated update failure"
    )

    # Move forward in time less than the threshold (90 seconds elapsed)
    freezer.tick(datetime.timedelta(seconds=90))
    async_fire_time_changed(hass)
    await hass.async_block_till_done()

    # Force a coordinator refresh.
    await hass.services.async_call(
        HA_DOMAIN,
        SERVICE_UPDATE_ENTITY,
        {ATTR_ENTITY_ID: sensor_entity_id},
        blocking=True,
    )
    await hass.async_block_till_done()

    # Verify that the entity is still available, i.e. it still shows the last
    # known value even though refreshes are failing.
    state = hass.states.get(sensor_entity_id)
    assert state is not None
    assert state.state == expected_state

    # Move forward in time to exceed the threshold (another 3 minutes).
    # NOTE(review): no explicit update_entity call is made here; presumably the
    # fired time change triggers the coordinator's scheduled refresh - confirm.
    freezer.tick(datetime.timedelta(minutes=3))
    async_fire_time_changed(hass)
    await hass.async_block_till_done()

    # Verify that the entity is now unavailable
    state = hass.states.get(sensor_entity_id)
    assert state is not None
    assert state.state == "unavailable"

    # Now restore normal update behavior and refresh.
    fake_vacuum.v1_properties.status.refresh.side_effect = None

    freezer.tick(datetime.timedelta(seconds=45))
    async_fire_time_changed(hass)
    await hass.async_block_till_done()

    # Verify that the entity recovers and is available again
    state = hass.states.get(sensor_entity_id)
    assert state is not None
    assert state.state == expected_state


async def test_cloud_api_repair(
hass: HomeAssistant,
mock_roborock_entry: MockConfigEntry,
Expand Down
Loading