-
-
Notifications
You must be signed in to change notification settings - Fork 37.8k
ESPHome trigger reconnect immediately when mDNS record received #48129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 2 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
0295f64
ESPHome trigger reconnect immediately when mDNS record received
OttoWinter 5d79424
Cleanup
OttoWinter 3ae32b7
Small optimization
OttoWinter 49778c9
Fix deprecated method call
OttoWinter 919df67
Also track wait task
OttoWinter e9d82df
Log errors from reconnect loop
OttoWinter f949552
Apply suggestions
OttoWinter File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| UserServiceArgType, | ||
| ) | ||
| import voluptuous as vol | ||
| from zeroconf import DNSPointer, DNSRecord, RecordUpdateListener, Zeroconf | ||
|
|
||
| from homeassistant import const | ||
| from homeassistant.components import zeroconf | ||
|
|
@@ -199,93 +200,213 @@ async def on_login() -> None: | |
| # Re-connection logic will trigger after this | ||
| await cli.disconnect() | ||
|
|
||
| try_connect = await _setup_auto_reconnect_logic(hass, cli, entry, host, on_login) | ||
| reconnect_logic = ReconnectLogic( | ||
| hass, cli, entry, host, on_login, zeroconf_instance | ||
| ) | ||
|
|
||
| async def complete_setup() -> None: | ||
| """Complete the config entry setup.""" | ||
| infos, services = await entry_data.async_load_from_store() | ||
| await entry_data.async_update_static_infos(hass, entry, infos) | ||
| await _setup_services(hass, entry_data, services) | ||
|
|
||
| # Create connection attempt outside of HA's tracked task in order | ||
| # not to delay startup. | ||
| hass.loop.create_task(try_connect(is_disconnect=False)) | ||
| await reconnect_logic.start() | ||
| entry_data.cleanup_callbacks.append(reconnect_logic.stop_callback) | ||
|
|
||
| hass.async_create_task(complete_setup()) | ||
| return True | ||
|
|
||
|
|
||
| async def _setup_auto_reconnect_logic( | ||
| hass: HomeAssistantType, cli: APIClient, entry: ConfigEntry, host: str, on_login | ||
| ): | ||
| """Set up the re-connect logic for the API client.""" | ||
| class ReconnectLogic(RecordUpdateListener): | ||
| """Reconnectiong logic handler for ESPHome config entries. | ||
|
|
||
| async def try_connect(tries: int = 0, is_disconnect: bool = True) -> None: | ||
| """Try connecting to the API client. Will retry if not successful.""" | ||
| if entry.entry_id not in hass.data[DOMAIN]: | ||
| # When removing/disconnecting manually | ||
| return | ||
| Contains two reconnect strategies: | ||
| - Connect with increasing time between connection attempts. | ||
| - Listen to zeroconf mDNS records, if any records are found for this device, try reconnecting immediately. | ||
| """ | ||
|
|
||
| device_registry = await hass.helpers.device_registry.async_get_registry() | ||
| devices = dr.async_entries_for_config_entry(device_registry, entry.entry_id) | ||
| for device in devices: | ||
| # There is only one device in ESPHome | ||
| if device.disabled: | ||
| # Don't attempt to connect if it's disabled | ||
| return | ||
| def __init__( | ||
| self, | ||
| hass: HomeAssistantType, | ||
| cli: APIClient, | ||
| entry: ConfigEntry, | ||
| host: str, | ||
| on_login, | ||
| zc: Zeroconf, | ||
| ): | ||
| """Initialize ReconnectingLogic.""" | ||
| self._hass = hass | ||
| self._cli = cli | ||
| self._entry = entry | ||
| self._host = host | ||
| self._on_login = on_login | ||
| self._zc = zc | ||
| # Flag to check if the device is connected | ||
| self._connected = True | ||
| self._connected_lock = asyncio.Lock() | ||
| # Event the different strategies use for issuing a reconnect attempt. | ||
| self._reconnect_event = asyncio.Event() | ||
| # The task containing the infinite reconnect loop while running | ||
| self._loop_task: asyncio.Task | None = None | ||
| # How many reconnect attempts have there been already, used for exponential wait time | ||
| self._tries = 0 | ||
| self._tries_lock = asyncio.Lock() | ||
|
|
||
| data: RuntimeEntryData = hass.data[DOMAIN][entry.entry_id] | ||
| for disconnect_cb in data.disconnect_callbacks: | ||
| @property | ||
| def _entry_data(self) -> RuntimeEntryData | None: | ||
| return self._hass.data[DOMAIN].get(self._entry.entry_id) | ||
|
|
||
| async def _on_disconnect(self): | ||
| """Log and issue callbacks when disconnecting.""" | ||
| if self._entry_data is None: | ||
| return | ||
| # This can happen often depending on WiFi signal strength. | ||
| # So therefore all these connection warnings are logged | ||
| # as infos. The "unavailable" logic will still trigger so the | ||
| # user knows if the device is not connected. | ||
| _LOGGER.info("Disconnected from ESPHome API for %s", self._host) | ||
|
|
||
| # Run disconnect hooks | ||
| for disconnect_cb in self._entry_data.disconnect_callbacks: | ||
| disconnect_cb() | ||
| data.disconnect_callbacks = [] | ||
| data.available = False | ||
| data.async_update_device_state(hass) | ||
|
|
||
| if is_disconnect: | ||
| # This can happen often depending on WiFi signal strength. | ||
| # So therefore all these connection warnings are logged | ||
| # as infos. The "unavailable" logic will still trigger so the | ||
| # user knows if the device is not connected. | ||
| _LOGGER.info("Disconnected from ESPHome API for %s", host) | ||
|
|
||
| if tries != 0: | ||
| # If not first re-try, wait and print message | ||
| # Cap wait time at 1 minute. This is because while working on the | ||
| # device (e.g. soldering stuff), users don't want to have to wait | ||
| # a long time for their device to show up in HA again (this was | ||
| # mentioned a lot in early feedback) | ||
| # | ||
| # In the future another API will be set up so that the ESP can | ||
| # notify HA of connectivity directly, but for new we'll use a | ||
| # really short reconnect interval. | ||
| tries = min(tries, 10) # prevent OverflowError | ||
| wait_time = int(round(min(1.8 ** tries, 60.0))) | ||
| if tries == 1: | ||
| _LOGGER.info("Trying to reconnect to %s in the background", host) | ||
| _LOGGER.debug("Retrying %s in %d seconds", host, wait_time) | ||
| await asyncio.sleep(wait_time) | ||
| self._entry_data.disconnect_callbacks = [] | ||
| self._entry_data.available = False | ||
| self._entry_data.async_update_device_state(self._hass) | ||
|
|
||
| # Reset tries | ||
| async with self._tries_lock: | ||
| self._tries = 0 | ||
| # Connected needs to be reset before the reconnect event (opposite order of check) | ||
| async with self._connected_lock: | ||
| self._connected = False | ||
| self._reconnect_event.set() | ||
|
|
||
| async def _wait_and_start_reconnect(self): | ||
| """Wait for exponentially increasing time to issue next reconnect event.""" | ||
| async with self._tries_lock: | ||
| tries = self._tries | ||
| # If not first re-try, wait and print message | ||
| # Cap wait time at 1 minute. This is because while working on the | ||
| # device (e.g. soldering stuff), users don't want to have to wait | ||
| # a long time for their device to show up in HA again (this was | ||
| # mentioned a lot in early feedback) | ||
| tries = min(tries, 10) # prevent OverflowError | ||
| wait_time = int(round(min(1.8 ** tries, 60.0))) | ||
| if tries == 1: | ||
| _LOGGER.info("Trying to reconnect to %s in the background", self._host) | ||
| _LOGGER.debug("Retrying %s in %d seconds", self._host, wait_time) | ||
| await asyncio.sleep(wait_time) | ||
| self._reconnect_event.set() | ||
|
|
||
| async def _try_connect(self): | ||
| """Try connecting to the API client.""" | ||
| async with self._tries_lock: | ||
| tries = self._tries | ||
| self._tries += 1 | ||
|
|
||
| try: | ||
| await cli.connect(on_stop=try_connect, login=True) | ||
| await self._cli.connect(on_stop=self._on_disconnect, login=True) | ||
| except APIConnectionError as error: | ||
| level = logging.WARNING if tries == 0 else logging.DEBUG | ||
| _LOGGER.log( | ||
| level, | ||
| "Can't connect to ESPHome API for %s (%s): %s", | ||
| entry.unique_id, | ||
| host, | ||
| self._entry.unique_id, | ||
| self._host, | ||
| error, | ||
| ) | ||
| # Schedule re-connect in event loop in order not to delay HA | ||
| # startup. First connect is scheduled in tracked tasks. | ||
| data.reconnect_task = hass.loop.create_task( | ||
| try_connect(tries + 1, is_disconnect=False) | ||
| ) | ||
| self._hass.loop.create_task(self._wait_and_start_reconnect()) | ||
| else: | ||
| _LOGGER.info("Successfully connected to %s", host) | ||
| hass.async_create_task(on_login()) | ||
| _LOGGER.info("Successfully connected to %s", self._host) | ||
| async with self._tries_lock: | ||
| self._tries = 0 | ||
| async with self._connected_lock: | ||
| self._connected = True | ||
| self._hass.async_create_task(self._on_login()) | ||
|
|
||
| async def _reconnect_loop(self): | ||
| while True: | ||
| # Wait and clear reconnection event | ||
| await self._reconnect_event.wait() | ||
| self._reconnect_event.clear() | ||
|
|
||
| # If in connected state, do not try to connect again. | ||
| async with self._connected_lock: | ||
| if self._connected: | ||
| continue | ||
|
|
||
| # Check if the entry got removed or disabled, in which case we shouldn't reconnect | ||
| if self._entry.entry_id not in self._hass.data[DOMAIN]: | ||
| # When removing/disconnecting manually | ||
| return | ||
| device_registry = ( | ||
| await self._hass.helpers.device_registry.async_get_registry() | ||
| ) | ||
| devices = dr.async_entries_for_config_entry( | ||
| device_registry, self._entry.entry_id | ||
| ) | ||
| for device in devices: | ||
| # There is only one device in ESPHome | ||
| if device.disabled: | ||
| # Don't attempt to connect if it's disabled | ||
| return | ||
|
|
||
| return try_connect | ||
| await self._try_connect() | ||
|
|
||
| async def start(self): | ||
| """Start the reconnecting logic background task.""" | ||
| # Create reconnection loop outside of HA's tracked tasks in order | ||
| # not to delay startup. | ||
| self._loop_task = self._hass.loop.create_task(self._reconnect_loop()) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use |
||
| # Listen for mDNS records so we can reconnect directly if a received mDNS record | ||
| # indicates the node is up again | ||
| await self._hass.async_add_executor_job(self._zc.add_listener, self, None) | ||
|
|
||
| async with self._connected_lock: | ||
| self._connected = False | ||
| self._reconnect_event.set() | ||
|
|
||
| async def stop(self): | ||
| """Stop the reconnecting logic background task. Does not disconnect the client.""" | ||
| if self._loop_task is not None: | ||
| self._loop_task.cancel() | ||
| self._loop_task = None | ||
| await self._hass.async_add_executor_job(self._zc.remove_listener, self) | ||
|
|
||
| @callback | ||
| def stop_callback(self): | ||
| """Stop as an async callback function.""" | ||
| self._hass.async_create_task(self.stop()) | ||
|
|
||
| def update_record(self, zc: Zeroconf, now: float, record: DNSRecord) -> None: | ||
| """Listen to zeroconf updated mDNS records.""" | ||
| if not isinstance(record, DNSPointer): | ||
| # We only consider PTR records and match using the alias name | ||
| return | ||
| if self._entry_data is None or self._entry_data.device_info is None: | ||
| # Either the entry was already teared down or we haven't received device info yet | ||
| return | ||
| filter_alias = f"{self._entry_data.device_info.name}._esphomelib._tcp.local." | ||
| if record.alias != filter_alias: | ||
| return | ||
|
|
||
| # This is a mDNS record from the device and could mean it just woke up | ||
| # Check if already connected, no lock needed for this access | ||
| if self._connected: | ||
| return | ||
|
|
||
| # Tell reconnection logic to retry connection attempt now (even before reconnect timer finishes) | ||
| async def set_reconnect(): | ||
| self._reconnect_event.set() | ||
|
|
||
| _LOGGER.debug( | ||
| "%s: Triggering reconnect because of received mDNS record %s", | ||
| self._host, | ||
| record, | ||
| ) | ||
| self._hass.add_job(set_reconnect) | ||
|
OttoWinter marked this conversation as resolved.
Outdated
|
||
|
|
||
|
|
||
| async def _async_setup_device_registry( | ||
|
|
@@ -421,8 +542,6 @@ async def _cleanup_instance( | |
| ) -> RuntimeEntryData: | ||
| """Cleanup the esphome client if it exists.""" | ||
| data: RuntimeEntryData = hass.data[DOMAIN].pop(entry.entry_id) | ||
| if data.reconnect_task is not None: | ||
| data.reconnect_task.cancel() | ||
| for disconnect_cb in data.disconnect_callbacks: | ||
| disconnect_cb() | ||
| for cleanup_callback in data.cleanup_callbacks: | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.