-
-
Notifications
You must be signed in to change notification settings - Fork 37.4k
Refactor cert_expiry for 3.7+, retry transient errors, wait for HA HTTP #32001
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| """Errors for the cert_expiry integration.""" | ||
| from homeassistant.exceptions import HomeAssistantError | ||
|
|
||
|
|
||
| class CertExpiryException(HomeAssistantError): | ||
| """Base class for cert_expiry exceptions.""" | ||
|
|
||
|
|
||
| class TemporaryFailure(CertExpiryException): | ||
| """Temporary failure has occurred.""" | ||
|
|
||
|
|
||
| class PermanentFailure(CertExpiryException): | ||
| """Permanent failure has occurred.""" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,8 +17,11 @@ | |
| from homeassistant.core import callback | ||
| import homeassistant.helpers.config_validation as cv | ||
| from homeassistant.helpers.entity import Entity | ||
| from homeassistant.helpers.event import async_track_point_in_utc_time | ||
| from homeassistant.util import dt as dt_util | ||
|
|
||
| from .const import DEFAULT_NAME, DEFAULT_PORT, DOMAIN | ||
| from .errors import PermanentFailure, TemporaryFailure | ||
| from .helper import get_cert | ||
|
|
||
| _LOGGER = logging.getLogger(__name__) | ||
|
|
@@ -71,12 +74,23 @@ def __init__(self, sensor_name, server_name, server_port): | |
| self._state = None | ||
| self._available = False | ||
| self._valid = False | ||
| self._retry_attempts = 0 | ||
|
|
||
| @property | ||
| def name(self): | ||
| """Return the name of the sensor.""" | ||
| return self._name | ||
|
|
||
| @property | ||
| def next_check(self): | ||
| """Return the timestamp of the next update retry attempt .""" | ||
| return dt_util.utcnow() + timedelta(seconds=self.retry_delay) | ||
|
|
||
| @property | ||
| def retry_delay(self): | ||
| """Return the retry delay in seconds.""" | ||
| return int(min(2 ** (self._retry_attempts - 1) * 30, 3600)) | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm personally not a fan of such retry logic if that is just a simple fix with
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Simpler is definitely better. However, some (but not all) users monitoring the cert of the [inline code reference lost in extraction] With this retry in place, the startup delays based on [inline code reference lost in extraction] What do you think?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, I now see the |
||
| @property | ||
| def unique_id(self): | ||
| """Return a unique id for the sensor.""" | ||
|
|
@@ -116,30 +130,62 @@ def do_update(_): | |
| # Delay until HA is fully started in case we're checking our own cert. | ||
| self.hass.bus.async_listen_once(EVENT_HOMEASSISTANT_START, do_update) | ||
|
|
||
| def update(self): | ||
| async def async_update(self): | ||
| """Fetch the certificate information.""" | ||
| try: | ||
| cert = get_cert(self.server_name, self.server_port) | ||
| except socket.gaierror: | ||
| _LOGGER.error("Cannot resolve hostname: %s", self.server_name) | ||
| self._available = False | ||
| self._valid = False | ||
| return | ||
| except socket.timeout: | ||
| _LOGGER.error("Connection timeout with server: %s", self.server_name) | ||
| try: | ||
| cert = await self.hass.async_add_executor_job( | ||
| get_cert, self.server_name, self.server_port | ||
| ) | ||
| except socket.gaierror: | ||
| raise TemporaryFailure("Cannot resolve hostname: %s, will retry in %ds") | ||
| except socket.timeout: | ||
| raise TemporaryFailure( | ||
| "Connection timeout with server: %s, will retry in %ds" | ||
| ) | ||
| except ConnectionRefusedError: | ||
| raise TemporaryFailure( | ||
| "Connection refused by server: %s, will retry in %ds" | ||
| ) | ||
| except ssl.CertificateError as err: | ||
| raise PermanentFailure( # pylint: disable=raising-format-tuple | ||
| "Certificate error with server: %s [%s]", err.verify_message | ||
| ) | ||
| except ssl.SSLError as err: | ||
| raise PermanentFailure( # pylint: disable=raising-format-tuple | ||
| "SSL error with server: %s [%s]", err.args[0] | ||
| ) | ||
|
|
||
| except TemporaryFailure as err: | ||
|
|
||
| def scheduled_update(_): | ||
| self.async_schedule_update_ha_state(True) | ||
|
|
||
| _LOGGER.error(err, self.server_name, self.retry_delay) | ||
| self._available = False | ||
| self._valid = False | ||
| async_track_point_in_utc_time(self.hass, scheduled_update, self.next_check) | ||
| self._retry_attempts += 1 | ||
| return | ||
| except (ssl.CertificateError, ssl.SSLError): | ||
|
|
||
| except PermanentFailure as err: | ||
| _LOGGER.error(err.args[0], self.server_name, err.args[1]) | ||
| self._available = True | ||
| self._state = 0 | ||
| self._valid = False | ||
| return | ||
|
|
||
| except Exception: # pylint: disable=broad-except | ||
| _LOGGER.exception("Unknown error checking server: %s", self.server_name) | ||
| self._available = False | ||
| self._valid = False | ||
| return | ||
|
|
||
| ts_seconds = ssl.cert_time_to_seconds(cert["notAfter"]) | ||
| timestamp = datetime.fromtimestamp(ts_seconds) | ||
| expiry = timestamp - datetime.today() | ||
| self._available = True | ||
| self._retry_attempts = 0 | ||
| self._state = expiry.days | ||
| self._valid = True | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this an error for setting up the config flow? Isn't the whole reason this sensor exists to check this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The error is meant to interactively help fix an existing broken setup. We could create a new sensor and assume the user will check the logs for more info on why the sensor is reporting a failed cert, but presenting this info in the UI feels like it would help resolve the problem faster.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But what if the user wants to set up the sensor, see that it's invalid, then fix the sensor and see it jump to valid?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You wouldn't prevent a leak detector from being paired if it was currently detecting water.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@balloob : I'm not so sure... this component gives us the number of days before expiration. If the certificate is already expired, the state will be negative and I'm not sure it will really work.
I would agree with you if the state was "IsExpired: yes/no" and one of the attributes was the number of days remaining until expiration.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After some thought I agree with @balloob and this will be allowed in a new PR.