Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 64 additions & 2 deletions homeassistant/components/sensor/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
from homeassistant.util.decorator import Registry
from homeassistant.helpers.entity import Entity
from homeassistant.helpers.event import async_track_state_change
import homeassistant.util.dt as dt_util

_LOGGER = logging.getLogger(__name__)

FILTER_NAME_LOWPASS = 'lowpass'
FILTER_NAME_OUTLIER = 'outlier'
FILTER_NAME_THROTTLE = 'throttle'
FILTER_NAME_TIME_SMA = 'time_simple_moving_average'
FILTERS = Registry()

CONF_FILTERS = 'filters'
Expand All @@ -34,6 +36,9 @@
CONF_FILTER_PRECISION = 'precision'
CONF_FILTER_RADIUS = 'radius'
CONF_FILTER_TIME_CONSTANT = 'time_constant'
CONF_TIME_SMA_TYPE = 'type'

TIME_SMA_LAST = 'last'

DEFAULT_WINDOW_SIZE = 1
DEFAULT_PRECISION = 2
Expand All @@ -44,24 +49,37 @@
ICON = 'mdi:chart-line-variant'

FILTER_SCHEMA = vol.Schema({
vol.Optional(CONF_FILTER_WINDOW_SIZE,
default=DEFAULT_WINDOW_SIZE): vol.Coerce(int),
vol.Optional(CONF_FILTER_PRECISION,
default=DEFAULT_PRECISION): vol.Coerce(int),
})

# pylint: disable=redefined-builtin
FILTER_OUTLIER_SCHEMA = FILTER_SCHEMA.extend({
vol.Required(CONF_FILTER_NAME): FILTER_NAME_OUTLIER,
vol.Optional(CONF_FILTER_WINDOW_SIZE,
default=DEFAULT_WINDOW_SIZE): vol.Coerce(int),
vol.Optional(CONF_FILTER_RADIUS,
default=DEFAULT_FILTER_RADIUS): vol.Coerce(float),
})

FILTER_LOWPASS_SCHEMA = FILTER_SCHEMA.extend({
vol.Required(CONF_FILTER_NAME): FILTER_NAME_LOWPASS,
vol.Optional(CONF_FILTER_WINDOW_SIZE,
default=DEFAULT_WINDOW_SIZE): vol.Coerce(int),
vol.Optional(CONF_FILTER_TIME_CONSTANT,
default=DEFAULT_FILTER_TIME_CONSTANT): vol.Coerce(int),
})

FILTER_TIME_SMA_SCHEMA = FILTER_SCHEMA.extend({
vol.Required(CONF_FILTER_NAME): FILTER_NAME_TIME_SMA,
vol.Optional(CONF_TIME_SMA_TYPE,
default=TIME_SMA_LAST): vol.In(
[None, TIME_SMA_LAST]),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is None required here? I mean we have a default. The only configuration that I believe would spit out None here would be the following:

- platform: filter
  # ...
  filters:
    - filter: time_simple_moving_average
      type:

And I think we should break when the user writes that.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, Otto is right.

Copy link
Copy Markdown
Contributor Author

@dgomes dgomes Mar 18, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are absolutely right! Just created a new PR #13326


vol.Required(CONF_FILTER_WINDOW_SIZE): vol.All(cv.time_period,
cv.positive_timedelta)
})

FILTER_THROTTLE_SCHEMA = FILTER_SCHEMA.extend({
vol.Required(CONF_FILTER_NAME): FILTER_NAME_THROTTLE,
})
Expand All @@ -72,6 +90,7 @@
vol.Required(CONF_FILTERS): vol.All(cv.ensure_list,
[vol.Any(FILTER_OUTLIER_SCHEMA,
FILTER_LOWPASS_SCHEMA,
FILTER_TIME_SMA_SCHEMA,
FILTER_THROTTLE_SCHEMA)])
})

Expand Down Expand Up @@ -277,6 +296,49 @@ def _filter_state(self, new_state):
return filtered


@FILTERS.register(FILTER_NAME_TIME_SMA)
class TimeSMAFilter(Filter):
"""Simple Moving Average (SMA) Filter.

The window_size is determined by time, and SMA is time weighted.

Args:
variant (enum): type of argorithm used to connect discrete values
"""

def __init__(self, window_size, precision, entity, type):
"""Initialize Filter."""
super().__init__(FILTER_NAME_TIME_SMA, 0, precision, entity)
self._time_window = int(window_size.total_seconds())
self.last_leak = None
self.queue = deque()

def _leak(self, now):
"""Remove timeouted elements."""
while self.queue:
timestamp, _ = self.queue[0]
if timestamp + self._time_window <= now:
self.last_leak = self.queue.popleft()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason this is assigned to an instance variable instead of a local variable and returned at the end of the while loop ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please look at the previous comment #13104 (comment)

Keeping this value assures me I always have some value to work with in the _filter_state()

The filter always lags 1 state behind, so using the new state in these cases (when there are no previous states in the windows) would make the behaviour erratic.

else:
return

def _filter_state(self, new_state):
now = int(dt_util.utcnow().timestamp())

self._leak(now)
self.queue.append((now, float(new_state)))
moving_sum = 0
start = now - self._time_window
_, prev_val = self.last_leak or (0, float(new_state))

for timestamp, val in self.queue:
moving_sum += (timestamp-start)*prev_val
Copy link
Copy Markdown
Member

@OttoWinter OttoWinter Mar 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What you're doing here is a lower bound of the moving average:

screen shot 2018-03-12 at 16 57 50

(Edit: This picture is not entirely true; Your code always takes the previous values for the "integral" approximation, not the lower of the two)

Ideally we would instead be using the mean of prev and val:

screen shot 2018-03-12 at 16 58 41

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I based the implementation in this paper:

http://www.eckner.com/papers/Algorithms%20for%20Unevenly%20Spaced%20Time%20Series.pdf

My implementation is that of SMA_last, what you propose is SMA_linear. Both are acceptable filters, yet different ones.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok that is true; I don't see much value in this SMA_last method over the trapezoids though. The latter just seems quite a bit more accurate in most cases.

Copy link
Copy Markdown
Contributor Author

@dgomes dgomes Mar 12, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will probably PR a second filter based on SMA_linear, but the use cases are different:

SMA_last: "discrete observation values"

  • sensors that are event based
  • binary_sensors in HA

SMA_linear: "discretely-observed continuous-time stochastic processes"

  • sensors which are polled

start, prev_val = timestamp, val
moving_sum += (now-start)*prev_val

return moving_sum/self._time_window
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if we don't receive any value within self._time_window? I think we should be reporting an unknown state then. For example if my time window is 1 minute but I haven't received any state updates for 2 minutes, the output of this sensor should automatically be set to unknown (and not just repeat the same old state indefinitely).

I know this might be a bit of work to do but it's what I'd expect as a user. See usages of async_track_point_in_utc_time for ways how to do this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I disagree, If I don't get any values it just means that it hasn't changed.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem here is that the time has changed and time is also an input into this filter. Imagine that our current value is based off 2 states that happened during the window. If we wait long enough that 1 state drops out of our window, wouldn't you expect to base the value off just the other state?

Copy link
Copy Markdown
Contributor Author

@dgomes dgomes Mar 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@balloob that is exactly what it will happen.

Even if no points are left in the time window, the filter will report based on the last value that exited the windows (_, prev_val = self.last_leak )

Copy link
Copy Markdown
Member

@OttoWinter OttoWinter Mar 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dgomes I think you're misunderstanding Paulus' comment (or I'm not understanding it...)

If there's only one point within the time window, as a user I wouldn't expect it to use the last value outside the time window for any computation. It should just discard anything that's outside the time window.

In other words: only every consider the data points that are inside the time frame for computation. See this gif for an example (the blue points are the data points, the blue area is our time window and red line is the output):

out

Couple of things going on here:

  • First we only "see" the first value, so of course we should only report that value.
  • Then we start to see the next value. We should then take the weighted average (or whatever it's called in your paper) of the two.
  • If the first value exits the window, we should not use it for any computation anymore. In your current code that is however happening, not what I would expect.
  • Optional: In my opinion if there's no data points inside our window left, as I said we should report an unknown state. This might differ from the definition in the paper you mentioned, but IMHO our user base is not made up of scientists nor developers.

(The computation of the values while both data points are inside the time window is probably not right in my gif, but that's not the focus of this comment.)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@OttoWinter that is not the filter I'm implementing here.

Sure, that filter can be implemented, but it's not the one in this PR.

Please share how you are creating those graphs 😄

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dgomes I see that you report only on the last value but you only update the filter value when a new state comes in right?

Copy link
Copy Markdown
Member

@OttoWinter OttoWinter Mar 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dgomes Ok, sorry I will shut up about other filters 😅

Yeah those graphics were done with (ab)using GeoGebra, but I would describe it as the single most user-unfriendly maths tools ever written IMHO (but it's sadly what's taught here in Austria 🇦🇹). Seriously, it's incredibly hard to use if you're starting out. (incidentally, because of its poor usability the rest of my class will probably also have to re-take a recent maths exam 😥)

btw, if for some reason you're looking for a nice tool (in python, yay) for creating nice presentations, have a look at manim. For example, it has allowed me to create these fancy visualizations about graph theory: https://youtu.be/t2Gb1YA-HcU 😺

Copy link
Copy Markdown
Contributor Author

@dgomes dgomes Mar 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@balloob right, this filter always lags 1 state behind. The concept is that the raw sensor has maintained the previous status until the new state has arrived.

E.g.: Think about a water pump that publishes its rate (1 to 5). If the pump is at speed 5 and comes down to speed 4 (new state) the SMA is calculated with the period of time in which the pump was at 5 (we weight in the time the pump was at 5), 4 is of no use in the SMA since the period of time is 0 (it just got to 4).

@OttoWinter please contribute more filters :) It's just that I'm prioritising filters I can use in my setup. There are plenty of important filters missing which I hope some else can contribute 😄

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay I'm going to shut up talking about filters as I have clearly not a clue what I am talking about 👍



@FILTERS.register(FILTER_NAME_THROTTLE)
class ThrottleFilter(Filter):
"""Throttle Filter.
Expand Down
18 changes: 17 additions & 1 deletion tests/components/sensor/test_filter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
"""The test for the data filter sensor platform."""
from datetime import timedelta
import unittest
from unittest.mock import patch

from homeassistant.components.sensor.filter import (
LowPassFilter, OutlierFilter, ThrottleFilter)
LowPassFilter, OutlierFilter, ThrottleFilter, TimeSMAFilter)
import homeassistant.util.dt as dt_util
from homeassistant.setup import setup_component
from tests.common import get_test_home_assistant, assert_setup_component

Expand Down Expand Up @@ -90,3 +93,16 @@ def test_throttle(self):
if not filt.skip_processing:
filtered.append(new_state)
self.assertEqual([20, 21], filtered)

def test_time_sma(self):
"""Test if time_sma filter works."""
filt = TimeSMAFilter(window_size=timedelta(minutes=2),
precision=2,
entity=None,
type='last')
past = dt_util.utcnow() - timedelta(minutes=5)
for state in self.values:
with patch('homeassistant.util.dt.utcnow', return_value=past):
filtered = filt.filter_state(state)
past += timedelta(minutes=1)
self.assertEqual(21.5, filtered)