Skip to content

Commit 0472408

Browse files
rbs333tylerhutcherson
authored and committed
Add datetime-enabled filters (#295)
Goal:
1. Create a new DatetimeFilter or TimestampFilter that can:
   - Allow querying for a specific date without time
   - Allow querying for a specific date with time
   - Allow querying for a date range
   - Allow querying for a time range
   - Allow querying with or without a timezone
   - Default to timezone-aware UTC datetimes
2. Alternatively, create a new Timestamp field type that allows specifying via YAML or dictionary that a numeric field is actually a timestamp, with or without a timezone.
---------
Co-authored-by: Tyler Hutcherson <[email protected]>
1 parent 2a8ff71 commit 0472408

File tree

6 files changed

+723
-50
lines changed

6 files changed

+723
-50
lines changed

docs/user_guide/02_hybrid_queries.ipynb

Lines changed: 170 additions & 41 deletions
Large diffs are not rendered by default.
62 Bytes
Binary file not shown.

redisvl/query/filter.py

Lines changed: 271 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import datetime
2+
import re
13
from enum import Enum
24
from functools import wraps
35
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
@@ -8,6 +10,19 @@
810
# mypy: disable-error-code="override"
911

1012

13+
class Inclusive(str, Enum):
    """Which endpoints of a range query are part of the match.

    Members are also plain strings, so ``Inclusive("both")`` looks a member
    up by its value and ``Inclusive.BOTH == "both"`` holds.
    """

    # Both endpoints are included (the default used by ``between``).
    BOTH = "both"
    # Neither endpoint is included.
    NEITHER = "neither"
    # Only the left (lower) endpoint is included.
    LEFT = "left"
    # Only the right (upper) endpoint is included.
    RIGHT = "right"
24+
25+
1126
class FilterOperator(Enum):
1227
EQ = 1
1328
NE = 2
@@ -19,6 +34,7 @@ class FilterOperator(Enum):
1934
AND = 8
2035
LIKE = 9
2136
IN = 10
37+
BETWEEN = 11
2238

2339

2440
class FilterField:
@@ -267,6 +283,7 @@ class Num(FilterField):
267283
FilterOperator.GT: ">",
268284
FilterOperator.LE: "<=",
269285
FilterOperator.GE: ">=",
286+
FilterOperator.BETWEEN: "between",
270287
}
271288
OPERATOR_MAP: Dict[FilterOperator, str] = {
272289
FilterOperator.EQ: "@%s:[%s %s]",
@@ -275,8 +292,10 @@ class Num(FilterField):
275292
FilterOperator.LT: "@%s:[-inf (%s]",
276293
FilterOperator.GE: "@%s:[%s +inf]",
277294
FilterOperator.LE: "@%s:[-inf %s]",
295+
FilterOperator.BETWEEN: "@%s:[%s %s]",
278296
}
279-
SUPPORTED_VAL_TYPES = (int, float, type(None))
297+
298+
SUPPORTED_VAL_TYPES = (int, float, tuple, type(None))
280299

281300
def __eq__(self, other: int) -> "FilterExpression":
282301
"""Create a Numeric equality filter expression.
@@ -373,10 +392,51 @@ def __le__(self, other: int) -> "FilterExpression":
373392
self._set_value(other, self.SUPPORTED_VAL_TYPES, FilterOperator.LE)
374393
return FilterExpression(str(self))
375394

395+
@staticmethod
def _validate_inclusive_string(inclusive: str) -> Inclusive:
    """Convert an ``inclusive`` option string into an ``Inclusive`` member.

    Args:
        inclusive: One of the ``Inclusive`` values
            ("both", "neither", "left", "right").

    Returns:
        The matching ``Inclusive`` member.

    Raises:
        ValueError: If ``inclusive`` is not a valid ``Inclusive`` value.
    """
    try:
        return Inclusive(inclusive)
    # Only catch the lookup failure itself; a bare ``except`` would also
    # swallow KeyboardInterrupt / SystemExit.
    except (TypeError, ValueError) as exc:
        raise ValueError(
            f"Invalid inclusive value must be: {[i.value for i in Inclusive]}"
        ) from exc
403+
404+
def _format_inclusive_between(
    self, inclusive: Inclusive, start: int, end: int
) -> str:
    """Render a numeric range clause for this field.

    A bound prefixed with ``(`` is written as exclusive; a bare bound is
    inclusive.

    Args:
        inclusive: Which endpoints belong to the range.
        start: Lower bound of the range.
        end: Upper bound of the range.

    Returns:
        A query fragment of the form ``@field:[lower upper]``.

    Raises:
        ValueError: If ``inclusive`` carries a value that is not one of the
            ``Inclusive`` members.
    """
    option = inclusive.value

    # The lower bound is exclusive for NEITHER and RIGHT, the upper bound
    # for NEITHER and LEFT.
    open_lower = option in (Inclusive.NEITHER.value, Inclusive.RIGHT.value)
    open_upper = option in (Inclusive.NEITHER.value, Inclusive.LEFT.value)

    if not (open_lower or open_upper) and option != Inclusive.BOTH.value:
        raise ValueError("Inclusive value not found")

    lower = f"({start}" if open_lower else f"{start}"
    upper = f"({end}" if open_upper else f"{end}"
    return f"@{self._field}:[{lower} {upper}]"
420+
421+
def between(
    self, start: int, end: int, inclusive: str = "both"
) -> "FilterExpression":
    """Build a filter matching values that lie between two numeric bounds.

    Args:
        start: Lower bound of the range.
        end: Upper bound of the range.
        inclusive: Which bounds are part of the range: "both" (default),
            "neither", "left" or "right".

    Returns:
        A FilterExpression wrapping the rendered range clause.

    Raises:
        ValueError: If ``inclusive`` is not a recognised option.
    """
    bounds_mode = self._validate_inclusive_string(inclusive)
    return FilterExpression(
        self._format_inclusive_between(bounds_mode, start, end)
    )
429+
376430
def __str__(self) -> str:
377431
"""Return the Redis Query string for the Numeric filter"""
378432
if self._value is None:
379433
return "*"
434+
if self._operator == FilterOperator.BETWEEN:
435+
return self.OPERATOR_MAP[self._operator] % (
436+
self._field,
437+
self._value[0],
438+
self._value[1],
439+
)
380440
if self._operator == FilterOperator.EQ or self._operator == FilterOperator.NE:
381441
return self.OPERATOR_MAP[self._operator] % (
382442
self._field,
@@ -562,3 +622,213 @@ def __str__(self) -> str:
562622
if not self._filter:
563623
raise ValueError("Improperly initialized FilterExpression")
564624
return self._filter
625+
626+
627+
class Timestamp(Num):
    """
    A timestamp filter for querying date/time fields in Redis.

    This filter can handle various date and time formats, including:
    - datetime objects (with or without timezone)
    - date objects
    - ISO-8601 formatted strings
    - Unix timestamps (as integers or floats)

    All timestamps are converted to Unix timestamps in UTC for consistency.
    Naive datetimes and plain dates are interpreted as UTC, never as the
    local timezone of the machine running the query.
    """

    SUPPORTED_TYPES = (
        datetime.datetime,
        datetime.date,
        tuple,  # Date range
        str,  # ISO format
        int,  # Unix timestamp
        float,  # Unix timestamp with fractional seconds
        type(None),
    )

    @staticmethod
    def _is_date(value: Any) -> bool:
        """Return True when ``value`` denotes a whole day rather than an instant.

        That is either a ``datetime.date`` that is not a ``datetime.datetime``,
        or a string in exact ``YYYY-MM-DD`` form.
        """
        if isinstance(value, str):
            return Timestamp._is_date_only(value)
        return isinstance(value, datetime.date) and not isinstance(
            value, datetime.datetime
        )

    @staticmethod
    def _is_date_only(iso_string: str) -> bool:
        """Return True when the string is exactly ``YYYY-MM-DD``."""
        # fullmatch instead of match + "$": "$" would also accept a trailing
        # newline, which the date parser then rejects.
        return bool(re.fullmatch(r"\d{4}-\d{2}-\d{2}", iso_string))

    def _convert_to_timestamp(self, value, end_date=False):
        """
        Convert various inputs to a Unix timestamp (seconds since epoch in UTC).

        Args:
            value: A datetime, date, ISO string, int, float, or None.
            end_date: When ``value`` is a whole day (a date object or a
                ``YYYY-MM-DD`` string), use the last instant of that day
                instead of the first.

        Returns:
            float: Unix timestamp, or None when ``value`` is None.

        Raises:
            ValueError: If a string value is not valid ISO format.
            TypeError: If ``value`` is of an unsupported type.
        """
        if value is None:
            return None

        if isinstance(value, (int, float)):
            # Already a Unix timestamp
            return float(value)

        if isinstance(value, str):
            try:
                if self._is_date_only(value):
                    # Keep whole-day strings as dates so that ``end_date``
                    # applies to them exactly as it does to date objects.
                    value = datetime.date.fromisoformat(value)
                else:
                    value = datetime.datetime.fromisoformat(value)
            except ValueError as exc:
                raise ValueError(
                    f"String timestamp must be in ISO format: {value}"
                ) from exc

        if isinstance(value, datetime.date) and not isinstance(
            value, datetime.datetime
        ):
            # Expand a whole day to its first or last instant.
            boundary = datetime.time.max if end_date else datetime.time.min
            value = datetime.datetime.combine(value, boundary)

        if isinstance(value, datetime.datetime):
            # Ensure the datetime is timezone-aware (UTC): naive values are
            # taken to already be UTC, aware values are converted.
            if value.tzinfo is None:
                value = value.replace(tzinfo=datetime.timezone.utc)
            else:
                value = value.astimezone(datetime.timezone.utc)
            return value.timestamp()

        raise TypeError(f"Unsupported type for timestamp conversion: {type(value)}")

    def _compare(self, other, operator: FilterOperator) -> FilterExpression:
        """Build a single-value comparison expression for ``other``."""
        timestamp = self._convert_to_timestamp(other)
        self._set_value(timestamp, self.SUPPORTED_TYPES, operator)
        return FilterExpression(str(self))

    def __eq__(self, other) -> FilterExpression:
        """
        Filter for timestamps equal to the specified value.
        For date objects (without time), this matches the entire day.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The resulting filter expression
        """
        if self._is_date(other):
            # Match the whole UTC day. ``between`` expands the same value to
            # start-of-day and end-of-day, so the day boundaries no longer
            # depend on the local timezone of the host.
            return self.between(other, other)

        return self._compare(other, FilterOperator.EQ)

    def __ne__(self, other) -> FilterExpression:
        """
        Filter for timestamps not equal to the specified value.
        For date objects (without time), this excludes the entire day.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The resulting filter expression
        """
        if self._is_date(other):
            # Negate the whole-day range; returning the range itself would
            # match the day instead of excluding it. A leading "-" is the
            # negation operator of the Redis query syntax.
            whole_day = self.between(other, other)
            return FilterExpression(f"(-{str(whole_day)})")

        return self._compare(other, FilterOperator.NE)

    def __gt__(self, other) -> FilterExpression:
        """
        Filter for timestamps greater than the specified value.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The resulting filter expression
        """
        return self._compare(other, FilterOperator.GT)

    def __lt__(self, other) -> FilterExpression:
        """
        Filter for timestamps less than the specified value.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The resulting filter expression
        """
        return self._compare(other, FilterOperator.LT)

    def __ge__(self, other) -> FilterExpression:
        """
        Filter for timestamps greater than or equal to the specified value.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The resulting filter expression
        """
        return self._compare(other, FilterOperator.GE)

    def __le__(self, other) -> FilterExpression:
        """
        Filter for timestamps less than or equal to the specified value.

        Args:
            other: A datetime, date, ISO string, or Unix timestamp

        Returns:
            FilterExpression: The resulting filter expression
        """
        return self._compare(other, FilterOperator.LE)

    def between(self, start, end, inclusive: str = "both") -> FilterExpression:
        """
        Filter for timestamps between start and end.

        A whole-day ``start`` (date object or ``YYYY-MM-DD`` string) begins at
        the first instant of that day; a whole-day ``end`` extends to the last
        instant of that day.

        Args:
            start: A datetime, date, ISO string, or Unix timestamp
            end: A datetime, date, ISO string, or Unix timestamp
            inclusive: Which bounds are part of the range: "both" (default),
                "neither", "left" or "right"

        Returns:
            FilterExpression: The resulting filter expression

        Raises:
            ValueError: If ``inclusive`` is not a recognised option or a
                string bound is not valid ISO format.
        """
        bounds_mode = self._validate_inclusive_string(inclusive)

        start_ts = self._convert_to_timestamp(start)
        end_ts = self._convert_to_timestamp(end, end_date=True)

        expression = self._format_inclusive_between(bounds_mode, start_ts, end_ts)

        return FilterExpression(expression)

tests/conftest.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
from datetime import datetime, timezone
23

34
import pytest
45
from testcontainers.compose import DockerCompose
@@ -68,12 +69,22 @@ def client(redis_url):
6869

6970

7071
@pytest.fixture
71-
def sample_data():
72+
def sample_datetimes():
    """Return three fixed, timezone-aware UTC datetimes keyed low/mid/high.

    The values are built directly in UTC. Calling ``.astimezone()`` on a naive
    datetime would first interpret it in the local timezone of the machine
    running the tests, so the stored timestamps would differ between hosts.
    """
    return {
        "low": datetime(2025, 1, 16, 13, tzinfo=timezone.utc),
        "mid": datetime(2025, 2, 16, 13, tzinfo=timezone.utc),
        "high": datetime(2025, 3, 16, 13, tzinfo=timezone.utc),
    }
78+
79+
80+
@pytest.fixture
81+
def sample_data(sample_datetimes):
7282
return [
7383
{
7484
"user": "john",
7585
"age": 18,
7686
"job": "engineer",
87+
"last_updated": sample_datetimes["low"].timestamp(),
7788
"credit_score": "high",
7889
"location": "-122.4194,37.7749",
7990
"user_embedding": [0.1, 0.1, 0.5],
@@ -82,6 +93,7 @@ def sample_data():
8293
"user": "mary",
8394
"age": 14,
8495
"job": "doctor",
96+
"last_updated": sample_datetimes["low"].timestamp(),
8597
"credit_score": "low",
8698
"location": "-122.4194,37.7749",
8799
"user_embedding": [0.1, 0.1, 0.5],
@@ -90,6 +102,7 @@ def sample_data():
90102
"user": "nancy",
91103
"age": 94,
92104
"job": "doctor",
105+
"last_updated": sample_datetimes["mid"].timestamp(),
93106
"credit_score": "high",
94107
"location": "-122.4194,37.7749",
95108
"user_embedding": [0.7, 0.1, 0.5],
@@ -98,6 +111,7 @@ def sample_data():
98111
"user": "tyler",
99112
"age": 100,
100113
"job": "engineer",
114+
"last_updated": sample_datetimes["mid"].timestamp(),
101115
"credit_score": "high",
102116
"location": "-110.0839,37.3861",
103117
"user_embedding": [0.1, 0.4, 0.5],
@@ -106,6 +120,7 @@ def sample_data():
106120
"user": "tim",
107121
"age": 12,
108122
"job": "dermatologist",
123+
"last_updated": sample_datetimes["mid"].timestamp(),
109124
"credit_score": "high",
110125
"location": "-110.0839,37.3861",
111126
"user_embedding": [0.4, 0.4, 0.5],
@@ -114,6 +129,7 @@ def sample_data():
114129
"user": "taimur",
115130
"age": 15,
116131
"job": "CEO",
132+
"last_updated": sample_datetimes["high"].timestamp(),
117133
"credit_score": "low",
118134
"location": "-110.0839,37.3861",
119135
"user_embedding": [0.6, 0.1, 0.5],
@@ -122,6 +138,7 @@ def sample_data():
122138
"user": "joe",
123139
"age": 35,
124140
"job": "dentist",
141+
"last_updated": sample_datetimes["high"].timestamp(),
125142
"credit_score": "medium",
126143
"location": "-110.0839,37.3861",
127144
"user_embedding": [0.9, 0.9, 0.1],

0 commit comments

Comments
 (0)