Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions redisvl/extensions/cache/llm/langcache.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
{
",": "，", # U+FF0C FULLWIDTH COMMA
"/": "∕", # U+2215 DIVISION SLASH
"\\": "＼", # U+FF3C FULLWIDTH REVERSE SOLIDUS (backslash)
"?": "？", # U+FF1F FULLWIDTH QUESTION MARK
}
)

Expand Down
86 changes: 86 additions & 0 deletions tests/integration/test_langcache_semantic_cache_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,92 @@ def test_attribute_value_with_comma_and_slash_is_encoded_for_llm_string(
hit.get("metadata", {}).get("llm_string") == raw_llm_string for hit in hits
)

def test_attribute_value_with_all_tokenizer_separators_round_trip_and_filter(
    self, langcache_with_attrs: LangCacheSemanticCache
) -> None:
    """Store an entry whose ``llm_string`` is full of punctuation, then find it.

    The attribute value embeds the punctuation characters listed as
    separators in the RediSearch text-field tokenization docs. The entry
    is stored with that value as metadata and then looked up using the
    same value as an attribute filter; the test passes when at least one
    hit carries the identical prompt, response, and ``llm_string``.
    """
    punctuation = ",.<>{}[]\"':;!@#$%^&*()-+=~"
    attribute_value = f"tenant {punctuation} value"

    question = "Attribute encoding for all tokenizer separators"
    answer = "Response for all tokenizer separators."

    stored_id = langcache_with_attrs.store(
        prompt=question,
        response=answer,
        metadata={"llm_string": attribute_value},
    )
    assert stored_id

    results = langcache_with_attrs.check(
        prompt=question,
        attributes={"llm_string": attribute_value},
        num_results=5,
    )
    assert results, "No hits returned for llm_string value with separators"

    def _is_expected(hit) -> bool:
        # Checks are chained with ``and`` so later lookups are skipped as
        # soon as an earlier field fails to match.
        return (
            hit.get("prompt") == question
            and hit.get("response") == answer
            and hit.get("metadata", {}).get("llm_string") == attribute_value
        )

    assert any(_is_expected(hit) for hit in results)

@pytest.mark.parametrize(
    "raw_value",
    [r"tenant\\with\\backslash", "tenant?with?question"],
)
def test_attribute_values_with_special_chars_round_trip_and_filter(
    self,
    langcache_with_attrs: LangCacheSemanticCache,
    raw_value: str,
) -> None:
    """Check that backslash and question-mark attribute values are filterable.

    For each parametrized value, an entry is stored with the value as its
    ``llm_string`` metadata and then searched for with the same value as
    an attribute filter. The test passes when some hit carries the same
    prompt, response, and ``llm_string``.
    """
    question = f"Special chars attribute {raw_value}"
    answer = f"Response for {raw_value}"

    stored_id = langcache_with_attrs.store(
        prompt=question,
        response=answer,
        metadata={"llm_string": raw_value},
    )
    assert stored_id

    results = langcache_with_attrs.check(
        prompt=question,
        attributes={"llm_string": raw_value},
        num_results=5,
    )

    # Scan the hits for one matching the prompt/response/metadata triple,
    # stopping at the first match.
    matched = False
    for hit in results:
        if (
            hit.get("prompt") == question
            and hit.get("response") == answer
            and hit.get("metadata", {}).get("llm_string") == raw_value
        ):
            matched = True
            break

    assert matched, (
        "Expected llm_string value to be filterable, but no matching "
        f"hit was found: {raw_value!r}"
    )


@pytest.mark.requires_api_keys
class TestLangCacheSemanticCacheIntegrationWithoutAttributes:
Expand Down
11 changes: 6 additions & 5 deletions tests/unit/test_langcache_semantic_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def test_check_with_attributes(self, mock_langcache_client):
# should decode them before exposing them to callers.
"attributes": {
"language": "python",
"topic": "programming，with∕encoding",
"topic": "programming，with∕encoding＼and？",
},
}

Expand All @@ -289,7 +289,7 @@ def test_check_with_attributes(self, mock_langcache_client):
prompt="What is Python?",
attributes={
"language": "python",
"topic": "programming,with/encoding",
"topic": r"programming,with/encoding\and?",
},
)

Expand All @@ -301,14 +301,15 @@ def test_check_with_attributes(self, mock_langcache_client):
call_kwargs = mock_client.search.call_args.kwargs
assert call_kwargs["attributes"] == {
"language": "python",
# The comma and slash should be encoded for LangCache.
"topic": "programming，with∕encoding",
# The comma, slash, backslash, and question mark should be encoded
# for LangCache.
"topic": "programming，with∕encoding＼and？",
}

# And the decoded, original values should appear in metadata
assert results[0]["metadata"] == {
"language": "python",
"topic": "programming,with/encoding",
"topic": r"programming,with/encoding\and?",
}

def test_store_with_empty_metadata_does_not_send_attributes(
Expand Down
Loading