diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ded2f84f..0b68e31e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -96,8 +96,8 @@ jobs: matrix: # 3.11 tests are run in the service-tests job python-version: ["3.9", "3.10", "3.12", "3.13"] - redis-py-version: ["5.x", "6.x"] - redis-version: ["6.2.6-v9", "latest", "8.0.2"] + redis-py-version: ["5.x", "6.x", "7.x"] + redis-version: ["6.2.6-v9", "latest", "8.4.0"] steps: - name: Check out repository uses: actions/checkout@v4 @@ -130,13 +130,15 @@ jobs: # Install right redis version based on redis py if [[ "${{ matrix.redis-py-version }}" == "5.x" ]]; then uv pip install "redis>=5,<6" - else + elif [[ "${{ matrix.redis-py-version }}" == "6.x" ]]; then uv pip install "redis>=6,<7" + else + uv pip install "redis>=7,<8" fi - name: Set Redis image name run: | - if [[ "${{ matrix.redis-version }}" == "8.0.2" ]]; then + if [[ "${{ matrix.redis-version }}" == "8.4.0" ]]; then echo "REDIS_IMAGE=redis:${{ matrix.redis-version }}" >> $GITHUB_ENV else echo "REDIS_IMAGE=redis/redis-stack-server:${{ matrix.redis-version }}" >> $GITHUB_ENV diff --git a/docs/api/query.rst b/docs/api/query.rst index 1410bd3a..22616007 100644 --- a/docs/api/query.rst +++ b/docs/api/query.rst @@ -105,24 +105,27 @@ VectorRangeQuery use_search_history='AUTO' # SVS-VAMANA only ) -HybridQuery +AggregateHybridQuery ================ .. currentmodule:: redisvl.query -.. autoclass:: HybridQuery +.. autoclass:: AggregateHybridQuery :members: :inherited-members: :show-inheritance: :exclude-members: add_filter,get_args,highlight,return_field,summarize .. note:: - The ``stopwords`` parameter in :class:`HybridQuery` (and :class:`AggregateHybridQuery`) controls query-time stopword filtering (client-side). + The ``stopwords`` parameter in :class:`AggregateHybridQuery` (and :class:`HybridQuery`) controls query-time stopword filtering (client-side). 
 For index-level stopwords configuration (server-side), see :class:`redisvl.schema.IndexInfo.stopwords`. Using query-time stopwords with index-level ``STOPWORDS 0`` is counterproductive. +.. note:: + :class:`HybridQuery` and :class:`AggregateHybridQuery` weight their linear combination in opposite directions. :class:`HybridQuery` uses ``linear_alpha`` to weight the text score, while :class:`AggregateHybridQuery` uses ``alpha`` to weight the vector score. When switching between the two classes, invert the weight: an ``alpha`` of ``a`` in :class:`AggregateHybridQuery` corresponds to a ``linear_alpha`` of ``1 - a`` in :class:`HybridQuery`. + .. note:: **Runtime Parameters for Hybrid Queries** @@ -130,22 +133,44 @@ HybridQuery Runtime parameters (``ef_runtime``, ``search_window_size``, ``use_search_history``, ``search_buffer_capacity``) are only supported with FT.SEARCH commands. - For runtime parameter support, use :class:`VectorQuery` or :class:`VectorRangeQuery` instead of AggregateHybridQuery. + For runtime parameter support, use :class:`HybridQuery`, :class:`VectorQuery`, or :class:`VectorRangeQuery` instead of AggregateHybridQuery. - Example with VectorQuery (supports runtime parameters): + Example with HybridQuery (supports runtime parameters): .. code-block:: python - from redisvl.query import VectorQuery + from redisvl.query import HybridQuery - query = VectorQuery( + query = HybridQuery( + text="query string", + text_field_name="description", vector=[0.1, 0.2, 0.3], vector_field_name="embedding", + vector_search_method="KNN", + knn_ef_runtime=150, # Runtime parameters work with HybridQuery return_fields=["description"], num_results=10, - ef_runtime=150 # Runtime parameters work with VectorQuery ) +HybridQuery +================ + + +.. currentmodule:: redisvl.query.hybrid + + +.. autoclass:: HybridQuery + :members: + :inherited-members: + :show-inheritance: + +.. note:: + The ``stopwords`` parameter in :class:`HybridQuery` (and :class:`AggregateHybridQuery`) controls query-time stopword filtering (client-side). 
+ For index-level stopwords configuration (server-side), see :class:`redisvl.schema.IndexInfo.stopwords`. + Using query-time stopwords with index-level ``STOPWORDS 0`` is counterproductive. + +.. note:: + :class:`HybridQuery` and :class:`AggregateHybridQuery` weight their linear combination in opposite directions. :class:`HybridQuery` uses ``linear_alpha`` to weight the text score, while :class:`AggregateHybridQuery` uses ``alpha`` to weight the vector score. When switching between the two classes, invert the weight: an ``alpha`` of ``a`` in :class:`AggregateHybridQuery` corresponds to a ``linear_alpha`` of ``1 - a`` in :class:`HybridQuery`. TextQuery ================ diff --git a/docs/overview/installation.md b/docs/overview/installation.md index b4a011c7..331b31aa 100644 --- a/docs/overview/installation.md +++ b/docs/overview/installation.md @@ -11,7 +11,7 @@ There are a few ways to install RedisVL. The easiest way is to use pip. ## Install RedisVL with Pip -Install `redisvl` into your Python (>=3.8) environment using `pip`: +Install `redisvl` into your Python (>=3.9) environment using `pip`: ```bash $ pip install -U redisvl diff --git a/docs/user_guide/11_advanced_queries.ipynb b/docs/user_guide/11_advanced_queries.ipynb index 3125d848..9ff55799 100644 --- a/docs/user_guide/11_advanced_queries.ipynb +++ b/docs/user_guide/11_advanced_queries.ipynb @@ -9,14 +9,15 @@ "In this notebook, we will explore advanced query types available in RedisVL:\n", "\n", "1. **`TextQuery`**: Full text search with advanced scoring\n", - "2. **`AggregateHybridQuery`**: Combines text and vector search for hybrid retrieval\n", + "2. **`AggregateHybridQuery` and `HybridQuery`**: Combine text and vector search for hybrid retrieval\n", "3. 
**`MultiVectorQuery`**: Search over multiple vector fields simultaneously\n", "\n", "These query types are powerful tools for building sophisticated search applications that go beyond simple vector similarity search.\n", "\n", "Prerequisites:\n", "- Ensure `redisvl` is installed in your Python environment.\n", - "- Have a running instance of [Redis Stack](https://redis.io/docs/install/install-stack/) or [Redis Cloud](https://redis.io/cloud).\n" + "- Have a running instance of [Redis Stack](https://redis.io/docs/install/install-stack/) or [Redis Cloud](https://redis.io/cloud).\n", + "- For `HybridQuery`, we will need Redis >= 8.4.0 and redis-py >= 7.1.0.\n" ] }, { @@ -38,8 +39,8 @@ "shell.execute_reply": "2025-11-21T00:42:12.301163Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:49.998123Z", - "start_time": "2025-11-21T21:27:49.993513Z" + "end_time": "2025-12-15T09:27:43.615445Z", + "start_time": "2025-12-15T09:27:43.522493Z" } }, "source": [ @@ -111,7 +112,7 @@ "]" ], "outputs": [], - "execution_count": 6 + "execution_count": 1 }, { "cell_type": "markdown", @@ -136,8 +137,8 @@ "shell.execute_reply": "2025-11-21T00:42:12.305407Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:50.362957Z", - "start_time": "2025-11-21T21:27:50.360561Z" + "end_time": "2025-12-15T09:27:43.620369Z", + "start_time": "2025-12-15T09:27:43.615922Z" } }, "source": [ @@ -178,7 +179,7 @@ "}" ], "outputs": [], - "execution_count": 7 + "execution_count": 2 }, { "cell_type": "markdown", @@ -197,8 +198,8 @@ "shell.execute_reply": "2025-11-21T00:42:12.415926Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:50.727271Z", - "start_time": "2025-11-21T21:27:50.715789Z" + "end_time": "2025-12-15T09:27:43.720506Z", + "start_time": "2025-12-15T09:27:43.620716Z" } }, "source": [ @@ -218,12 +219,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "16:27:50 redisvl.index.index INFO Index already exists, overwriting.\n", "Loaded 6 products into the index\n" ] } ], - "execution_count": 8 
+ "execution_count": 3 }, { "cell_type": "markdown", @@ -248,8 +248,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.708647Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:51.127508Z", - "start_time": "2025-11-21T21:27:51.123980Z" + "end_time": "2025-12-15T09:27:44.289286Z", + "start_time": "2025-12-15T09:27:43.721057Z" } }, "source": [ @@ -273,7 +273,7 @@ "" ], "text/html": [ - "
scoreproduct_idbrief_descriptioncategoryprice
4.080705480646511prod_1comfortable running shoes for athletesfootwear89.99
1.4504838715161907prod_5basketball shoes with excellent ankle supportfootwear139.99
1.431980178975859prod_2lightweight running jacket with water resistanceouterwear129.99
" + "
scoreproduct_idbrief_descriptioncategoryprice
6.134713688880119prod_1comfortable running shoes for athletesfootwear89.99
6.134713688880119prod_1comfortable running shoes for athletesfootwear89.99
2.148612199701887prod_5basketball shoes with excellent ankle supportfootwear139.99
2.148612199701887prod_5basketball shoes with excellent ankle supportfootwear139.99
2.102960001841964prod_2lightweight running jacket with water resistanceouterwear129.99
" ] }, "metadata": {}, @@ -283,7 +283,7 @@ } } ], - "execution_count": 9 + "execution_count": 4 }, { "cell_type": "markdown", @@ -304,8 +304,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.748398Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:51.537001Z", - "start_time": "2025-11-21T21:27:51.532996Z" + "end_time": "2025-12-15T09:27:44.337057Z", + "start_time": "2025-12-15T09:27:44.321956Z" } }, "source": [ @@ -336,7 +336,7 @@ "" ], "text/html": [ - "
scoreproduct_idbrief_descriptionprice
4.165936382048982prod_1comfortable running shoes for athletes89.99
1.769051138581863prod_4yoga mat with extra cushioning for comfort39.99
1.2306902673750557prod_5basketball shoes with excellent ankle support139.99
" + "
scoreproduct_idbrief_descriptionprice
6.340446315760713prod_1comfortable running shoes for athletes89.99
6.340446315760713prod_1comfortable running shoes for athletes89.99
2.148612199701887prod_5basketball shoes with excellent ankle support139.99
" ] }, "metadata": {}, @@ -346,7 +346,7 @@ } } ], - "execution_count": 10 + "execution_count": 5 }, { "cell_type": "code", @@ -358,8 +358,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.754345Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:51.747761Z", - "start_time": "2025-11-21T21:27:51.742796Z" + "end_time": "2025-12-15T09:27:44.352789Z", + "start_time": "2025-12-15T09:27:44.344825Z" } }, "source": [ @@ -390,7 +390,7 @@ "" ], "text/html": [ - "
scoreproduct_idbrief_descriptionprice
1.3333333333333333prod_1comfortable running shoes for athletes89.99
1.3333333333333333prod_1comfortable running shoes for athletes89.99
1.0prod_5basketball shoes with excellent ankle support139.99
" + "
scoreproduct_idbrief_descriptionprice
2.0prod_1comfortable running shoes for athletes89.99
2.0prod_5basketball shoes with excellent ankle support139.99
2.0prod_1comfortable running shoes for athletes89.99
" ] }, "metadata": {}, @@ -400,7 +400,7 @@ } } ], - "execution_count": 11 + "execution_count": 6 }, { "cell_type": "markdown", @@ -421,8 +421,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.759844Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:52.153660Z", - "start_time": "2025-11-21T21:27:52.150061Z" + "end_time": "2025-12-15T09:27:44.374828Z", + "start_time": "2025-12-15T09:27:44.359984Z" } }, "source": [ @@ -447,7 +447,7 @@ "" ], "text/html": [ - "
scoreproduct_idbrief_descriptioncategoryprice
2.385806908729779prod_1comfortable running shoes for athletesfootwear89.99
2.385806908729779prod_1comfortable running shoes for athletesfootwear89.99
1.9340948871093797prod_5basketball shoes with excellent ankle supportfootwear139.99
" + "
scoreproduct_idbrief_descriptioncategoryprice
4.050828128169667prod_1comfortable running shoes for athletesfootwear89.99
4.050828128169667prod_1comfortable running shoes for athletesfootwear89.99
3.2229182995528305prod_5basketball shoes with excellent ankle supportfootwear139.99
3.2229182995528305prod_5basketball shoes with excellent ankle supportfootwear139.99
" ] }, "metadata": {}, @@ -457,7 +457,7 @@ } } ], - "execution_count": 12 + "execution_count": 7 }, { "cell_type": "code", @@ -469,8 +469,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.765316Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:52.357623Z", - "start_time": "2025-11-21T21:27:52.351735Z" + "end_time": "2025-12-15T09:27:44.409910Z", + "start_time": "2025-12-15T09:27:44.378041Z" } }, "source": [ @@ -493,7 +493,7 @@ "" ], "text/html": [ - "
scoreproduct_idbrief_descriptionprice
2.2775029612659465prod_1comfortable running shoes for athletes89.99
1.1387514806329733prod_1comfortable running shoes for athletes89.99
1.1190633543347508prod_4yoga mat with extra cushioning for comfort39.99
1.1190633543347508prod_4yoga mat with extra cushioning for comfort39.99
" + "
scoreproduct_idbrief_descriptionprice
3.3757130560793973prod_1comfortable running shoes for athletes89.99
3.3757130560793973prod_1comfortable running shoes for athletes89.99
1.6340629489648504prod_4yoga mat with extra cushioning for comfort39.99
1.6340629489648504prod_4yoga mat with extra cushioning for comfort39.99
" ] }, "metadata": {}, @@ -503,7 +503,7 @@ } } ], - "execution_count": 13 + "execution_count": 8 }, { "cell_type": "markdown", @@ -525,8 +525,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.770555Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:52.754720Z", - "start_time": "2025-11-21T21:27:52.751189Z" + "end_time": "2025-12-15T09:27:44.425817Z", + "start_time": "2025-12-15T09:27:44.412131Z" } }, "source": [ @@ -547,7 +547,7 @@ "" ], "text/html": [ - "
scoreproduct_idbrief_description
3.040323653363804prod_1comfortable running shoes for athletes
3.040323653363804prod_1comfortable running shoes for athletes
1.289396591406253prod_5basketball shoes with excellent ankle support
" + "
scoreproduct_idbrief_description
5.1882832044423015prod_1comfortable running shoes for athletes
5.1882832044423015prod_1comfortable running shoes for athletes
2.148612199701887prod_5basketball shoes with excellent ankle support
" ] }, "metadata": {}, @@ -557,7 +557,7 @@ } } ], - "execution_count": 14 + "execution_count": 9 }, { "cell_type": "markdown", @@ -578,8 +578,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.775861Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:53.171295Z", - "start_time": "2025-11-21T21:27:53.167127Z" + "end_time": "2025-12-15T09:27:44.440583Z", + "start_time": "2025-12-15T09:27:44.427869Z" } }, "source": [ @@ -602,7 +602,7 @@ "" ], "text/html": [ - "
scoreproduct_idbrief_description
4.1444591833267275prod_1comfortable running shoes for athletes
4.1444591833267275prod_1comfortable running shoes for athletes
1.4875097606385526prod_5basketball shoes with excellent ankle support
" + "
scoreproduct_idbrief_description
6.134713688880119prod_1comfortable running shoes for athletes
6.134713688880119prod_1comfortable running shoes for athletes
2.148612199701887prod_5basketball shoes with excellent ankle support
" ] }, "metadata": {}, @@ -612,7 +612,7 @@ } } ], - "execution_count": 15 + "execution_count": 10 }, { "cell_type": "code", @@ -624,8 +624,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.780713Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:53.528245Z", - "start_time": "2025-11-21T21:27:53.525116Z" + "end_time": "2025-12-15T09:27:44.461967Z", + "start_time": "2025-12-15T09:27:44.447726Z" } }, "source": [ @@ -648,7 +648,7 @@ "" ], "text/html": [ - "
scoreproduct_idbrief_description
2.5107799078325prod_1comfortable running shoes for athletes
2.5107799078325prod_1comfortable running shoes for athletes
2.482820220115406prod_3professional tennis racket for competitive players
" + "
scoreproduct_idbrief_description
3.3757130560793973prod_1comfortable running shoes for athletes
3.3757130560793973prod_1comfortable running shoes for athletes
3.303218123358508prod_3professional tennis racket for competitive players
" ] }, "metadata": {}, @@ -658,7 +658,7 @@ } } ], - "execution_count": 16 + "execution_count": 11 }, { "cell_type": "code", @@ -670,8 +670,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.786617Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:53.892142Z", - "start_time": "2025-11-21T21:27:53.888038Z" + "end_time": "2025-12-15T09:27:44.478620Z", + "start_time": "2025-12-15T09:27:44.465051Z" } }, "source": [ @@ -694,7 +694,7 @@ "" ], "text/html": [ - "
scoreproduct_idbrief_description
3.69730364515632prod_1comfortable running shoes for athletes
3.69730364515632prod_1comfortable running shoes for athletes
1.5329921800414583prod_5basketball shoes with excellent ankle support
" + "
scoreproduct_idbrief_description
6.134713688880119prod_1comfortable running shoes for athletes
6.134713688880119prod_1comfortable running shoes for athletes
2.148612199701887prod_5basketball shoes with excellent ankle support
" ] }, "metadata": {}, @@ -704,26 +704,50 @@ } } ], - "execution_count": 17 + "execution_count": 12 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 2. AggregateHybridQuery: Combining Text and Vector Search\n", + "## 2. Hybrid Queries: Combining Text and Vector Search\n", "\n", - "The `AggregateHybridQuery` combines text search and vector similarity to provide the best of both worlds:\n", + "Hybrid queries combine text search and vector similarity to provide the best of both worlds:\n", "- **Text search**: Finds exact keyword matches\n", "- **Vector search**: Captures semantic similarity\n", "\n", - "Results are scored using a weighted combination:\n", + "As of Redis 8.4.0, Redis natively supports a [`FT.HYBRID`](https://redis.io/docs/latest/commands/ft.hybrid) search command. RedisVL provides a `HybridQuery` class that makes it easy to construct and execute hybrid queries. For earlier versions of Redis, RedisVL provides an `AggregateHybridQuery` class that uses Redis aggregation to achieve similar results." + ] + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.494712Z", + "start_time": "2025-12-15T09:27:44.481443Z" + } + }, + "cell_type": "code", + "source": [ + "from packaging.version import Version\n", "\n", - "```\n", - "hybrid_score = (alpha) * vector_score + (1 - alpha) * text_score\n", - "```\n", + "from redis import __version__ as _redis_py_version\n", "\n", - "Where `alpha` controls the balance between vector and text search (default: 0.7)." 
- ] + "redis_py_version = Version(_redis_py_version)\n", + "redis_version = Version(index.client.info()[\"redis_version\"])\n", + "\n", + "HYBRID_SEARCH_AVAILABLE = redis_version >= Version(\"8.4.0\") and redis_py_version >= Version(\"7.1.0\")\n", + "print(HYBRID_SEARCH_AVAILABLE)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "execution_count": 13 }, { "cell_type": "markdown", @@ -759,8 +783,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.794662Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:55.430188Z", - "start_time": "2025-11-21T21:27:55.420369Z" + "end_time": "2025-12-15T09:27:44.512721Z", + "start_time": "2025-12-15T09:27:44.497780Z" } }, "source": [ @@ -791,11 +815,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "Index created with STOPWORDS 0: \n" + "Index created with STOPWORDS 0: \n" ] } ], - "execution_count": 18 + "execution_count": 14 }, { "cell_type": "code", @@ -807,8 +831,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.802098Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:55.640718Z", - "start_time": "2025-11-21T21:27:55.635077Z" + "end_time": "2025-12-15T09:27:44.528568Z", + "start_time": "2025-12-15T09:27:44.513241Z" } }, "source": [ @@ -824,18 +848,18 @@ "for i, company in enumerate(companies):\n", " company_index.load([company], keys=[f\"company:{i}\"])\n", "\n", - "print(f\"\u2713 Loaded {len(companies)} companies\")" + "print(f\"✓ Loaded {len(companies)} companies\")" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u2713 Loaded 5 companies\n" + "✓ Loaded 5 companies\n" ] } ], - "execution_count": 19 + "execution_count": 15 }, { "cell_type": "code", @@ -847,8 +871,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.806491Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:55.833033Z", - "start_time": "2025-11-21T21:27:55.829220Z" + "end_time": "2025-12-15T09:27:44.534288Z", + "start_time": "2025-12-15T09:27:44.528999Z" } }, 
"source": [ @@ -876,7 +900,7 @@ ] } ], - "execution_count": 20 + "execution_count": 16 }, { "cell_type": "markdown", @@ -886,9 +910,9 @@ "\n", "If we had used the default stopwords (not specifying `stopwords` in the schema), the word \"of\" would be filtered out during indexing. This means:\n", "\n", - "- \u274c Searching for `\"Bank of Glasberliner\"` might not find exact matches\n", - "- \u274c The phrase would be indexed as `\"Bank Berlin\"` (without \"of\")\n", - "- \u2705 With `STOPWORDS 0`, all words including \"of\" are indexed\n", + "- ❌ Searching for `\"Bank of Glasberliner\"` might not find exact matches\n", + "- ❌ The phrase would be indexed as `\"Bank Berlin\"` (without \"of\")\n", + "- ✅ With `STOPWORDS 0`, all words including \"of\" are indexed\n", "\n", "**Custom Stopwords Example:**\n", "\n", @@ -905,8 +929,8 @@ "shell.execute_reply": "2025-11-21T00:42:13.810083Z" }, "ExecuteTime": { - "end_time": "2025-11-21T21:27:56.463470Z", - "start_time": "2025-11-21T21:27:56.461409Z" + "end_time": "2025-12-15T09:27:44.553398Z", + "start_time": "2025-12-15T09:27:44.541083Z" } }, "source": [ @@ -934,7 +958,7 @@ ] } ], - "execution_count": 21 + "execution_count": 17 }, { "cell_type": "markdown", @@ -975,48 +999,119 @@ "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2025-11-21T21:27:57.036397Z", - "start_time": "2025-11-21T21:27:57.030555Z" + "end_time": "2025-12-15T09:27:44.586789Z", + "start_time": "2025-12-15T09:27:44.556392Z" } }, "source": [ "# Cleanup\n", "company_index.delete(drop=True)\n", - "print(\"\u2713 Cleaned up company_index\")" + "print(\"✓ Cleaned up company_index\")" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u2713 Cleaned up company_index\n" + "✓ Cleaned up company_index\n" ] } ], - "execution_count": 22 + "execution_count": 18 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Basic Aggregate Hybrid Query\n", + "### Basic Hybrid Query\n", "\n", - "Let's search for \"running\" with 
both text and semantic search:" + "> NOTE: `HybridQuery` requires Redis >= 8.4.0 and redis-py >= 7.1.0.\n", + "\n", + "Let's search for \"running\" with both text and semantic search, combining the results' scores using a linear combination:" ] }, { "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2025-11-21T21:27:57.725041Z", - "start_time": "2025-11-21T21:27:57.719775Z" + "end_time": "2025-12-15T09:27:44.599394Z", + "start_time": "2025-12-15T09:27:44.589681Z" + } + }, + "source": [ + "if HYBRID_SEARCH_AVAILABLE:\n", + " from redisvl.query import HybridQuery\n", + "\n", + " # Create a hybrid query\n", + " hybrid_query = HybridQuery(\n", + " text=\"running shoes\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.1, 0.2, 0.1], # Query vector\n", + " vector_field_name=\"text_embedding\",\n", + " return_fields=[\"product_id\", \"brief_description\", \"category\", \"price\"],\n", + " num_results=5,\n", + " yield_text_score_as=\"text_score\",\n", + " yield_vsim_score_as=\"vector_similarity\",\n", + " combination_method=\"LINEAR\",\n", + " yield_combined_score_as=\"hybrid_score\",\n", + " )\n", + "\n", + " results = index.query(hybrid_query)\n", + " result_print(results)\n", + "\n", + "else:\n", + " print(\"Hybrid search is not available in this version of Redis/redis-py.\")" + ], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/vishal.bala/PycharmProjects/redis-vl-python/redisvl/query/hybrid.py:133: UserWarning: HybridPostProcessingConfig is an experimental and may change or be removed in future versions.\n", + " self.postprocessing_config = HybridPostProcessingConfig()\n", + "/Users/vishal.bala/PycharmProjects/redis-vl-python/redisvl/query/hybrid.py:237: UserWarning: HybridSearchQuery is an experimental and may change or be removed in future versions.\n", + " search_query = HybridSearchQuery(\n", + "/Users/vishal.bala/PycharmProjects/redis-vl-python/redisvl/query/hybrid.py:278: UserWarning: 
HybridVsimQuery is an experimental and may change or be removed in future versions.\n", + " vsim_query = HybridVsimQuery(\n", + "/Users/vishal.bala/PycharmProjects/redis-vl-python/redisvl/query/hybrid.py:352: UserWarning: CombineResultsMethod is an experimental and may change or be removed in future versions.\n", + " return CombineResultsMethod(\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
text_scoreproduct_idbrief_descriptioncategorypricevector_similarityhybrid_score
6.13471368888prod_1comfortable running shoes for athletesfootwear89.990.9999999701982.5404140858
6.13471368888prod_1comfortable running shoes for athletesfootwear89.990.9999999701982.5404140858
2.1486121997prod_5basketball shoes with excellent ankle supportfootwear139.990.9950737357141.34113527491
2.1486121997prod_5basketball shoes with excellent ankle supportfootwear139.990.9950737357141.34113527491
2.10296000184prod_2lightweight running jacket with water resistanceouterwear129.990.9950737357141.32743961555
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 19 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "For earlier versions of Redis, you can use `AggregateHybridQuery` instead:" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.619601Z", + "start_time": "2025-12-15T09:27:44.606514Z" } }, + "cell_type": "code", "source": [ "from redisvl.query import AggregateHybridQuery\n", "\n", - "# Create a hybrid query\n", - "hybrid_query = AggregateHybridQuery(\n", + "agg_hybrid_query = AggregateHybridQuery(\n", " text=\"running shoes\",\n", " text_field_name=\"brief_description\",\n", " vector=[0.1, 0.2, 0.1], # Query vector\n", @@ -1025,7 +1120,7 @@ " num_results=5\n", ")\n", "\n", - "results = index.query(hybrid_query)\n", + "results = index.query(agg_hybrid_query)\n", "result_print(results)" ], "outputs": [ @@ -1035,7 +1130,7 @@ "" ], "text/html": [ - "
vector_distanceproduct_idbrief_descriptioncategorypricevector_similaritytext_scorehybrid_score
5.96046447754e-08prod_1comfortable running shoes for athletesfootwear89.990.9999999701984.829774426092.14893230697
5.96046447754e-08prod_1comfortable running shoes for athletesfootwear89.990.9999999701984.829774426092.14893230697
5.96046447754e-08prod_1comfortable running shoes for athletesfootwear89.990.9999999701984.829774426092.14893230697
0.0038834810257prod_4yoga mat with extra cushioning for comfortaccessories39.990.99805825948700.698640781641
0.0038834810257prod_4yoga mat with extra cushioning for comfortaccessories39.990.99805825948700.698640781641
" + "
vector_distanceproduct_idbrief_descriptioncategorypricevector_similaritytext_scorehybrid_score
5.96046447754e-08prod_1comfortable running shoes for athletesfootwear89.990.9999999701986.134713688882.5404140858
5.96046447754e-08prod_1comfortable running shoes for athletesfootwear89.990.9999999701986.134713688882.5404140858
0.00985252857208prod_5basketball shoes with excellent ankle supportfootwear139.990.9950737357142.14861219971.34113527491
0.0038834810257prod_4yoga mat with extra cushioning for comfortaccessories39.990.99805825948700.698640781641
0.0038834810257prod_4yoga mat with extra cushioning for comfortaccessories39.990.99805825948700.698640781641
" ] }, "metadata": {}, @@ -1045,7 +1140,7 @@ } } ], - "execution_count": 23 + "execution_count": 20 }, { "cell_type": "markdown", @@ -1053,18 +1148,83 @@ "source": [ "### Adjusting the Alpha Parameter\n", "\n", - "The `alpha` parameter controls the weight between vector and text search:\n", - "- `alpha=1.0`: Pure vector search\n", - "- `alpha=0.0`: Pure text search\n", - "- `alpha=0.7` (default): 70% vector, 30% text" + "Results are scored using a weighted combination:\n", + "\n", + "```\n", + "hybrid_score = (alpha) * text_score + (1 - alpha) * vector_score\n", + "```\n", + "\n", + "Where `alpha` controls the balance between text and vector search (default: 0.3 for `HybridQuery` and 0.7 for `AggregateHybridQuery`). Note that `AggregateHybridQuery` reverses the definition of `alpha` to be the weight of the vector score.\n", + "\n", + "The `alpha` parameter controls the weight between text and vector search:\n", + "- `alpha=1.0`: Pure text search (or pure vector search for `AggregateHybridQuery`)\n", + "- `alpha=0.0`: Pure vector search (or pure text search for `AggregateHybridQuery`)\n", + "- `alpha=0.3` (default - `HybridQuery`): 30% text, 70% vector" ] }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.636058Z", + "start_time": "2025-12-15T09:27:44.621406Z" + } + }, + "cell_type": "code", + "source": [ + "if HYBRID_SEARCH_AVAILABLE:\n", + " vector_heavy_query = HybridQuery(\n", + " text=\"comfortable\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.15, 0.25, 0.15],\n", + " vector_field_name=\"text_embedding\",\n", + " combination_method=\"LINEAR\",\n", + "\t\tlinear_alpha=0.1, # 10% text, 90% vector\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3,\n", + " yield_text_score_as=\"text_score\",\n", + " yield_vsim_score_as=\"vector_similarity\",\n", + " yield_combined_score_as=\"hybrid_score\",\n", + " )\n", + "\n", + " print(\"Results with alpha=0.1 (vector-heavy):\")\n", + " results = 
index.query(vector_heavy_query)\n", + " result_print(results)\n", + "\n", + "else:\n", + " print(\"Hybrid search is not available in this version of Redis/redis-py.\")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results with alpha=0.1 (vector-heavy):\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
text_scoreproduct_idbrief_descriptionvector_similarityhybrid_score
3.37571305608prod_1comfortable running shoes for athletes0.9980582594871.23582373915
3.37571305608prod_1comfortable running shoes for athletes0.9980582594871.23582373915
1.63406294896prod_4yoga mat with extra cushioning for comfort1.00000005961.06340634854
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 21 + }, { "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2025-11-21T21:28:02.908824Z", - "start_time": "2025-11-21T21:28:02.902585Z" + "end_time": "2025-12-15T09:27:44.651595Z", + "start_time": "2025-12-15T09:27:44.643458Z" } }, "source": [ @@ -1097,7 +1257,7 @@ "" ], "text/html": [ - "
vector_distanceproduct_idbrief_descriptionvector_similaritytext_scorehybrid_score
-1.19209289551e-07prod_4yoga mat with extra cushioning for comfort1.00000005961.538380705411.05383812419
-1.19209289551e-07prod_4yoga mat with extra cushioning for comfort1.00000005961.538380705411.05383812419
-1.19209289551e-07prod_4yoga mat with extra cushioning for comfort1.00000005961.538380705411.05383812419
" + "
vector_distanceproduct_idbrief_descriptionvector_similaritytext_scorehybrid_score
-1.19209289551e-07prod_4yoga mat with extra cushioning for comfort1.00000005961.634062948961.06340634854
-1.19209289551e-07prod_4yoga mat with extra cushioning for comfort1.00000005961.634062948961.06340634854
0.00136888027191prod_5basketball shoes with excellent ankle support0.99931555986400.899384003878
" ] }, "metadata": {}, @@ -1107,23 +1267,141 @@ } } ], - "execution_count": 24 + "execution_count": 22 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "##" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Reciprocal Rank Fusion (RRF)\n", + "\n", + "In addition to combining scores using a linear combination, `HybridQuery` also supports reciprocal rank fusion (RRF) for combining scores. This method is useful when you want to combine scores giving more weight to the top results from each query.\n", + "\n", + "`HybridQuery` allows for the following parameters to be specified for RRF:\n", + "- `rrf_window`: The window size to use for the RRF combination method. Limits the fusion scope.\n", + "- `rrf_constant`: The constant to use for the RRF combination method. Controls the decay of rank influence.\n", + "\n", + "`AggregateHybridQuery` does not support RRF, and only supports a linear combination of scores." + ] + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.664953Z", + "start_time": "2025-12-15T09:27:44.658451Z" + } + }, + "cell_type": "code", + "source": [ + "if HYBRID_SEARCH_AVAILABLE:\n", + " rrf_query = HybridQuery(\n", + " text=\"comfortable\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.15, 0.25, 0.15],\n", + " vector_field_name=\"text_embedding\",\n", + " combination_method=\"RRF\",\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3,\n", + " yield_text_score_as=\"text_score\",\n", + " yield_vsim_score_as=\"vector_similarity\",\n", + " yield_combined_score_as=\"hybrid_score\",\n", + " )\n", + "\n", + " results = index.query(rrf_query)\n", + " result_print(results)\n", + "\n", + "else:\n", + " print(\"Hybrid search is not available in this version of Redis/redis-py.\")" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
text_scoreproduct_idbrief_descriptionvector_similarityhybrid_score
1.63406294896prod_4yoga mat with extra cushioning for comfort1.00000005960.032266458496
1.63406294896prod_4yoga mat with extra cushioning for comfort1.00000005960.0317540322581
3.37571305608prod_1comfortable running shoes for athletes0.9980582594870.0313188157573
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 23 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Aggregate Hybrid Query with Filters\n", + "### Hybrid Query with Filters\n", "\n", "You can also combine hybrid search with filters:" ] }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.686355Z", + "start_time": "2025-12-15T09:27:44.672035Z" + } + }, + "cell_type": "code", + "source": [ + "if HYBRID_SEARCH_AVAILABLE:\n", + " # Hybrid search with a price filter\n", + " filtered_hybrid_query = HybridQuery(\n", + " text=\"professional equipment\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.9, 0.1, 0.05],\n", + " vector_field_name=\"text_embedding\",\n", + " filter_expression=Num(\"price\") > 100,\n", + " return_fields=[\"product_id\", \"brief_description\", \"category\", \"price\"],\n", + " num_results=5,\n", + " combination_method=\"LINEAR\",\n", + " yield_text_score_as=\"text_score\",\n", + " yield_vsim_score_as=\"vector_similarity\",\n", + " yield_combined_score_as=\"hybrid_score\",\n", + " )\n", + "\n", + " results = index.query(filtered_hybrid_query)\n", + " result_print(results)\n", + "\n", + "else:\n", + " print(\"Hybrid search is not available in this version of Redis/redis-py.\")" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
text_scoreproduct_idbrief_descriptioncategorypricevector_similarityhybrid_score
3.30321812336prod_3professional tennis racket for competitive playersequipment199.991.00000005961.69096547873
3.30321812336prod_3professional tennis racket for competitive playersequipment199.991.00000005961.69096547873
0prod_2lightweight running jacket with water resistanceouterwear129.990.7941712737080.555919891596
0prod_5basketball shoes with excellent ankle supportfootwear139.990.7941712737080.555919891596
0prod_2lightweight running jacket with water resistanceouterwear129.990.7941712737080.555919891596
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 24 + }, { "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2025-11-21T21:28:04.309151Z", - "start_time": "2025-11-21T21:28:04.302860Z" + "end_time": "2025-12-15T09:27:44.702984Z", + "start_time": "2025-12-15T09:27:44.689201Z" } }, "source": [ @@ -1148,7 +1426,7 @@ "" ], "text/html": [ - "
vector_distanceproduct_idbrief_descriptioncategorypricevector_similaritytext_scorehybrid_score
-1.19209289551e-07prod_3professional tennis racket for competitive playersequipment199.991.00000005961.547237055061.16417115824
-1.19209289551e-07prod_3professional tennis racket for competitive playersequipment199.991.00000005961.547237055061.16417115824
-1.19209289551e-07prod_3professional tennis racket for competitive playersequipment199.991.00000005961.547237055061.16417115824
0.411657452583prod_2lightweight running jacket with water resistanceouterwear129.990.79417127370800.555919891596
0.411657452583prod_2lightweight running jacket with water resistanceouterwear129.990.79417127370800.555919891596
" + "
vector_distanceproduct_idbrief_descriptioncategorypricevector_similaritytext_scorehybrid_score
-1.19209289551e-07prod_3professional tennis racket for competitive playersequipment199.991.00000005963.303218123361.69096547873
-1.19209289551e-07prod_3professional tennis racket for competitive playersequipment199.991.00000005963.303218123361.69096547873
0.411657452583prod_2lightweight running jacket with water resistanceouterwear129.990.79417127370800.555919891596
0.411657452583prod_5basketball shoes with excellent ankle supportfootwear139.990.79417127370800.555919891596
0.411657452583prod_2lightweight running jacket with water resistanceouterwear129.990.79417127370800.555919891596
" ] }, "metadata": {}, @@ -1166,15 +1444,65 @@ "source": [ "### Using Different Text Scorers\n", "\n", - "AggregateHybridQuery supports the same text scoring algorithms as TextQuery:" + "Hybrid queries support the same text scoring algorithms as TextQuery:" ] }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.715Z", + "start_time": "2025-12-15T09:27:44.706463Z" + } + }, + "cell_type": "code", + "source": [ + "if HYBRID_SEARCH_AVAILABLE:\n", + " # Aggregate Hybrid query with TFIDF scorer\n", + " hybrid_tfidf = HybridQuery(\n", + " text=\"shoes support\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.12, 0.18, 0.12],\n", + " vector_field_name=\"text_embedding\",\n", + " text_scorer=\"TFIDF\",\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3,\n", + " combination_method=\"LINEAR\",\n", + " yield_text_score_as=\"text_score\",\n", + " yield_vsim_score_as=\"vector_similarity\",\n", + " yield_combined_score_as=\"hybrid_score\",\n", + " )\n", + "\n", + " results = index.query(hybrid_tfidf)\n", + " result_print(results)\n", + "\n", + "else:\n", + " print(\"Hybrid search is not available in this version of Redis/redis-py.\")" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
text_scoreproduct_idbrief_descriptionvector_similarityhybrid_score
2.66666666667prod_1comfortable running shoes for athletes0.9950737357141.496551615
2.66666666667prod_1comfortable running shoes for athletes0.9950737357141.496551615
1.33333333333prod_5basketball shoes with excellent ankle support11.1
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 26 + }, { "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2025-11-21T21:28:05.455328Z", - "start_time": "2025-11-21T21:28:05.450590Z" + "end_time": "2025-12-15T09:27:44.728396Z", + "start_time": "2025-12-15T09:27:44.721838Z" } }, "source": [ @@ -1199,7 +1527,7 @@ "" ], "text/html": [ - "
vector_distanceproduct_idbrief_descriptionvector_similaritytext_scorehybrid_score
0prod_5basketball shoes with excellent ankle support131.6
0prod_2lightweight running jacket with water resistance100.7
0prod_2lightweight running jacket with water resistance100.7
" + "
vector_distanceproduct_idbrief_descriptionvector_similaritytext_scorehybrid_score
0prod_5basketball shoes with excellent ankle support141.9
0prod_2lightweight running jacket with water resistance100.7
0prod_2lightweight running jacket with water resistance100.7
" ] }, "metadata": {}, @@ -1209,7 +1537,7 @@ } } ], - "execution_count": 26 + "execution_count": 27 }, { "metadata": {}, @@ -1219,10 +1547,11 @@ "\n", "**Important:** `AggregateHybridQuery` uses FT.AGGREGATE commands which do NOT support runtime parameters.\n", "\n", - "Runtime parameters (such as `ef_runtime` for HNSW indexes or `search_window_size` for SVS-VAMANA indexes) are only supported with FT.SEARCH commands.\n", + "Runtime parameters (such as `ef_runtime` for HNSW indexes or `search_window_size` for SVS-VAMANA indexes) are only supported with FT.SEARCH (and partially FT.HYBRID) commands.\n", "\n", - "**For runtime parameter support, use `VectorQuery` or `VectorRangeQuery` instead:**\n", + "**For runtime parameter support, use `HybridQuery`, `VectorQuery`, or `VectorRangeQuery` instead:**\n", "\n", + "- `HybridQuery`: Supports `ef_runtime` for HNSW indexes\n", "- `VectorQuery`: Supports all runtime parameters (HNSW and SVS-VAMANA)\n", "- `VectorRangeQuery`: Supports all runtime parameters (HNSW and SVS-VAMANA)\n", "- `AggregateHybridQuery`: Does NOT support runtime parameters (uses FT.AGGREGATE)\n", @@ -1256,7 +1585,12 @@ }, { "cell_type": "code", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.742916Z", + "start_time": "2025-12-15T09:27:44.736787Z" + } + }, "source": [ "from redisvl.query import MultiVectorQuery, Vector\n", "\n", @@ -1285,8 +1619,24 @@ "results = index.query(multi_vector_query)\n", "result_print(results)" ], - "outputs": [], - "execution_count": null + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
distance_0distance_1product_idbrief_descriptioncategoryscore_0score_1combined_score
5.96046447754e-085.96046447754e-08prod_1comfortable running shoes for athletesfootwear0.9999999701980.9999999701980.999999970198
5.96046447754e-085.96046447754e-08prod_1comfortable running shoes for athletesfootwear0.9999999701980.9999999701980.999999970198
0.009852528572080.00266629457474prod_5basketball shoes with excellent ankle supportfootwear0.9950737357140.9986668527130.996151670814
0.009852528572080.00266629457474prod_5basketball shoes with excellent ankle supportfootwear0.9950737357140.9986668527130.996151670814
0.009852528572080.0118260979652prod_2lightweight running jacket with water resistanceouterwear0.9950737357140.9940869510170.994777700305
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 28 }, { "cell_type": "markdown", @@ -1299,7 +1649,12 @@ }, { "cell_type": "code", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.764858Z", + "start_time": "2025-12-15T09:27:44.749806Z" + } + }, "source": [ "# More emphasis on image similarity\n", "text_vec = Vector(\n", @@ -1326,8 +1681,31 @@ "results = index.query(image_heavy_query)\n", "result_print(results)" ], - "outputs": [], - "execution_count": null + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Results with emphasis on image similarity:\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
distance_0distance_1product_idbrief_descriptioncategoryscore_0score_1combined_score
-1.19209289551e-070prod_3professional tennis racket for competitive playersequipment1.000000059611.00000001192
-1.19209289551e-070prod_3professional tennis racket for competitive playersequipment1.000000059611.00000001192
0.145393729210.00900757312775prod_6swimming goggles with anti-fog coatingaccessories0.9273031353950.9954962134360.981857597828
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 29 }, { "cell_type": "markdown", @@ -1340,7 +1718,12 @@ }, { "cell_type": "code", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.786971Z", + "start_time": "2025-12-15T09:27:44.772064Z" + } + }, "source": [ "# Multi-vector search with category filter\n", "text_vec = Vector(\n", @@ -1367,8 +1750,24 @@ "results = index.query(filtered_multi_query)\n", "result_print(results)" ], - "outputs": [], - "execution_count": null + "outputs": [ + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
distance_0distance_1product_idbrief_descriptioncategorypricescore_0score_1combined_score
5.96046447754e-085.96046447754e-08prod_1comfortable running shoes for athletesfootwear89.990.9999999701980.9999999701980.999999970198
5.96046447754e-085.96046447754e-08prod_1comfortable running shoes for athletesfootwear89.990.9999999701980.9999999701980.999999970198
0.009852528572080.00266629457474prod_5basketball shoes with excellent ankle supportfootwear139.990.9950737357140.9986668527130.996510982513
0.009852528572080.00266629457474prod_5basketball shoes with excellent ankle supportfootwear139.990.9950737357140.9986668527130.996510982513
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 30 }, { "cell_type": "markdown", @@ -1381,7 +1780,12 @@ }, { "cell_type": "code", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.804045Z", + "start_time": "2025-12-15T09:27:44.788480Z" + } + }, "source": [ "# TextQuery - keyword-based search\n", "text_q = TextQuery(\n", @@ -1395,49 +1799,135 @@ "result_print(index.query(text_q))\n", "print()" ], - "outputs": [], - "execution_count": null + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TextQuery Results (keyword-based):\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
scoreproduct_idbrief_description
2.9647332596813154prod_1comfortable running shoes for athletes
2.9647332596813154prod_1comfortable running shoes for athletes
2.148612199701887prod_5basketball shoes with excellent ankle support
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "execution_count": 31 }, { "cell_type": "code", - "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-21T00:42:13.860414Z", "iopub.status.busy": "2025-11-21T00:42:13.860347Z", "iopub.status.idle": "2025-11-21T00:42:13.864887Z", "shell.execute_reply": "2025-11-21T00:42:13.864461Z" + }, + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.832484Z", + "start_time": "2025-12-15T09:27:44.811853Z" } }, - "outputs": [], "source": [ - "# AggregateHybridQuery - combines text and vector search\n", - "hybrid_q = AggregateHybridQuery(\n", - " text=\"shoes\",\n", - " text_field_name=\"brief_description\",\n", - " vector=[0.1, 0.2, 0.1],\n", - " vector_field_name=\"text_embedding\",\n", - " return_fields=[\"product_id\", \"brief_description\"],\n", - " num_results=3\n", - ")\n", - "\n", - "print(\"AggregateHybridQuery Results (text + vector):\")\n", - "result_print(index.query(hybrid_q))\n", + "if HYBRID_SEARCH_AVAILABLE:\n", + " # HybridQuery - combines text and vector search\n", + " hybrid_q = HybridQuery(\n", + " text=\"shoes\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.1, 0.2, 0.1],\n", + " vector_field_name=\"text_embedding\",\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + " num_results=3,\n", + " combination_method=\"LINEAR\",\n", + " yield_text_score_as=\"text_score\",\n", + " yield_vsim_score_as=\"vector_similarity\",\n", + " yield_combined_score_as=\"hybrid_score\",\n", + " )\n", + "\n", + " results = index.query(hybrid_q)\n", + "\n", + "else:\n", + " hybrid_q = AggregateHybridQuery(\n", + " text=\"shoes\",\n", + " text_field_name=\"brief_description\",\n", + " vector=[0.1, 0.2, 0.1],\n", + " vector_field_name=\"text_embedding\",\n", + " return_fields=[\"product_id\", \"brief_description\"],\n", + 
" num_results=3,\n", + " )\n", + "\n", + " results = index.query(hybrid_q)\n", + "\n", + "\n", + "print(f\"{hybrid_q.__class__.__name__} Results (text + vector):\")\n", + "result_print(results)\n", "print()" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HybridQuery Results (text + vector):\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
text_scoreproduct_idbrief_descriptionvector_similarityhybrid_score
2.96473325968prod_1comfortable running shoes for athletes0.9999999701981.58941995704
2.96473325968prod_1comfortable running shoes for athletes0.9999999701981.58941995704
2.1486121997prod_5basketball shoes with excellent ankle support0.9950737357141.34113527491
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "execution_count": 32 }, { "cell_type": "code", - "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2025-11-21T00:42:13.865922Z", "iopub.status.busy": "2025-11-21T00:42:13.865857Z", "iopub.status.idle": "2025-11-21T00:42:13.869441Z", "shell.execute_reply": "2025-11-21T00:42:13.868990Z" + }, + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.853040Z", + "start_time": "2025-12-15T09:27:44.833266Z" } }, - "outputs": [], "source": [ "# MultiVectorQuery - searches multiple vector fields\n", "mv_text = Vector(\n", @@ -1462,7 +1952,32 @@ "\n", "print(\"MultiVectorQuery Results (multiple vectors):\")\n", "result_print(index.query(multi_q))" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MultiVectorQuery Results (multiple vectors):\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
distance_0distance_1product_idbrief_descriptionscore_0score_1combined_score
5.96046447754e-085.96046447754e-08prod_1comfortable running shoes for athletes0.9999999701980.9999999701980.999999970198
5.96046447754e-085.96046447754e-08prod_1comfortable running shoes for athletes0.9999999701980.9999999701980.999999970198
0.009852528572080.00266629457474prod_5basketball shoes with excellent ankle support0.9950737357140.9986668527130.996870294213
" + ] + }, + "metadata": {}, + "output_type": "display_data", + "jetTransient": { + "display_id": null + } + } + ], + "execution_count": 33 }, { "cell_type": "markdown", @@ -1478,7 +1993,7 @@ " - When text relevance scoring is important\n", " - Example: Product search, document retrieval\n", "\n", - "2. **`AggregateHybridQuery`**:\n", + "2. **`HybridQuery`**:\n", " - When you want to combine keyword and semantic search\n", " - For improved search quality over pure text or vector search\n", " - When you have both text and vector representations of your data\n", @@ -1493,13 +2008,18 @@ }, { "cell_type": "code", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2025-12-15T09:27:44.865832Z", + "start_time": "2025-12-15T09:27:44.861322Z" + } + }, "source": [ "# Cleanup\n", "index.delete()" ], "outputs": [], - "execution_count": null + "execution_count": 34 } ], "metadata": { @@ -1522,4 +2042,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/pyproject.toml b/pyproject.toml index 5e07da47..359a3284 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ dependencies = [ "numpy>=1.26.0,<3", "pyyaml>=5.4,<7.0", - "redis>=5.0,<7.0", + "redis>=5.0,<7.2", "pydantic>=2,<3", "tenacity>=8.2.2", "ml-dtypes>=0.4.0,<1.0.0", diff --git a/redisvl/index/index.py b/redisvl/index/index.py index dcfaaab6..d3919f8d 100644 --- a/redisvl/index/index.py +++ b/redisvl/index/index.py @@ -28,6 +28,7 @@ from redis.asyncio.cluster import RedisCluster as AsyncRedisCluster from redis.cluster import RedisCluster +from redisvl.query.hybrid import HybridQuery from redisvl.query.query import VectorQuery from redisvl.redis.utils import ( _keys_share_hash_tag, @@ -101,6 +102,8 @@ logger = get_logger(__name__) +_HYBRID_SEARCH_ERROR_MESSAGE = "Hybrid search is not available in this version of redis-py. Please upgrade to redis-py >= 7.1.0." 
+ REQUIRED_MODULES_FOR_INTROSPECTION = [ {"name": "search", "ver": 20810}, @@ -278,6 +281,18 @@ def _validate_query(self, query: BaseQuery) -> None: stacklevel=3, ) + def _validate_hybrid_query(self, query: Any) -> None: + """Validate that a hybrid query can be executed.""" + try: + from redis.commands.search.hybrid_result import HybridResult + + from redisvl.query.hybrid import HybridQuery + except (ImportError, ModuleNotFoundError): + raise ImportError(_HYBRID_SEARCH_ERROR_MESSAGE) + + if not isinstance(query, HybridQuery): + raise TypeError(f"query must be of type HybridQuery, got {type(query)}") + @property def name(self) -> str: """The name of the Redis search index.""" @@ -1003,6 +1018,56 @@ def search(self, *args, **kwargs) -> "Result": except Exception as e: raise RedisSearchError(f"Unexpected error while searching: {str(e)}") from e + def _hybrid_search(self, query: HybridQuery, **kwargs) -> List[Dict[str, Any]]: + """Perform a hybrid search against the index, combining text and vector search. + + Args: + query (HybridQuery): The text+vector search query to be performed, with configurable fusion methods and + post-processing. + kwargs: Additional arguments to pass to the redis-py hybrid_search method (e.g. timeout). + + Returns: + List[Dict[str, Any]]: The search results ordered by combined score unless otherwise specified. + + Notes: + Hybrid search is only available in Redis 8.4.0+, and requires redis-py >= 7.1.0. + + See Also: + - `FT.HYBRID command documentation `_ + - `redis-py hybrid_search documentation `_ + + .. 
code-block:: python + + from redisvl.query import HybridQuery + + hybrid_query = HybridQuery( + text="lorem ipsum dolor sit amet", + text_field_name="description", + vector=[0.1, 0.2, 0.3], + vector_field_name="embedding" + ) + + results = index.query(hybrid_query) + + """ + index = self._redis_client.ft(self.schema.index.name) + self._validate_hybrid_query(query) + + if not hasattr(index, "hybrid_search"): + raise ImportError(_HYBRID_SEARCH_ERROR_MESSAGE) + + results = index.hybrid_search( + query=query.query, + combine_method=query.combination_method, + post_processing=( + query.postprocessing_config + if query.postprocessing_config.build_args() + else None + ), + **kwargs, + ) # type: ignore + return [convert_bytes(r) for r in results.results] # type: ignore[union-attr] + def batch_query( self, queries: Sequence[BaseQuery], batch_size: int = 10 ) -> List[List[Dict[str, Any]]]: @@ -1028,14 +1093,16 @@ def _query(self, query: BaseQuery) -> List[Dict[str, Any]]: results = self.search(query.query, query_params=query.params) return process_results(results, query=query, schema=self.schema) - def query(self, query: Union[BaseQuery, AggregationQuery]) -> List[Dict[str, Any]]: + def query( + self, query: Union[BaseQuery, AggregationQuery, HybridQuery] + ) -> List[Dict[str, Any]]: """Execute a query on the index. - This method takes a BaseQuery or AggregationQuery object directly, and + This method takes a BaseQuery, AggregationQuery, or HybridQuery object directly, and handles post-processing of the search. Args: - query (Union[BaseQuery, AggregateQuery]): The query to run. + query (Union[BaseQuery, AggregationQuery, HybridQuery]): The query to run. Returns: List[Result]: A list of search results. 
@@ -1055,6 +1122,8 @@ def query(self, query: Union[BaseQuery, AggregationQuery]) -> List[Dict[str, Any """ if isinstance(query, AggregationQuery): return self._aggregate(query) + elif isinstance(query, HybridQuery): + return self._hybrid_search(query) else: return self._query(query) @@ -1824,6 +1893,55 @@ async def search(self, *args, **kwargs) -> "Result": except Exception as e: raise RedisSearchError(f"Unexpected error while searching: {str(e)}") from e + async def _hybrid_search( + self, query: HybridQuery, **kwargs + ) -> List[Dict[str, Any]]: + """Perform a hybrid search against the index, combining text and vector search. + + Args: + query (HybridQuery): The text+vector search query to be performed, with configurable fusion methods and + post-processing. + kwargs: Additional arguments to pass to the redis-py hybrid_search method (e.g. timeout). + + Returns: + List[Dict[str, Any]]: The search results ordered by combined score unless otherwise specified. + + Notes: + Hybrid search is only available in Redis 8.4.0+, and requires redis-py >= 7.1.0. + + .. 
code-block:: python + + from redisvl.query import HybridQuery + + hybrid_query = HybridQuery( + text="lorem ipsum dolor sit amet", + text_field_name="description", + vector=[0.1, 0.2, 0.3], + vector_field_name="embedding" + ) + + results = await async_index.query(hybrid_query) + + """ + client = await self._get_client() + index = client.ft(self.schema.index.name) + self._validate_hybrid_query(query) + + if not hasattr(index, "hybrid_search"): + raise ImportError(_HYBRID_SEARCH_ERROR_MESSAGE) + + results = await index.hybrid_search( + query=query.query, + combine_method=query.combination_method, + post_processing=( + query.postprocessing_config + if query.postprocessing_config.build_args() + else None + ), + **kwargs, + ) # type: ignore + return [convert_bytes(r) for r in results.results] # type: ignore[union-attr] + async def batch_query( self, queries: List[BaseQuery], batch_size: int = 10 ) -> List[List[Dict[str, Any]]]: @@ -1855,15 +1973,15 @@ async def _query(self, query: BaseQuery) -> List[Dict[str, Any]]: return process_results(results, query=query, schema=self.schema) async def query( - self, query: Union[BaseQuery, AggregationQuery] + self, query: Union[BaseQuery, AggregationQuery, HybridQuery] ) -> List[Dict[str, Any]]: """Asynchronously execute a query on the index. - This method takes a BaseQuery or AggregationQuery object directly, runs + This method takes a BaseQuery, AggregationQuery, or HybridQuery object directly, runs the search, and handles post-processing of the search. Args: - query (Union[BaseQuery, AggregateQuery]): The query to run. + query (Union[BaseQuery, AggregationQuery, HybridQuery]): The query to run. Returns: List[Result]: A list of search results. 
@@ -1882,6 +2000,8 @@ async def query( """ if isinstance(query, AggregationQuery): return await self._aggregate(query) + elif isinstance(query, HybridQuery): + return await self._hybrid_search(query) else: return await self._query(query) diff --git a/redisvl/query/__init__.py b/redisvl/query/__init__.py index b561fe60..6b46b54f 100644 --- a/redisvl/query/__init__.py +++ b/redisvl/query/__init__.py @@ -1,10 +1,10 @@ from redisvl.query.aggregate import ( AggregateHybridQuery, AggregationQuery, - HybridQuery, MultiVectorQuery, Vector, ) +from redisvl.query.hybrid import HybridQuery from redisvl.query.query import ( BaseQuery, BaseVectorQuery, diff --git a/redisvl/query/aggregate.py b/redisvl/query/aggregate.py index 09981ca1..1f1da2ac 100644 --- a/redisvl/query/aggregate.py +++ b/redisvl/query/aggregate.py @@ -1,5 +1,5 @@ import warnings -from typing import Any, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Union from pydantic import BaseModel, field_validator, model_validator from redis.commands.search.aggregation import AggregateRequest, Desc @@ -8,7 +8,7 @@ from redisvl.query.filter import FilterExpression from redisvl.redis.utils import array_to_buffer from redisvl.schema.fields import VectorDataType -from redisvl.utils.token_escaper import TokenEscaper +from redisvl.utils.full_text_query_helper import FullTextQueryHelper from redisvl.utils.utils import lazy_import nltk = lazy_import("nltk") @@ -124,7 +124,7 @@ def __init__( num_results (int, optional): The number of results to return. Defaults to 10. return_fields (Optional[List[str]], optional): The fields to return. Defaults to None. stopwords (Optional[Union[str, Set[str]]], optional): The stopwords to remove from the - provided text prior to searchuse. If a string such as "english" "german" is + provided text prior to search-use. If a string such as "english" "german" is provided then a default set of stopwords for that language will be used. 
if a list, set, or tuple of strings is provided then those will be used as stopwords. Defaults to "english". if set to "None" then no stopwords will be removed. @@ -159,8 +159,11 @@ def __init__( self._alpha = alpha self._dtype = dtype self._num_results = num_results - self._set_stopwords(stopwords) - self._text_weights = self._parse_text_weights(text_weights) + + self._ft_helper = FullTextQueryHelper( + stopwords=stopwords, + text_weights=text_weights, + ) query_string = self._build_query_string() super().__init__(query_string) @@ -198,115 +201,31 @@ def stopwords(self) -> Set[str]: Returns: Set[str]: The stopwords used in the query. """ - return self._stopwords.copy() if self._stopwords else set() - - def _set_stopwords(self, stopwords: Optional[Union[str, Set[str]]] = "english"): - """Set the stopwords to use in the query. - Args: - stopwords (Optional[Union[str, Set[str]]]): The stopwords to use. If a string - such as "english" "german" is provided then a default set of stopwords for that - language will be used. if a list, set, or tuple of strings is provided then those - will be used as stopwords. Defaults to "english". if set to "None" then no stopwords - will be removed. - - Raises: - TypeError: If the stopwords are not a set, list, or tuple of strings. - """ - if not stopwords: - self._stopwords = set() - elif isinstance(stopwords, str): - try: - nltk.download("stopwords", quiet=True) - self._stopwords = set(nltk_stopwords.words(stopwords)) - except ImportError: - raise ValueError( - f"Loading stopwords for {stopwords} failed: nltk is not installed." - ) - except Exception as e: - raise ValueError(f"Error trying to load {stopwords} from nltk. 
{e}") - elif isinstance(stopwords, (Set, List, Tuple)) and all( # type: ignore - isinstance(word, str) for word in stopwords - ): - self._stopwords = set(stopwords) - else: - raise TypeError("stopwords must be a set, list, or tuple of strings") + return self._ft_helper.stopwords - def _tokenize_and_escape_query(self, user_query: str) -> str: - """Convert a raw user query to a redis full text query joined by ORs - Args: - user_query (str): The user query to tokenize and escape. + @property + def text_weights(self) -> Dict[str, float]: + """Get the text weights. Returns: - str: The tokenized and escaped query string. - - Raises: - ValueError: If the text string becomes empty after stopwords are removed. + Dictionary of word:weight mappings. """ - escaper = TokenEscaper() - - tokens = [ - escaper.escape( - token.strip().strip(",").replace("“", "").replace("”", "").lower() - ) - for token in user_query.split() - ] - - token_list = [ - token for token in tokens if token and token not in self._stopwords - ] - for i, token in enumerate(token_list): - if token in self._text_weights: - token_list[i] = f"{token}=>{{$weight:{self._text_weights[token]}}}" - - if not token_list: - raise ValueError("text string cannot be empty after removing stopwords") - return " | ".join(token_list) - - def _parse_text_weights( - self, weights: Optional[Dict[str, float]] - ) -> Dict[str, float]: - parsed_weights: Dict[str, float] = {} - if not weights: - return parsed_weights - for word, weight in weights.items(): - word = word.strip().lower() - if not word or " " in word: - raise ValueError( - f"Only individual words may be weighted. Got {{ {word}:{weight} }}" - ) - if ( - not (isinstance(weight, float) or isinstance(weight, int)) - or weight < 0.0 - ): - raise ValueError( - f"Weights must be positive number. 
Got {{ {word}:{weight} }}" - ) - parsed_weights[word] = weight - return parsed_weights + return self._ft_helper.text_weights def set_text_weights(self, weights: Dict[str, float]): """Set or update the text weights for the query. Args: - text_weights: Dictionary of word:weight mappings + weights: Dictionary of word:weight mappings """ - self._text_weights = self._parse_text_weights(weights) + self._ft_helper.set_text_weights(weights) self._query = self._build_query_string() - @property - def text_weights(self) -> Dict[str, float]: - """Get the text weights. - - Returns: - Dictionary of word:weight mappings. - """ - return self._text_weights - def _build_query_string(self) -> str: """Build the full query string for text search with optional filtering.""" - filter_expression = self._filter_expression - if isinstance(self._filter_expression, FilterExpression): - filter_expression = str(self._filter_expression) + text = self._ft_helper.build_query_string( + self._text, self._text_field, self._filter_expression + ) # Build KNN query knn_query = ( @@ -316,38 +235,13 @@ def _build_query_string(self) -> str: # Add distance field alias knn_query += f" AS {self.DISTANCE_ID}" - text = f"(~@{self._text_field}:({self._tokenize_and_escape_query(self._text)})" - - if filter_expression and filter_expression != "*": - text += f" AND {filter_expression}" - - return f"{text})=>[{knn_query}]" + return f"{text}=>[{knn_query}]" def __str__(self) -> str: """Return the string representation of the query.""" return " ".join([str(x) for x in self.build_args()]) -class HybridQuery(AggregateHybridQuery): - """Backward compatibility wrapper for AggregateHybridQuery. - - .. deprecated:: - HybridQuery is a backward compatibility wrapper around AggregateHybridQuery - and will eventually be replaced with a new hybrid query implementation. 
- To maintain current functionality please use AggregateHybridQuery directly.", - """ - - def __init__(self, *args, **kwargs): - warnings.warn( - "HybridQuery is a backward compatibility wrapper around AggregateHybridQuery " - "and will eventually be replaced with a new hybrid query implementation. " - "To maintain current functionality please use AggregateHybridQuery directly.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) - - class MultiVectorQuery(AggregationQuery): """ MultiVectorQuery allows for search over multiple vector fields in a document simultaneously. diff --git a/redisvl/query/hybrid.py b/redisvl/query/hybrid.py new file mode 100644 index 00000000..cfcb0c93 --- /dev/null +++ b/redisvl/query/hybrid.py @@ -0,0 +1,363 @@ +from typing import Any, Dict, List, Literal, Optional, Set, Union + +from redis.commands.search.query import Filter + +from redisvl.query.filter import FilterExpression +from redisvl.redis.utils import array_to_buffer +from redisvl.utils.full_text_query_helper import FullTextQueryHelper + +_IMPORT_ERROR_MESSAGE = "Hybrid queries require Redis >= 8.4.0 and redis-py>=7.1.0" + + +class HybridQuery: + """ + A hybrid search query that combines text search and vector similarity, with configurable fusion methods. + + .. 
code-block:: python + + from redisvl.query import HybridQuery + from redisvl.index import SearchIndex + + index = SearchIndex.from_yaml("path/to/index.yaml") + + query = HybridQuery( + text="example text", + text_field_name="text_field", + vector=[0.1, 0.2, 0.3], + vector_field_name="vector_field", + text_scorer="BM25STD", + yield_text_score_as="text_score", + yield_vsim_score_as="vector_similarity", + combination_method="LINEAR", + linear_alpha=0.3, + yield_combined_score_as="hybrid_score", + num_results=10, + return_fields=["field1", "field2"], + stopwords="english", + ) + + results = index.query(query) + + See Also: + - `FT.HYBRID command documentation `_ + - `redis-py hybrid_search documentation `_ + """ + + def __init__( + self, + text: str, + text_field_name: str, + vector: Union[bytes, List[float]], + vector_field_name: str, + text_scorer: str = "BM25STD", + yield_text_score_as: Optional[str] = None, + vector_search_method: Optional[Literal["KNN", "RANGE"]] = None, + knn_ef_runtime: int = 10, + range_radius: Optional[float] = None, + range_epsilon: float = 0.01, + yield_vsim_score_as: Optional[str] = None, + filter_expression: Optional[Union[str, FilterExpression]] = None, + combination_method: Optional[Literal["RRF", "LINEAR"]] = None, + rrf_window: int = 20, + rrf_constant: int = 60, + linear_alpha: float = 0.3, + yield_combined_score_as: Optional[str] = None, + dtype: str = "float32", + num_results: Optional[int] = 10, + return_fields: Optional[List[str]] = None, + stopwords: Optional[Union[str, Set[str]]] = "english", + text_weights: Optional[Dict[str, float]] = None, + ): + """ + Instantiates a HybridQuery object. + + Args: + text: The text to search for. + text_field_name: The text field name to search in. + vector: The vector to perform vector similarity search. + vector_field_name: The vector field name to search in. + text_scorer: The text scorer to use. 
Options are {TFIDF, TFIDF.DOCNORM, + BM25STD, BM25STD.NORM, BM25STD.TANH, DISMAX, DOCSCORE, HAMMING}. Defaults to "BM25STD". For more + information about supported scoring algorithms, + see https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/scoring/ + yield_text_score_as: The name of the field to yield the text score as. + vector_search_method: The vector search method to use. Options are {KNN, RANGE}. Defaults to None. + knn_ef_runtime: The exploration factor parameter for HNSW, optional if `vector_search_method` is "KNN". + range_radius: The search radius to use, required if `vector_search_method` is "RANGE". + range_epsilon: The epsilon value to use, optional if `vector_search_method` is "RANGE"; defines the + accuracy of the search. + yield_vsim_score_as: The name of the field to yield the vector similarity score as. + filter_expression: The filter expression to use for both the text and vector searches. Defaults to None. + combination_method: The combination method to use. Options are {RRF, LINEAR}. If not specified, the server + defaults to RRF. If "RRF" is specified, then at least one of `rrf_window` or `rrf_constant` must be + provided. If "LINEAR" is specified, then `linear_alpha` must be + provided. + rrf_window: The window size to use for the reciprocal rank fusion (RRF) combination method. Limits + fusion scope. + rrf_constant: The constant to use for the reciprocal rank fusion (RRF) combination method. Controls decay + of rank influence. + linear_alpha: The weight of the text query for the linear combination method (LINEAR); the vector query is weighted `1 - linear_alpha`. + yield_combined_score_as: The name of the field to yield the combined score as. + dtype: The data type of the vector. Defaults to "float32". + num_results: The number of results to return; also used as K when `vector_search_method` is "KNN". + return_fields: The fields to return. Defaults to None. + stopwords (Optional[Union[str, Set[str]]], optional): The stopwords to remove from the + provided text prior to search-use.
If a string such as "english" or "german" is + provided then a default set of stopwords for that language will be used. If a list, + set, or tuple of strings is provided then those will be used as stopwords. + Defaults to "english". If set to None then no stopwords will be removed. + + Note: This parameter controls query-time stopword filtering (client-side). + For index-level stopwords configuration (server-side), see IndexInfo.stopwords. + Using query-time stopwords with index-level STOPWORDS 0 is counterproductive. + text_weights (Optional[Dict[str, float]]): The importance weighting of individual words + within the query text. Defaults to None, as no modifications will be made to the + text_scorer score. + + Raises: + ImportError: If redis-py>=7.1.0 is not installed. + TypeError: If the stopwords are not a set, list, or tuple of strings. + ValueError: If the text string is empty, or if the text string becomes empty after + stopwords are removed. + ValueError: If `vector_search_method` is defined and isn't one of {KNN, RANGE}. + ValueError: If `vector_search_method` is "KNN" and `num_results` is not provided. + ValueError: If `vector_search_method` is "RANGE" and `range_radius` is not provided.
+ """ + try: + from redis.commands.search.hybrid_query import ( + CombineResultsMethod, + HybridPostProcessingConfig, + ) + except (ImportError, ModuleNotFoundError): + raise ImportError(_IMPORT_ERROR_MESSAGE) + + self.postprocessing_config = HybridPostProcessingConfig() + if num_results: + self.postprocessing_config.limit(offset=0, num=num_results) + if return_fields: + self.postprocessing_config.load(*(f"@{f}" for f in return_fields)) + + self._ft_helper = FullTextQueryHelper( + stopwords=stopwords, + text_weights=text_weights, + ) + + query_string = self._ft_helper.build_query_string( + text, text_field_name, filter_expression + ) + + self.query = build_base_query( + text_query=query_string, + vector=vector, + vector_field_name=vector_field_name, + text_scorer=text_scorer, + yield_text_score_as=yield_text_score_as, + vector_search_method=vector_search_method, + num_results=num_results, + knn_ef_runtime=knn_ef_runtime, + range_radius=range_radius, + range_epsilon=range_epsilon, + yield_vsim_score_as=yield_vsim_score_as, + filter_expression=filter_expression, + dtype=dtype, + ) + + if combination_method: + self.combination_method: Optional[CombineResultsMethod] = ( + build_combination_method( + combination_method=combination_method, + rrf_window=rrf_window, + rrf_constant=rrf_constant, + linear_alpha=linear_alpha, + yield_score_as=yield_combined_score_as, + ) + ) + else: + self.combination_method = None + + +def build_base_query( + text_query: str, + vector: Union[bytes, List[float]], + vector_field_name: str, + text_scorer: str = "BM25STD", + yield_text_score_as: Optional[str] = None, + vector_search_method: Optional[Literal["KNN", "RANGE"]] = None, + num_results: Optional[int] = None, + knn_ef_runtime: Optional[int] = None, + range_radius: Optional[float] = None, + range_epsilon: Optional[float] = None, + yield_vsim_score_as: Optional[str] = None, + filter_expression: Optional[Union[str, FilterExpression]] = None, + dtype: str = "float32", +): + """Build a Redis 
HybridQuery for performing hybrid search. + + Args: + text_query: The query for the text search. + vector: The vector to perform vector similarity search. + vector_field_name: The vector field name to search in. + text_scorer: The text scorer to use. Options are {TFIDF, TFIDF.DOCNORM, + BM25STD, BM25STD.NORM, BM25STD.TANH, DISMAX, DOCSCORE, HAMMING}. Defaults to "BM25STD". For more + information about supported scoring algorithms, + see https://redis.io/docs/latest/develop/ai/search-and-query/advanced-concepts/scoring/ + yield_text_score_as: The name of the field to yield the text score as. + vector_search_method: The vector search method to use. Options are {KNN, RANGE}. Defaults to None. + num_results: The number of nearest neighbors to return, required if `vector_search_method` is "KNN". + knn_ef_runtime: The exploration factor parameter for HNSW, optional if `vector_search_method` is "KNN". + range_radius: The search radius to use, required if `vector_search_method` is "RANGE". + range_epsilon: The epsilon value to use, optional if `vector_search_method` is "RANGE"; defines the + accuracy of the search. + yield_vsim_score_as: The name of the field to yield the vector similarity score as. + filter_expression: The filter expression to use for the vector similarity search. Defaults to None. + dtype: The data type of the vector. Defaults to "float32". + + Notes: + If RRF combination method is used, then at least one of `rrf_window` or `rrf_constant` must be provided. + If LINEAR combination method is used, then at least one of `linear_alpha` or `linear_beta` must be provided. + + Raises: + ImportError: If redis-py>=7.1.0 is not installed. + ValueError: If `vector_search_method` is defined and isn't one of {KNN, RANGE}. + ValueError: If `vector_search_method` is "KNN" and `num_results` is not provided. + ValueError: If `vector_search_method` is "RANGE" and `range_radius` is not provided.
+ + Returns: + A Redis HybridQuery object that defines the text and vector searches to be performed. + """ + try: + from redis.commands.search.hybrid_query import HybridQuery as RedisHybridQuery + from redis.commands.search.hybrid_query import ( + HybridSearchQuery, + HybridVsimQuery, + VectorSearchMethods, + ) + except (ImportError, ModuleNotFoundError): + raise ImportError(_IMPORT_ERROR_MESSAGE) + + # Serialize the full-text search query + search_query = HybridSearchQuery( + query_string=text_query, + scorer=text_scorer, + yield_score_as=yield_text_score_as, + ) + + if isinstance(vector, bytes): + vector_data = vector + else: + vector_data = array_to_buffer(vector, dtype) + + # Serialize vector similarity search method and params, if specified + vsim_search_method: Optional[VectorSearchMethods] = None + vsim_search_method_params: Dict[str, Any] = {} + if vector_search_method == "KNN": + vsim_search_method = VectorSearchMethods.KNN + if not num_results: + raise ValueError( + "Must provide `num_results` if vector_search_method is KNN" + ) + + vsim_search_method_params["K"] = num_results + if knn_ef_runtime: + vsim_search_method_params["EF_RUNTIME"] = knn_ef_runtime + + elif vector_search_method == "RANGE": + vsim_search_method = VectorSearchMethods.RANGE + if not range_radius: + raise ValueError("Must provide RADIUS if vector_search_method is RANGE") + + vsim_search_method_params["RADIUS"] = range_radius + if range_epsilon: + vsim_search_method_params["EPSILON"] = range_epsilon + + elif vector_search_method is not None: + raise ValueError(f"Unknown vector search method: {vector_search_method}") + + if isinstance(filter_expression, FilterExpression): + filter_expression = str(filter_expression) + + if filter_expression and filter_expression != "*": + vsim_filter = Filter("FILTER", str(filter_expression)) + else: + vsim_filter = None + + # Serialize the vector similarity query + vsim_query = HybridVsimQuery( + vector_field_name="@" + vector_field_name, + 
vector_data=vector_data, + vsim_search_method=vsim_search_method, + vsim_search_method_params=vsim_search_method_params, + filter=vsim_filter, + yield_score_as=yield_vsim_score_as, + ) + + return RedisHybridQuery( + search_query=search_query, + vector_similarity_query=vsim_query, + ) + + +def build_combination_method( + combination_method: Literal["RRF", "LINEAR"], + rrf_window: Optional[int] = None, + rrf_constant: Optional[float] = None, + linear_alpha: Optional[float] = None, + yield_score_as: Optional[str] = None, +): + """Build a configuration for combining hybrid search scores. + + Args: + combination_method: The combination method to use. Options are {RRF, LINEAR}. + rrf_window: The window size to use for the reciprocal rank fusion (RRF) combination method. Limits + fusion scope. + rrf_constant: The constant to use for the reciprocal rank fusion (RRF) combination method. Controls decay + of rank influence. + linear_alpha: The weight of the first query for the linear combination method (LINEAR). + yield_score_as: The name of the field to yield the combined score as. + + Raises: + ImportError: If redis-py>=7.1.0 is not installed. + ValueError: If `combination_method` is defined and isn't one of {RRF, LINEAR}. + ValueError: If `combination_method` is "RRF" and neither `rrf_window` nor `rrf_constant` is provided. + ValueError: If `combination_method` is "LINEAR" and neither `linear_alpha` nor `linear_beta` is provided. + + Returns: + A CombineResultsMethod object that defines how the text and vector scores should be combined. 
+ """ + try: + from redis.commands.search.hybrid_query import ( + CombinationMethods, + CombineResultsMethod, + ) + except (ImportError, ModuleNotFoundError): + raise ImportError(_IMPORT_ERROR_MESSAGE) + + method_params: Dict[str, Any] = {} + if combination_method == "RRF": + method = CombinationMethods.RRF + if rrf_window: + method_params["WINDOW"] = rrf_window + if rrf_constant: + method_params["CONSTANT"] = rrf_constant + + elif combination_method == "LINEAR": + method = CombinationMethods.LINEAR + if linear_alpha: + method_params["ALPHA"] = linear_alpha + method_params["BETA"] = 1 - linear_alpha + + else: + raise ValueError(f"Unknown combination method: {combination_method}") + + if yield_score_as: + method_params["YIELD_SCORE_AS"] = yield_score_as + + if not method_params: + raise ValueError( + "No parameters provided for combination method - must provide at least one parameter." + ) + + return CombineResultsMethod( + method=method, + **method_params, + ) diff --git a/redisvl/utils/full_text_query_helper.py b/redisvl/utils/full_text_query_helper.py new file mode 100644 index 00000000..ee5b8cb7 --- /dev/null +++ b/redisvl/utils/full_text_query_helper.py @@ -0,0 +1,149 @@ +from typing import Dict, List, Optional, Set, Tuple, Union + +from redisvl.query.filter import FilterExpression +from redisvl.utils.token_escaper import TokenEscaper +from redisvl.utils.utils import lazy_import + +nltk = lazy_import("nltk") +nltk_stopwords = lazy_import("nltk.corpus.stopwords") + + +def _parse_text_weights(weights: Optional[Dict[str, float]]) -> Dict[str, float]: + parsed_weights: Dict[str, float] = {} + if not weights: + return parsed_weights + for word, weight in weights.items(): + word = word.strip().lower() + if not word or " " in word: + raise ValueError( + f"Only individual words may be weighted. Got {{ {word}:{weight} }}" + ) + if not (isinstance(weight, float) or isinstance(weight, int)) or weight < 0.0: + raise ValueError( + f"Weights must be positive number. 
Got {{ {word}:{weight} }}" + ) + parsed_weights[word] = weight + return parsed_weights + + +class FullTextQueryHelper: + """Convert raw user queries into Redis full-text queries - tokenizes, escapes, and filters stopwords from the query.""" + + def __init__( + self, + stopwords: Optional[Union[str, Set[str]]] = "english", + text_weights: Optional[Dict[str, float]] = None, + ): + self._stopwords = self._get_stopwords(stopwords) + self._text_weights = _parse_text_weights(text_weights) + + @property + def stopwords(self) -> Set[str]: + """Return the stopwords used in the query. + Returns: + Set[str]: The stopwords used in the query. + """ + return self._stopwords.copy() if self._stopwords else set() + + @property + def text_weights(self) -> Dict[str, float]: + """Get the text weights. + + Returns: + Dictionary of word:weight mappings. + """ + return self._text_weights + + def build_query_string( + self, + text: str, + text_field_name: str, + filter_expression: Optional[Union[str, FilterExpression]] = None, + ) -> str: + """Build the full-text query string for text search with optional filtering.""" + if isinstance(filter_expression, FilterExpression): + filter_expression = str(filter_expression) + + query = f"(~@{text_field_name}:({self._tokenize_and_escape_query(text)})" + + if filter_expression and filter_expression != "*": + query += f" AND {filter_expression}" + + return query + ")" + + def _get_stopwords( + self, stopwords: Optional[Union[str, Set[str]]] = "english" + ) -> Set[str]: + """Get the stopwords to use in the query. + + Args: + stopwords (Optional[Union[str, Set[str]]]): The stopwords to use. If a string + such as "english" "german" is provided then a default set of stopwords for that + language will be used. if a list, set, or tuple of strings is provided then those + will be used as stopwords. Defaults to "english". if set to "None" then no stopwords + will be removed. + + Returns: + The set of stopwords to use. 
+ + Raises: + TypeError: If the stopwords are not a set, list, or tuple of strings. + """ + if not stopwords: + return set() + elif isinstance(stopwords, str): + try: + nltk.download("stopwords", quiet=True) + return set(nltk_stopwords.words(stopwords)) + except ImportError: + raise ValueError( + f"Loading stopwords for {stopwords} failed: nltk is not installed." + ) + except Exception as e: + raise ValueError(f"Error trying to load {stopwords} from nltk. {e}") + elif isinstance(stopwords, (Set, List, Tuple)) and all( # type: ignore + isinstance(word, str) for word in stopwords + ): + return set(stopwords) + else: + raise TypeError("stopwords must be a set, list, or tuple of strings") + + def set_text_weights(self, weights: Dict[str, float]): + """Set or update the text weights for the query. + + Args: + weights: Dictionary of word:weight mappings + """ + self._text_weights = _parse_text_weights(weights) + + def _tokenize_and_escape_query(self, user_query: str) -> str: + """Convert a raw user query to a redis full text query joined by ORs + + Args: + user_query (str): The user query to tokenize and escape. + + Returns: + str: The tokenized and escaped query string. + + Raises: + ValueError: If the text string becomes empty after stopwords are removed. 
+ """ + escaper = TokenEscaper() + + tokens = [ + escaper.escape( + token.strip().strip(",").replace("“", "").replace("”", "").lower() + ) + for token in user_query.split() + ] + + token_list = [ + token for token in tokens if token and token not in self._stopwords + ] + for i, token in enumerate(token_list): + if token in self._text_weights: + token_list[i] = f"{token}=>{{$weight:{self._text_weights[token]}}}" + + if not token_list: + raise ValueError("text string cannot be empty after removing stopwords") + return " | ".join(token_list) diff --git a/tests/integration/test_aggregation.py b/tests/integration/test_aggregation.py index d7dec532..1ee7a2a1 100644 --- a/tests/integration/test_aggregation.py +++ b/tests/integration/test_aggregation.py @@ -1,8 +1,8 @@ import pytest from redisvl.index import SearchIndex -from redisvl.query import AggregateHybridQuery, HybridQuery, MultiVectorQuery, Vector -from redisvl.query.filter import FilterExpression, Geo, GeoRadius, Num, Tag, Text +from redisvl.query import AggregateHybridQuery, MultiVectorQuery, Vector +from redisvl.query.filter import Geo, GeoRadius, Num, Tag, Text from redisvl.redis.utils import array_to_buffer from tests.conftest import skip_if_redis_version_below @@ -259,7 +259,7 @@ def test_hybrid_query_stopwords(index): query_string = hybrid_query._build_query_string() assert "medical" not in query_string - assert "expertize" not in query_string + assert "expertise" not in query_string results = index.query(hybrid_query) assert len(results) == 7 @@ -743,44 +743,3 @@ def test_multivector_query_mixed_index(index): assert ( float(r["combined_score"]) - score <= 0.0001 ) # allow for small floating point error - - -def test_hybrid_query_backward_compatibility(index): - skip_if_redis_version_below(index.client, "7.2.0") - - text = "a medical professional with expertise in lung cancer" - text_field = "description" - vector = [0.1, 0.1, 0.5] - vector_field = "user_embedding" - return_fields = ["user", "credit_score", 
"age", "job", "location", "description"] - - hybrid_query = AggregateHybridQuery( - text=text, - text_field_name=text_field, - vector=vector, - vector_field_name=vector_field, - return_fields=return_fields, - ) - - results = index.query(hybrid_query) - assert len(results) == 7 - for result in results: - assert result["user"] in [ - "john", - "derrick", - "nancy", - "tyler", - "tim", - "taimur", - "joe", - "mary", - ] - - with pytest.warns(DeprecationWarning): - _ = HybridQuery( - text=text, - text_field_name=text_field, - vector=vector, - vector_field_name=vector_field, - return_fields=return_fields, - ) diff --git a/tests/integration/test_hybrid.py b/tests/integration/test_hybrid.py new file mode 100644 index 00000000..27651954 --- /dev/null +++ b/tests/integration/test_hybrid.py @@ -0,0 +1,501 @@ +import pytest +from packaging.version import Version +from redis import ResponseError +from redis import __version__ as redis_version + +from redisvl.index import AsyncSearchIndex, SearchIndex +from redisvl.query.filter import Geo, GeoRadius, Num, Tag, Text +from redisvl.query.hybrid import HybridQuery +from redisvl.redis.utils import array_to_buffer +from redisvl.schema import IndexSchema +from tests.conftest import ( + get_redis_version, + skip_if_redis_version_below, + skip_if_redis_version_below_async, +) + +REDIS_HYBRID_AVAILABLE = Version(redis_version) >= Version("7.1.0") +SKIP_REASON = "Requires Redis >= 8.4.0 and redis-py>=7.1.0" + + +@pytest.fixture +def index_schema(worker_id): + return IndexSchema.from_dict( + { + "index": { + "name": f"user_index_{worker_id}", + "prefix": f"v1_{worker_id}", + "storage_type": "hash", + }, + "fields": [ + {"name": "credit_score", "type": "tag"}, + {"name": "job", "type": "text"}, + {"name": "description", "type": "text"}, + {"name": "age", "type": "numeric"}, + {"name": "last_updated", "type": "numeric"}, + {"name": "location", "type": "geo"}, + { + "name": "user_embedding", + "type": "vector", + "attrs": { + "dims": 3, + 
"distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32", + }, + }, + { + "name": "image_embedding", + "type": "vector", + "attrs": { + "dims": 5, + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float32", + }, + }, + { + "name": "audio_embedding", + "type": "vector", + "attrs": { + "dims": 6, + "distance_metric": "cosine", + "algorithm": "flat", + "datatype": "float64", + }, + }, + ], + } + ) + + +@pytest.fixture +def index(index_schema, multi_vector_data, redis_url): + index = SearchIndex(schema=index_schema, redis_url=redis_url) + + # create the index (no data yet) + index.create(overwrite=True) + + # prepare and load the data + def hash_preprocess(item: dict) -> dict: + return { + **item, + "user_embedding": array_to_buffer(item["user_embedding"], "float32"), + "image_embedding": array_to_buffer(item["image_embedding"], "float32"), + "audio_embedding": array_to_buffer(item["audio_embedding"], "float64"), + } + + index.load(multi_vector_data, preprocess=hash_preprocess) + + # run the test + yield index + + # clean up + index.delete(drop=True) + + +@pytest.fixture +async def async_index(index_schema, multi_vector_data, async_client): + index = AsyncSearchIndex(schema=index_schema, redis_client=async_client) + await index.create(overwrite=True) + + def hash_preprocess(item: dict) -> dict: + return { + **item, + "user_embedding": array_to_buffer(item["user_embedding"], "float32"), + "image_embedding": array_to_buffer(item["image_embedding"], "float32"), + "audio_embedding": array_to_buffer(item["audio_embedding"], "float64"), + } + + await index.load(multi_vector_data, preprocess=hash_preprocess) + yield index + await index.delete(drop=True) + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query(index): + skip_if_redis_version_below(index.client, "8.4.0") + + text = "a medical professional with expertise in lung cancer" + text_field = "description" + vector = [0.1, 0.1, 0.5] + 
vector_field = "user_embedding" + return_fields = ["user", "credit_score", "age", "job", "location", "description"] + + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + yield_text_score_as="text_score", + vector=vector, + vector_field_name=vector_field, + yield_vsim_score_as="vsim_score", + combination_method="RRF", + yield_combined_score_as="hybrid_score", + return_fields=return_fields, + ) + + results = index.query(hybrid_query) + assert isinstance(results, list) + assert len(results) == 7 # fewer hits than the default limit of 10 + for doc in results: + assert doc["user"] in [ + "john", + "derrick", + "nancy", + "tyler", + "tim", + "taimur", + "joe", + "mary", + ] + assert int(doc["age"]) in [18, 14, 94, 100, 12, 15, 35] + assert doc["job"] in ["engineer", "doctor", "dermatologist", "CEO", "dentist"] + assert doc["credit_score"] in ["high", "low", "medium"] + + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + num_results=3, + combination_method="RRF", + yield_combined_score_as="hybrid_score", + ) + + results = index.query(hybrid_query) + assert len(results) == 3 + assert ( + float(results[0]["hybrid_score"]) + >= float(results[1]["hybrid_score"]) + >= float(results[2]["hybrid_score"]) + ) + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_filter(index): + skip_if_redis_version_below(index.client, "8.4.0") + + text = "a medical professional with expertise in lung cancer" + text_field = "description" + vector = [0.1, 0.1, 0.5] + vector_field = "user_embedding" + return_fields = ["user", "credit_score", "age", "job", "location", "description"] + filter_expression = (Tag("credit_score") == ("high")) & (Num("age") > 30) + + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + filter_expression=filter_expression, + return_fields=return_fields, + ) + + results = 
index.query(hybrid_query) + assert len(results) == 2 + for result in results: + assert result["credit_score"] == "high" + assert int(result["age"]) > 30 + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_geo_filter(index): + skip_if_redis_version_below(index.client, "8.4.0") + + text = "a medical professional with expertise in lung cancer" + text_field = "description" + vector = [0.1, 0.1, 0.5] + vector_field = "user_embedding" + return_fields = ["user", "credit_score", "age", "job", "location", "description"] + filter_expression = Geo("location") == GeoRadius(-122.4194, 37.7749, 1000, "m") + + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + filter_expression=filter_expression, + return_fields=return_fields, + ) + + results = index.query(hybrid_query) + assert len(results) == 3 + for result in results: + assert result["location"] is not None + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.parametrize("alpha", [0.1, 0.5, 0.9]) +def test_hybrid_query_alpha(index, alpha): + skip_if_redis_version_below(index.client, "8.4.0") + + text = "a medical professional with expertise in lung cancer" + text_field = "description" + vector = [0.1, 0.1, 0.5] + vector_field = "user_embedding" + + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + combination_method="LINEAR", + linear_alpha=alpha, + yield_text_score_as="text_score", + yield_vsim_score_as="vector_similarity", + yield_combined_score_as="hybrid_score", + ) + + results = index.query(hybrid_query) + assert len(results) == 7 + for result in results: + score = alpha * float(result["text_score"]) + (1 - alpha) * float( + result["vector_similarity"] + ) + assert ( + abs(float(result["hybrid_score"]) - score) <= 0.0001 + ) # allow for small floating point error in either direction + + +@pytest.mark.skipif(not 
REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_stopwords(index): + skip_if_redis_version_below(index.client, "8.4.0") + + text = "a medical professional with expertise in lung cancer" + text_field = "description" + vector = [0.1, 0.1, 0.5] + vector_field = "user_embedding" + alpha = 0.5 + + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + combination_method="LINEAR", + linear_alpha=alpha, + stopwords={"medical", "expertise"}, + yield_text_score_as="text_score", + yield_vsim_score_as="vector_similarity", + yield_combined_score_as="hybrid_score", + ) + + query_string = hybrid_query.query._search_query.query_string() + assert "medical" not in query_string + assert "expertise" not in query_string + + results = index.query(hybrid_query) + assert len(results) == 7 + for result in results: + score = alpha * float(result["text_score"]) + (1 - alpha) * float( + result["vector_similarity"] + ) + assert ( + abs(float(result["hybrid_score"]) - score) <= 0.0001 + ) # allow for small floating point error in either direction + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_text_filter(index): + skip_if_redis_version_below(index.client, "8.4.0") + + text = "a medical professional with expertise in lung cancer" + text_field = "description" + vector = [0.1, 0.1, 0.5] + vector_field = "user_embedding" + filter_expression = Text(text_field) == ("medical") + + # make sure we can still apply filters to the same text field we are querying + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + filter_expression=filter_expression, + combination_method="LINEAR", + yield_combined_score_as="hybrid_score", + return_fields=[text_field], + ) + + results = index.query(hybrid_query) + assert len(results) == 2 + for result in results: + assert "medical" in result[text_field].lower() + + filter_expression = 
(Text(text_field) == ("medical")) & ( + (Text(text_field) != ("research")) + ) + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + filter_expression=filter_expression, + combination_method="LINEAR", + yield_combined_score_as="hybrid_score", + return_fields=[text_field], + ) + + results = index.query(hybrid_query) + assert len(results) == 2 + for result in results: + assert "medical" in result[text_field].lower() + assert "research" not in result[text_field].lower() + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.parametrize("scorer", ["BM25STD", "TFIDF", "TFIDF.DOCNORM"]) +def test_hybrid_query_word_weights(index, scorer): + skip_if_redis_version_below(index.client, "8.4.0") + + text = "a medical professional with expertise in lung cancers" + text_field = "description" + vector = [0.1, 0.1, 0.5] + vector_field = "user_embedding" + return_fields = ["description"] + + weights = {"medical": 3.4, "cancers": 5} + + # test we can run a query with text weights + weighted_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + return_fields=return_fields, + text_scorer=scorer, + text_weights=weights, + yield_text_score_as="text_score", + ) + + weighted_results = index.query(weighted_query) + assert len(weighted_results) == 7 + + # test that weights do change the scores on results + unweighted_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + return_fields=return_fields, + text_scorer=scorer, + text_weights={}, + yield_text_score_as="text_score", + ) + + unweighted_results = index.query(unweighted_query) + + for weighted, unweighted in zip(weighted_results, unweighted_results): + for word in weights: + if word in weighted["description"] or word in unweighted["description"]: + assert float(weighted["text_score"]) > 
float(unweighted["text_score"]) + + # test that weights do change the document score and order of results + weights = {"medical": 5, "cancers": 3.4} # switch the weights + weighted_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + return_fields=return_fields, + text_scorer=scorer, + text_weights=weights, + yield_text_score_as="text_score", + ) + + weighted_results = index.query(weighted_query) + assert weighted_results != unweighted_results + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.asyncio +async def test_hybrid_query_async(async_index): + await skip_if_redis_version_below_async(async_index.client, "8.4.0") + + text = "a medical professional with expertise in lung cancer" + text_field = "description" + vector = [0.1, 0.1, 0.5] + vector_field = "user_embedding" + return_fields = ["user", "credit_score", "age", "job", "location", "description"] + + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + yield_text_score_as="text_score", + vector=vector, + vector_field_name=vector_field, + yield_vsim_score_as="vsim_score", + combination_method="RRF", + yield_combined_score_as="hybrid_score", + return_fields=return_fields, + ) + + results = await async_index.query(hybrid_query) + assert isinstance(results, list) + assert len(results) == 7 + for doc in results: + assert doc["user"] in [ + "john", + "derrick", + "nancy", + "tyler", + "tim", + "taimur", + "joe", + "mary", + ] + assert int(doc["age"]) in [18, 14, 94, 100, 12, 15, 35] + assert doc["job"] in ["engineer", "doctor", "dermatologist", "CEO", "dentist"] + assert doc["credit_score"] in ["high", "low", "medium"] + + hybrid_query = HybridQuery( + text=text, + text_field_name=text_field, + vector=vector, + vector_field_name=vector_field, + num_results=3, + combination_method="RRF", + yield_combined_score_as="hybrid_score", + ) + + results = await async_index.query(hybrid_query) + assert 
len(results) == 3 + assert ( + float(results[0]["hybrid_score"]) + >= float(results[1]["hybrid_score"]) + >= float(results[2]["hybrid_score"]) + ) + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_search_not_available_in_server(index): + if Version(get_redis_version(index.client)) >= Version("8.4.0"): + pytest.skip("Hybrid search is available in this version of Redis") + + hybrid_query = HybridQuery( + text="a medical professional with expertise in lung cancer", + text_field_name="description", + yield_text_score_as="text_score", + vector=[0.1, 0.1, 0.5], + vector_field_name="user_embedding", + yield_vsim_score_as="vsim_score", + combination_method="RRF", + yield_combined_score_as="hybrid_score", + return_fields=["user", "credit_score", "age", "job", "location", "description"], + ) + + with pytest.raises(ResponseError, match=r"unknown command .FT\.HYBRID"): + index.query(hybrid_query) + + +@pytest.mark.skipif( + REDIS_HYBRID_AVAILABLE, reason="Requires hybrid search to NOT be available" +) +def test_hybrid_query_not_available(index): + with pytest.raises(ImportError): + HybridQuery( + text="a medical professional with expertise in lung cancer", + text_field_name="description", + vector=[0.1, 0.1, 0.5], + vector_field_name="user_embedding", + ) diff --git a/tests/unit/test_aggregation_types.py b/tests/unit/test_aggregation_types.py index 22e89975..503674f9 100644 --- a/tests/unit/test_aggregation_types.py +++ b/tests/unit/test_aggregation_types.py @@ -3,16 +3,8 @@ import pytest from redis.commands.search.aggregation import AggregateRequest -from redis.commands.search.query import Query -from redis.commands.search.result import Result - -from redisvl.index.index import process_results -from redisvl.query.aggregate import ( - AggregateHybridQuery, - HybridQuery, - MultiVectorQuery, - Vector, -) + +from redisvl.query.aggregate import AggregateHybridQuery, MultiVectorQuery, Vector from redisvl.query.filter import Tag from redisvl.redis.utils import 
array_to_buffer @@ -435,35 +427,3 @@ def test_vector_object_handles_byte_conversion(): byte_string = array_to_buffer(sample_vector, datatype) vec = Vector(vector=byte_string, field_name="field 1") assert vec.vector == byte_string - - -def test_hybrid_query_backward_compatibility(): - # test that HybridQuery is a backward compatibility wrapper for AggregateHybridQuery - with pytest.warns(DeprecationWarning): - hybrid_query = HybridQuery( - text="sample text query", - text_field_name="description", - vector=sample_vector, - vector_field_name="embedding", - ) - - # Verify HybridQuery is actually an instance of AggregateHybridQuery - assert isinstance(hybrid_query, AggregateHybridQuery) - - # Verify AggregateHybridQuery does not emit warnings - with assert_no_warnings(): - aggregate_query = AggregateHybridQuery( - text="sample text query", - text_field_name="description", - vector=sample_vector, - vector_field_name="embedding", - ) - - # Verify that creating another HybridQuery also warns - with pytest.warns(DeprecationWarning): - another_hybrid_query = HybridQuery( - text="sample text query", - text_field_name="description", - vector=sample_vector, - vector_field_name="embedding", - ) diff --git a/tests/unit/test_hybrid_types.py b/tests/unit/test_hybrid_types.py new file mode 100644 index 00000000..b78e8ab5 --- /dev/null +++ b/tests/unit/test_hybrid_types.py @@ -0,0 +1,892 @@ +"""Unit tests for HybridQuery class from redisvl.query.hybrid module. + +This test module validates the functionality of the HybridQuery class which +combines full-text search with vector similarity search using Redis's hybrid +query capabilities (requires redis>=7.1.0). 
+""" + +from typing import List, Literal + +import pytest + +from redisvl.redis.utils import array_to_buffer + +try: + from redis.commands.search.hybrid_query import HybridQuery as RedisHybridQuery + from redis.commands.search.hybrid_query import ( + HybridSearchQuery, + HybridVsimQuery, + VectorSearchMethods, + ) + + from redisvl.query.hybrid import HybridQuery, build_combination_method + + REDIS_HYBRID_AVAILABLE = True + SKIP_REASON = "" +except (ImportError, ModuleNotFoundError): + REDIS_HYBRID_AVAILABLE = False + SKIP_REASON = "Requires redis>=8.4.0 and redis-py>=7.1.0" + # Create dummy classes to avoid import errors + RedisHybridQuery = None # type: ignore + HybridSearchQuery = None # type: ignore + HybridVsimQuery = None # type: ignore + VectorSearchMethods = None # type: ignore + HybridQuery = None # type: ignore + +from redisvl.query.filter import Num, Tag, Text + +# Test data +sample_vector = [0.1, 0.2, 0.3, 0.4] +bytes_vector = array_to_buffer(sample_vector, "float32") +sample_text = "the toon squad play basketball against a gang of aliens" + + +def get_query_pieces(query: HybridQuery) -> List[str]: + """Get all the pieces of the complete hybrid query.""" + # NOTE: Modeled after logic in `redis.commands.search.commands.SearchCommands.hybrid_search` + pieces = query.query.get_args() + if query.combination_method: + pieces.extend(query.combination_method.get_args()) + if query.postprocessing_config.build_args(): + pieces.extend(query.postprocessing_config.build_args()) + return pieces + + +# Basic init tests + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_basic_initialization(): + """Test basic HybridQuery initialization with required parameters.""" + text_field_name = "description" + vector_field_name = "embedding" + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name=text_field_name, + vector=sample_vector, + vector_field_name=vector_field_name, + ) + + # Verify the serialized query pieces 
(default scorer, no vector method, LIMIT 0 10) + assert get_query_pieces(hybrid_query) == [ + "SEARCH", + "(~@description:(toon | squad | play | basketball | gang | aliens))", + "SCORER", + "BM25STD", + "VSIM", + "@embedding", + bytes_vector, + "LIMIT", + "0", + "10", + ] + + # Verify that no combination method is set + assert hybrid_query.combination_method is None + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_all_parameters(): + """Test HybridQuery initialization with all optional parameters.""" + filter_expression = Tag("genre") == "comedy" + text_weights = {"toon": 2.0, "squad": 1.5} + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + text_scorer="TFIDF", + yield_text_score_as="text_score", + vector_search_method="KNN", + knn_ef_runtime=100, + yield_vsim_score_as="vsim_score", + filter_expression=filter_expression, + stopwords=None, + text_weights=text_weights, + combination_method="RRF", + rrf_window=10, + rrf_constant=50, + yield_combined_score_as="hybrid_score", + num_results=10, + ) + + assert hybrid_query._ft_helper is not None + assert hybrid_query._ft_helper.stopwords == set() + assert hybrid_query._ft_helper.text_weights == text_weights + + # Verify that the expected query pieces have been defined + assert get_query_pieces(hybrid_query) == [ + "SEARCH", + "(~@description:(the | toon=>{$weight:2.0} | squad=>{$weight:1.5} | play | basketball | against | a | gang | of | aliens) AND @genre:{comedy})", + "SCORER", + "TFIDF", + "YIELD_SCORE_AS", + "text_score", + "VSIM", + "@embedding", + bytes_vector, + "KNN", + 4, + "K", + 10, + "EF_RUNTIME", + 100, + "FILTER", + "@genre:{comedy}", + "YIELD_SCORE_AS", + "vsim_score", + "COMBINE", + "RRF", + 6, + "WINDOW", + 10, + "CONSTANT", + 50, + "YIELD_SCORE_AS", + "hybrid_score", + "LIMIT", + "0", + "10", + ] + + # Add post-processing and verify that it is reflected in the query + 
hybrid_query.postprocessing_config.limit(offset=10, num=20) + assert get_query_pieces(hybrid_query)[-3:] == ["LIMIT", "10", "20"] + + +# Stopwords tests + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_stopwords_default(): + """Test that default stopwords (english) are applied.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + ) + + # Default should be english stopwords + stopwords = hybrid_query._ft_helper.stopwords + assert isinstance(stopwords, set) + assert len(stopwords) > 0 + # Common english stopwords should be present + assert "the" in stopwords + assert "a" in stopwords + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_stopwords_none(): + """Test that stopwords can be disabled with None.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + stopwords=None, + ) + + assert hybrid_query._ft_helper.stopwords == set() + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_stopwords_empty_set(): + """Test that stopwords can be set to empty set.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + stopwords=set(), + ) + + assert hybrid_query._ft_helper.stopwords == set() + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_stopwords_custom(): + """Test that custom stopwords are applied.""" + custom_stopwords = {"the", "a", "of", "and"} + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + stopwords=custom_stopwords, + ) + + assert hybrid_query._ft_helper.stopwords == set(custom_stopwords) + + +@pytest.mark.skipif(not 
REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_stopwords_language(): + """Test that language-specific stopwords can be loaded.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + stopwords="german", + ) + + # German stopwords should be loaded + stopwords = hybrid_query._ft_helper.stopwords + assert isinstance(stopwords, set) + assert len(stopwords) > 0 + for word in ("der", "die", "und"): # Common expected words + assert word in stopwords + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_stopwords_invalid_language(): + """Test that invalid language raises ValueError.""" + with pytest.raises(ValueError): + HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + stopwords="gibberish_language", + ) + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_stopwords_invalid_type(): + """Test that invalid stopwords type raises TypeError.""" + with pytest.raises(TypeError): + HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + stopwords=[1, 2, 3], # Invalid: list of integers + ) + + +# Text weight tests + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_text_weights_basic(): + """Test that text weights are properly applied.""" + text_weights = {"toon": 2.0, "squad": 1.5, "basketball": 3.0} + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + text_weights=text_weights, + ) + + assert hybrid_query._ft_helper.text_weights == text_weights + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_text_weights_none(): + """Test that text_weights can be None.""" + 
hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + text_weights=None, + ) + + assert hybrid_query._ft_helper.text_weights == {} + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_text_weights_empty(): + """Test that text_weights can be empty dict.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + text_weights={}, + ) + + assert hybrid_query._ft_helper.text_weights == {} + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_text_weights_negative_value(): + """Test that negative text weights raise ValueError.""" + with pytest.raises(ValueError): + HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + text_weights={"word": -0.5}, + ) + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_text_weights_invalid_type(): + """Test that non-numeric text weights raise ValueError.""" + with pytest.raises(ValueError): + HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + text_weights={"word": "invalid"}, # type: ignore + ) + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_text_weights_multi_word_key(): + """Test that multi-word keys in text_weights raise ValueError.""" + with pytest.raises(ValueError): + HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + text_weights={"multi word": 2.0}, + ) + + +# Filter expression tests + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_string_filter(): + """Test HybridQuery with string filter expression.""" + string_filter = 
"@category:{tech|science|engineering}" + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + filter_expression=string_filter, + ) + + assert get_query_pieces(hybrid_query) == [ + "SEARCH", + "(~@description:(toon | squad | play | basketball | gang | aliens) AND @category:{tech|science|engineering})", + "SCORER", + "BM25STD", + "VSIM", + "@embedding", + bytes_vector, + "FILTER", + "@category:{tech|science|engineering}", + "LIMIT", + "0", + "10", + ] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_tag_filter(): + """Test HybridQuery with Tag FilterExpression.""" + tag_filter = Tag("genre") == "comedy" + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + filter_expression=tag_filter, + ) + + assert get_query_pieces(hybrid_query) == [ + "SEARCH", + "(~@description:(toon | squad | play | basketball | gang | aliens) AND @genre:{comedy})", + "SCORER", + "BM25STD", + "VSIM", + "@embedding", + bytes_vector, + "FILTER", + "@genre:{comedy}", + "LIMIT", + "0", + "10", + ] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_numeric_filter(): + """Test HybridQuery with Numeric FilterExpression.""" + numeric_filter = Num("age") > 30 + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + filter_expression=numeric_filter, + ) + + # Verify filter is included in serialized query + args = get_query_pieces(hybrid_query) + expected = "@age:[(30 +inf]" + assert args[1].endswith(f"AND {expected})") # Check text filter + assert args[8] == expected # Check vector filter + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_text_filter(): + """Test HybridQuery with Text 
FilterExpression.""" + text_filter = Text("job") == "engineer" + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + filter_expression=text_filter, + ) + + # Verify filter is included in serialized query + args = get_query_pieces(hybrid_query) + expected = '@job:("engineer")' + assert args[1].endswith(f"AND {expected})") # Check text filter + assert args[8] == expected # Check vector filter + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_combined_filters(): + """Test HybridQuery with combined FilterExpressions.""" + combined_filter = (Tag("genre") == "comedy") & (Num("rating") > 7.0) + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + filter_expression=combined_filter, + ) + + # Verify both filters are included in serialized query + args = get_query_pieces(hybrid_query) + expected = "(@genre:{comedy} @rating:[(7.0 +inf])" + assert args[1].endswith(f"AND {expected})") # Check text filter + assert args[8] == expected # Check vector filter + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_wildcard_filter(): + """Test HybridQuery with wildcard filter.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + filter_expression="*", + ) + + # Verify query structure - wildcard may or may not be included depending on implementation + args = get_query_pieces(hybrid_query) + assert ( + args[1] == "(~@description:(toon | squad | play | basketball | gang | aliens))" + ) # Query without filtering + assert "FILTER" not in args # Check that it was not added for vector filtering + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_without_filter(): + """Test HybridQuery 
without any filter expression.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + filter_expression=None, + ) + + # Verify no filter in serialized query (only text query) + args = get_query_pieces(hybrid_query) + assert ( + args[1] == "(~@description:(toon | squad | play | basketball | gang | aliens))" + ) # No filter in query + assert "FILTER" not in args # Check that it was not added for vector filtering + + +# Vector search method tests + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_vector_search_method_knn(): + """Test HybridQuery with KNN vector search method.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + vector_search_method="KNN", + num_results=10, + ) + + # KNN with params should be in args + args = get_query_pieces(hybrid_query) + assert args[7:13] == ["KNN", 4, "K", 10, "EF_RUNTIME", 10] + + # With optional EF_RUNTIME param + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + vector_search_method="KNN", + knn_ef_runtime=100, + num_results=10, + ) + + # KNN with params should be in args + args = get_query_pieces(hybrid_query) + assert args[7:13] == ["KNN", 4, "K", 10, "EF_RUNTIME", 100] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_vector_search_method_range(): + """Test HybridQuery with RANGE vector search method.""" + with pytest.raises(ValueError): + # RANGE requires RADIUS + HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + vector_search_method="RANGE", + ) + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + 
vector_search_method="RANGE", + range_radius=10, + ) + + # RANGE with params should be in args + args = get_query_pieces(hybrid_query) + assert args[7:13] == ["RANGE", 4, "RADIUS", 10, "EPSILON", 0.01] + + # With optional EPSILON param + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + vector_search_method="RANGE", + range_radius=10, + range_epsilon=0.1, + ) + + # RANGE with params should be in args + args = get_query_pieces(hybrid_query) + assert args[7:13] == ["RANGE", 4, "RADIUS", 10, "EPSILON", 0.1] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_vector_search_method_none(): + """Test HybridQuery without specifying vector search method.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + vector_search_method=None, + ) + + # Verify basic VSIM structure without explicit method + args = get_query_pieces(hybrid_query) + assert "VSIM" in args + assert "@embedding" in args + # When None, should not have KNN or RANGE explicitly + assert "KNN" not in args + assert "RANGE" not in args + + +# Edge cases + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_empty_text_after_stopwords(): + """Test HybridQuery behavior when text becomes empty after stopword removal.""" + # All words are stopwords + with pytest.raises(ValueError, match="text string cannot be empty"): + HybridQuery( + text="the a an", + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + stopwords="english", + ) + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_special_characters_in_text(): + """Test HybridQuery with special characters in text.""" + special_text = "search for @user #hashtag $price 50% off!" 
+ + hybrid_query = HybridQuery( + text=special_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + ) + + assert get_query_pieces(hybrid_query) == [ + "SEARCH", + "(~@description:(search | \\@user | \\#hashtag | \\$price | 50\\% | off\\!))", + "SCORER", + "BM25STD", + "VSIM", + "@embedding", + bytes_vector, + "LIMIT", + "0", + "10", + ] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_unicode_text(): + """Test HybridQuery with Unicode characters in text.""" + unicode_text = "café résumé naïve 日本語 中文" + + hybrid_query = HybridQuery( + text=unicode_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + stopwords=None, # Disable stopwords for Unicode test + ) + + assert get_query_pieces(hybrid_query) == [ + "SEARCH", + "(~@description:(café | résumé | naïve | 日本語 | 中文))", + "SCORER", + "BM25STD", + "VSIM", + "@embedding", + bytes_vector, + "LIMIT", + "0", + "10", + ] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_with_vector_filter_and_method(): + """Test HybridQuery with vector filter and a search method.""" + tag_filter = Tag("genre") == "comedy" + + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + vector_search_method="KNN", + filter_expression=tag_filter, + num_results=10, + ) + + # Verify KNN params and filter are both in args + args = get_query_pieces(hybrid_query) + assert args[7:15] == [ + "KNN", + 4, + "K", + 10, + "EF_RUNTIME", + 10, + "FILTER", + "@genre:{comedy}", + ] + + +# Combination method tests + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_combination_method_rrf_basic(): + """Test HybridQuery with RRF combination method.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + 
vector_field_name="embedding", + combination_method="RRF", + rrf_window=10, + ) + + # Verify RRF combination method is set + assert hybrid_query.combination_method is not None + + # Verify that combination method args are correct + assert hybrid_query.combination_method.get_args() == [ + "COMBINE", + "RRF", + 4, + "WINDOW", + 10, + "CONSTANT", + 60, + ] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_combination_method_rrf_with_constant(): + """Test HybridQuery with RRF combination method and constant parameter.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + combination_method="RRF", + rrf_constant=50, + ) + + # Verify RRF combination method is set + assert hybrid_query.combination_method is not None + + # Verify that combination method args are correct + assert hybrid_query.combination_method.get_args() == [ + "COMBINE", + "RRF", + 4, + "WINDOW", + 20, + "CONSTANT", + 50, + ] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_combination_method_rrf_with_both_params(): + """Test HybridQuery with RRF combination method with both window and constant.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + combination_method="RRF", + rrf_window=20, + rrf_constant=50, + yield_combined_score_as="rrf_score", + ) + + # Verify RRF combination method is set + assert hybrid_query.combination_method is not None + + # Verify that combination method args are correct + assert hybrid_query.combination_method.get_args() == [ + "COMBINE", + "RRF", + 6, + "WINDOW", + 20, + "CONSTANT", + 50, + "YIELD_SCORE_AS", + "rrf_score", + ] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.parametrize("alpha", [0.1, 0.5, 0.9]) +def test_hybrid_query_combination_method_linear(alpha: 
float): + """Test HybridQuery with LINEAR combination method.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + combination_method="LINEAR", + linear_alpha=alpha, + ) + + # Verify LINEAR combination method is set + assert hybrid_query.combination_method is not None + + # Verify that combination method args are correct + assert hybrid_query.combination_method.get_args() == [ + "COMBINE", + "LINEAR", + 4, + "ALPHA", + alpha, + "BETA", + 1 - alpha, + ] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_combination_method_linear_with_yield_score(): + """Test HybridQuery with LINEAR combination method and yield_combined_score_as.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + combination_method="LINEAR", + linear_alpha=0.3, + yield_combined_score_as="linear_score", + ) + + assert hybrid_query.combination_method is not None + assert hybrid_query.combination_method.get_args() == [ + "COMBINE", + "LINEAR", + 6, + "ALPHA", + 0.3, + "BETA", + 0.7, + "YIELD_SCORE_AS", + "linear_score", + ] + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_hybrid_query_combination_method_none(): + """Test HybridQuery without combination method.""" + hybrid_query = HybridQuery( + text=sample_text, + text_field_name="description", + vector=sample_vector, + vector_field_name="embedding", + combination_method=None, + ) + + # Verify no combination method is set + assert hybrid_query.combination_method is None + + # Verify COMBINE does not appear in query args + args = get_query_pieces(hybrid_query) + assert "COMBINE" not in args + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +def test_build_combination_method_invalid_method(): + """Test build_combination_method static method with invalid combination 
method.""" + with pytest.raises(ValueError, match="Unknown combination method"): + build_combination_method( + combination_method="INVALID", # type: ignore + ) + + +@pytest.mark.skipif(not REDIS_HYBRID_AVAILABLE, reason=SKIP_REASON) +@pytest.mark.parametrize("method", ["RRF", "LINEAR"]) +def test_build_combination_method_no_parameters(method: Literal["RRF", "LINEAR"]): + """Test build_combination_method static method raises ValueError when no parameters provided.""" + with pytest.raises( + ValueError, + match="No parameters provided for combination method - must provide at least one parameter", + ): + build_combination_method( + combination_method=method, + ) diff --git a/tests/unit/test_query_types.py b/tests/unit/test_query_types.py index 5f37f96c..fa7ad69a 100644 --- a/tests/unit/test_query_types.py +++ b/tests/unit/test_query_types.py @@ -1,4 +1,5 @@ import pytest +from redis import __version__ as redis_version from redis.commands.search.query import Query from redis.commands.search.result import Result @@ -6,6 +7,7 @@ from redisvl.query import CountQuery, FilterQuery, RangeQuery, TextQuery, VectorQuery from redisvl.query.filter import Tag from redisvl.query.query import VectorRangeQuery +from redisvl.redis.connection import is_version_gte # Sample data for testing sample_vector = [0.1, 0.2, 0.3, 0.4] @@ -402,6 +404,11 @@ def test_text_query_word_weights(): ], ) def test_query_modifiers(query): + if is_version_gte(redis_version, "7.0.0"): # Format changed in redis-py 7.0+ + expected_fields = ["test"] + else: + expected_fields = ("test",) + query.paging(3, 5) assert query._offset == 3 assert query._num == 5 @@ -437,7 +444,7 @@ def test_query_modifiers(query): assert query._with_scores query.limit_fields("test") - assert query._fields == ("test",) + assert query._fields == expected_fields f = Tag("test") == "foo" query.set_filter(f) @@ -456,7 +463,7 @@ def test_query_modifiers(query): assert query._no_content assert query._no_stopwords assert query._with_scores -
assert query._fields == ("test",) + assert query._fields == expected_fields @pytest.mark.parametrize( diff --git a/uv.lock b/uv.lock index 6c249ae4..59993000 100644 --- a/uv.lock +++ b/uv.lock @@ -1029,6 +1029,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/a8/20d0723294217e47de6d9e2e40fd4a9d2f7c4b6ef974babd482a59743694/fastjsonschema-2.21.2-py3-none-any.whl", hash = "sha256:1c797122d0a86c5cace2e54bf4e819c36223b552017172f32c5c024a6b77e463", size = 24024, upload-time = "2025-08-14T18:49:34.776Z" }, ] +[[package]] +name = "ffmpeg-python" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "future" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/5e/d5f9105d59c1325759d838af4e973695081fbbc97182baf73afc78dec266/ffmpeg-python-0.2.0.tar.gz", hash = "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127", size = 21543, upload-time = "2019-07-06T00:19:08.989Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/0c/56be52741f75bad4dc6555991fabd2e07b432d333da82c11ad701123888a/ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5", size = 25024, upload-time = "2019-07-06T00:19:07.215Z" }, +] + [[package]] name = "filelock" version = "3.19.1" @@ -1170,6 +1182,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7", size = 199289, upload-time = "2025-09-02T19:10:47.708Z" }, ] +[[package]] +name = "future" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/b2/4140c69c6a66432916b26158687e821ba631a4c9273c474343badf84d3ba/future-1.0.0.tar.gz", hash = "sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05", size = 
1228490, upload-time = "2024-02-21T11:52:38.461Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/71/ae30dadffc90b9006d77af76b393cb9dfbfc9629f339fc1574a1c52e6806/future-1.0.0-py3-none-any.whl", hash = "sha256:929292d34f5872e70396626ef385ec22355a1fae8ad29e1a734c3e43f9fbc216", size = 491326, upload-time = "2024-02-21T11:52:35.956Z" }, +] + [[package]] name = "google-api-core" version = "2.26.0" @@ -4201,14 +4222,35 @@ wheels = [ [[package]] name = "redis" -version = "6.4.0" +version = "7.0.1" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +dependencies = [ + { name = "async-timeout", marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/8f/f125feec0b958e8d22c8f0b492b30b1991d9499a4315dfde466cf4289edc/redis-7.0.1.tar.gz", hash = "sha256:c949df947dca995dc68fdf5a7863950bf6df24f8d6022394585acc98e81624f1", size = 4755322, upload-time = "2025-10-27T14:34:00.33Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/97/9f22a33c475cda519f20aba6babb340fb2f2254a02fb947816960d1e669a/redis-7.0.1-py3-none-any.whl", hash = "sha256:4977af3c7d67f8f0eb8b6fec0dafc9605db9343142f634041fb0235f67c0588a", size = 339938, upload-time = "2025-10-27T14:33:58.553Z" }, +] + +[[package]] +name = "redis" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] dependencies = [ - { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, + { name = "async-timeout", marker = "python_full_version >= '3.10' and python_full_version < '3.11.3'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0d/d6/e8b92798a5bd67d659d51a18170e91c16ac3b59738d91894651ee255ed49/redis-6.4.0.tar.gz", hash = 
"sha256:b01bc7282b8444e28ec36b261df5375183bb47a07eb9c603f284e89cbc5ef010", size = 4647399, upload-time = "2025-08-07T08:10:11.441Z" } +sdist = { url = "https://files.pythonhosted.org/packages/43/c8/983d5c6579a411d8a99bc5823cc5712768859b5ce2c8afe1a65b37832c81/redis-7.1.0.tar.gz", hash = "sha256:b1cc3cfa5a2cb9c2ab3ba700864fb0ad75617b41f01352ce5779dabf6d5f9c3c", size = 4796669, upload-time = "2025-11-19T15:54:39.961Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/02/89e2ed7e85db6c93dfa9e8f691c5087df4e3551ab39081a4d7c6d1f90e05/redis-6.4.0-py3-none-any.whl", hash = "sha256:f0544fa9604264e9464cdf4814e7d4830f74b165d52f2a330a760a88dd248b7f", size = 279847, upload-time = "2025-08-07T08:10:09.84Z" }, + { url = "https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159, upload-time = "2025-11-19T15:54:38.064Z" }, ] [[package]] @@ -4224,7 +4266,8 @@ dependencies = [ { name = "pydantic" }, { name = "python-ulid" }, { name = "pyyaml" }, - { name = "redis" }, + { name = "redis", version = "7.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "redis", version = "7.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "tenacity" }, ] @@ -4305,7 +4348,7 @@ requires-dist = [ { name = "pydantic", specifier = ">=2,<3" }, { name = "python-ulid", specifier = ">=3.0.0" }, { name = "pyyaml", specifier = ">=5.4,<7.0" }, - { name = "redis", specifier = ">=5.0,<7.0" }, + { name = "redis", specifier = ">=5.0,<7.2" }, { name = "sentence-transformers", marker = "extra == 'sentence-transformers'", specifier = ">=3.4.0,<4" }, { name = "tenacity", specifier = ">=8.2.2" }, { name = "urllib3", marker = "extra == 'bedrock'", specifier = "<2.2.0" }, @@ -5787,11 +5830,12 @@ wheels = [ [[package]] 
name = "voyageai" -version = "0.3.5" +version = "0.3.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "aiolimiter" }, + { name = "ffmpeg-python" }, { name = "langchain-text-splitters", version = "0.3.11", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "langchain-text-splitters", version = "1.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, @@ -5804,9 +5848,9 @@ dependencies = [ { name = "tenacity" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/51/9b/e40f90793c1d03610b6109852791f752fcb257989a96701258278f874e00/voyageai-0.3.5.tar.gz", hash = "sha256:963e0d71611af529fa0e496db232a4f660b5f73bce7af1ab288a7f59df7512da", size = 20414, upload-time = "2025-09-11T00:28:26.29Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/a6/5f93fcd9c8c1a05873d287f1600e04f6cbfb2d42fce15ed75b0d5bebb9fa/voyageai-0.3.6.tar.gz", hash = "sha256:411e7c11eae4917429f091553a9b6911860df626811fb9415f24197d0cc5e219", size = 26346, upload-time = "2025-12-09T01:32:52.278Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/9d/709f5c7fc80a7bf11952fbccfca2bc5525bd5d345521795358819bd01d02/voyageai-0.3.5-py3-none-any.whl", hash = "sha256:1f70fcf3532d7e0bbc4332b1831a6fc1f714f268eeddc8b2859b81bf06a82411", size = 28257, upload-time = "2025-09-11T00:28:24.62Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e9/e13785fb2a3c605ea924ce2e54d235e423f6d6e45ddb09574963655ec111/voyageai-0.3.6-py3-none-any.whl", hash = "sha256:e282f9cef87eb949e2dd30ffe911689f1068c50b8c3c6e90e97793f2a52c83dd", size = 34465, upload-time = "2025-12-09T01:32:51.32Z" }, ] [[package]]