Skip to content

Commit f603016

Browse files
skeptrunedev authored and cdxker committed
cleanup: make support for group_id and group_tracking_id filters more clear
1 parent d4e9e30 commit f603016

File tree

8 files changed

+118
-20
lines changed

8 files changed

+118
-20
lines changed

clients/ts-sdk/openapi.json

+61-5
Original file line number | Diff line number | Diff line change
@@ -7188,7 +7188,7 @@
71887188
},
71897189
"ChunkFilter": {
71907190
"type": "object",
7191-
"description": "Filters is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.",
7191+
"description": "ChunkFilter is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.",
71927192
"properties": {
71937193
"must": {
71947194
"type": "array",
@@ -8097,9 +8097,10 @@
80978097
"$ref": "#/components/schemas/FieldCondition"
80988098
},
80998099
{
8100-
"$ref": "#/components/schemas/HasIDCondition"
8100+
"$ref": "#/components/schemas/HasChunkIDCondition"
81018101
}
8102-
]
8102+
],
8103+
"description": "Filters can be constructed using either fields on the chunk objects, ids or tracking ids of chunks, and finally ids or tracking ids of groups."
81038104
},
81048105
"ContentChunkMetadata": {
81058106
"type": "object",
@@ -8791,6 +8792,14 @@
87918792
],
87928793
"nullable": true
87938794
},
8795+
"sort_options": {
8796+
"allOf": [
8797+
{
8798+
"$ref": "#/components/schemas/SortOptions"
8799+
}
8800+
],
8801+
"nullable": true
8802+
},
87948803
"topic_id": {
87958804
"type": "string",
87968805
"format": "uuid",
@@ -9508,6 +9517,14 @@
95089517
],
95099518
"nullable": true
95109519
},
9520+
"sort_options": {
9521+
"allOf": [
9522+
{
9523+
"$ref": "#/components/schemas/SortOptions"
9524+
}
9525+
],
9526+
"nullable": true
9527+
},
95119528
"topic_id": {
95129529
"type": "string",
95139530
"format": "uuid",
@@ -10176,6 +10193,7 @@
1017610193
},
1017710194
"FieldCondition": {
1017810195
"type": "object",
10196+
"description": "FieldCondition is a JSON object which can be used to filter chunks by a field. This is useful for when you want to filter chunks by arbitrary metadata. To access fields inside of the metadata that you provide with the card, prefix the field name with `metadata.`.",
1017910197
"required": [
1018010198
"field"
1018110199
],
@@ -10190,7 +10208,7 @@
1019010208
},
1019110209
"field": {
1019210210
"type": "string",
10193-
"description": "Field is the name of the field to filter on. The field value will be used to check for an exact substring match on the metadata values for each existing chunk. This is useful for when you want to filter chunks by arbitrary metadata. To access fields inside of the metadata that you provide with the card, prefix the field name with `metadata.`."
10211+
"description": "Field is the name of the field to filter on. Commonly used fields are `timestamp`, `link`, `tag_set`, `location`, `num_value`, `group_ids`, and `group_tracking_ids`. The field value will be used to check for an exact substring match on the metadata values for each existing chunk. This is useful for when you want to filter chunks by arbitrary metadata. To access fields inside of the metadata that you provide with the card, prefix the field name with `metadata.`."
1019410212
},
1019510213
"geo_bounding_box": {
1019610214
"allOf": [
@@ -10976,22 +10994,25 @@
1097610994
}
1097710995
}
1097810996
},
10979-
"HasIDCondition": {
10997+
"HasChunkIDCondition": {
1098010998
"type": "object",
10999+
"description": "HasChunkIDCondition is a JSON object which can be used to filter chunks by their ids or tracking ids. This is useful for when you want to filter chunks by their ids or tracking ids.",
1098111000
"properties": {
1098211001
"ids": {
1098311002
"type": "array",
1098411003
"items": {
1098511004
"type": "string",
1098611005
"format": "uuid"
1098711006
},
11007+
"description": "Ids of the chunks to apply a match_any condition with. Only chunks with one of these ids will be returned.",
1098811008
"nullable": true
1098911009
},
1099011010
"tracking_ids": {
1099111011
"type": "array",
1099211012
"items": {
1099311013
"type": "string"
1099411014
},
11015+
"description": "Tracking ids of the chunks to apply a match_any condition with. Only chunks with one of these tracking ids will be returned.",
1099511016
"nullable": true
1099611017
}
1099711018
}
@@ -11441,6 +11462,25 @@
1144111462
"updated_at": "2021-01-01 00:00:00.000"
1144211463
}
1144311464
},
11465+
"MmrOptions": {
11466+
"type": "object",
11467+
"description": "MMR Options lets you specify different methods to rerank the chunks in the result set using Maximal Marginal Relevance. If not specified, this defaults to the score of the chunks.",
11468+
"required": [
11469+
"use_mmr"
11470+
],
11471+
"properties": {
11472+
"mmr_lambda": {
11473+
"type": "number",
11474+
"format": "float",
11475+
"description": "Set mmr_lambda to a value between 0.0 and 1.0 to control the tradeoff between relevance and diversity. Closer to 1.0 will give more diverse results, closer to 0.0 will give more relevant results. If not specified, this defaults to 0.5.",
11476+
"nullable": true
11477+
},
11478+
"use_mmr": {
11479+
"type": "boolean",
11480+
"description": "Set use_mmr to true to use the Maximal Marginal Relevance algorithm to rerank the results."
11481+
}
11482+
}
11483+
},
1144411484
"MultiQuery": {
1144511485
"type": "object",
1144611486
"description": "MultiQuery allows you to construct a dense vector from multiple queries with a weighted sum. This is useful for when you want to emphasize certain features of the query. This only works with Semantic Search and is not compatible with cross encoder re-ranking or highlights.",
@@ -13135,6 +13175,14 @@
1313513175
],
1313613176
"nullable": true
1313713177
},
13178+
"sort_options": {
13179+
"allOf": [
13180+
{
13181+
"$ref": "#/components/schemas/SortOptions"
13182+
}
13183+
],
13184+
"nullable": true
13185+
},
1313813186
"topic_id": {
1313913187
"type": "string",
1314013188
"format": "uuid",
@@ -15036,6 +15084,14 @@
1503615084
],
1503715085
"nullable": true
1503815086
},
15087+
"mmr": {
15088+
"allOf": [
15089+
{
15090+
"$ref": "#/components/schemas/MmrOptions"
15091+
}
15092+
],
15093+
"nullable": true
15094+
},
1503915095
"recency_bias": {
1504015096
"type": "number",
1504115097
"format": "float",

clients/ts-sdk/package.json

+1-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
"files": [
77
"dist"
88
],
9-
"version": "0.0.36",
9+
"version": "0.0.37",
1010
"license": "MIT",
1111
"scripts": {
1212
"lint": "eslint 'src/**/*.ts'",

clients/ts-sdk/src/types.gen.ts

+37-4
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ export type ChatMessageProxy = {
206206
};
207207

208208
/**
209-
* Filters is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.
209+
* ChunkFilter is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.
210210
*/
211211
export type ChunkFilter = {
212212
/**
@@ -485,7 +485,10 @@ export type ClusterAnalyticsFilter = {
485485

486486
export type ClusterAnalyticsResponse = SearchClusterResponse | SearchQueryResponse;
487487

488-
export type ConditionType = FieldCondition | HasIDCondition;
488+
/**
489+
* Filters can be constructed using either fields on the chunk objects, ids or tracking ids of chunks, and finally ids or tracking ids of groups.
490+
*/
491+
export type ConditionType = FieldCondition | HasChunkIDCondition;
489492

490493
export type ContentChunkMetadata = {
491494
chunk_html?: (string) | null;
@@ -720,6 +723,7 @@ export type CreateMessageReqPayload = {
720723
*/
721724
search_query?: (string) | null;
722725
search_type?: ((SearchMethod) | null);
726+
sort_options?: ((SortOptions) | null);
723727
/**
724728
* The ID of the topic to attach the message to.
725729
*/
@@ -1033,6 +1037,7 @@ export type EditMessageReqPayload = {
10331037
*/
10341038
search_query?: (string) | null;
10351039
search_type?: ((SearchMethod) | null);
1040+
sort_options?: ((SortOptions) | null);
10361041
/**
10371042
* The id of the topic to edit the message at the given sort order for.
10381043
*/
@@ -1329,10 +1334,13 @@ export type event_type = 'view';
13291334

13301335
export type EventTypesFilter = 'add_to_cart' | 'purchase' | 'view' | 'click' | 'filter_clicked';
13311336

1337+
/**
1338+
* FieldCondition is a JSON object which can be used to filter chunks by a field. This is useful for when you want to filter chunks by arbitrary metadata. To access fields inside of the metadata that you provide with the card, prefix the field name with `metadata.`.
1339+
*/
13321340
export type FieldCondition = {
13331341
date_range?: ((DateRange) | null);
13341342
/**
1335-
* Field is the name of the field to filter on. The field value will be used to check for an exact substring match on the metadata values for each existing chunk. This is useful for when you want to filter chunks by arbitrary metadata. To access fields inside of the metadata that you provide with the card, prefix the field name with `metadata.`.
1343+
* Field is the name of the field to filter on. Commonly used fields are `timestamp`, `link`, `tag_set`, `location`, `num_value`, `group_ids`, and `group_tracking_ids`. The field value will be used to check for an exact substring match on the metadata values for each existing chunk. This is useful for when you want to filter chunks by arbitrary metadata. To access fields inside of the metadata that you provide with the card, prefix the field name with `metadata.`.
13361344
*/
13371345
field: string;
13381346
geo_bounding_box?: ((LocationBoundingBox) | null);
@@ -1597,8 +1605,17 @@ export type GroupsForChunk = {
15971605
slim_groups: Array<ChunkGroupAndFileId>;
15981606
};
15991607

1600-
export type HasIDCondition = {
1608+
/**
1609+
* HasChunkIDCondition is a JSON object which can be used to filter chunks by their ids or tracking ids. This is useful for when you want to filter chunks by their ids or tracking ids.
1610+
*/
1611+
export type HasChunkIDCondition = {
1612+
/**
1613+
* Ids of the chunks to apply a match_any condition with. Only chunks with one of these ids will be returned.
1614+
*/
16011615
ids?: Array<(string)> | null;
1616+
/**
1617+
* Tracking ids of the chunks to apply a match_any condition with. Only chunks with one of these tracking ids will be returned.
1618+
*/
16021619
tracking_ids?: Array<(string)> | null;
16031620
};
16041621

@@ -1777,6 +1794,20 @@ export type Message = {
17771794
updated_at: string;
17781795
};
17791796

1797+
/**
1798+
* MMR Options lets you specify different methods to rerank the chunks in the result set using Maximal Marginal Relevance. If not specified, this defaults to the score of the chunks.
1799+
*/
1800+
export type MmrOptions = {
1801+
/**
1802+
* Set mmr_lambda to a value between 0.0 and 1.0 to control the tradeoff between relevance and diversity. Closer to 1.0 will give more diverse results, closer to 0.0 will give more relevant results. If not specified, this defaults to 0.5.
1803+
*/
1804+
mmr_lambda?: (number) | null;
1805+
/**
1806+
* Set use_mmr to true to use the Maximal Marginal Relevance algorithm to rerank the results.
1807+
*/
1808+
use_mmr: boolean;
1809+
};
1810+
17801811
/**
17811812
* MultiQuery allows you to construct a dense vector from multiple queries with a weighted sum. This is useful for when you want to emphasize certain features of the query. This only works with Semantic Search and is not compatible with cross encoder re-ranking or highlights.
17821813
*/
@@ -2218,6 +2249,7 @@ export type RegenerateMessageReqPayload = {
22182249
*/
22192250
search_query?: (string) | null;
22202251
search_type?: ((SearchMethod) | null);
2252+
sort_options?: ((SortOptions) | null);
22212253
/**
22222254
* The id of the topic to regenerate the last message for.
22232255
*/
@@ -2723,6 +2755,7 @@ export type SortBySearchType = {
27232755
*/
27242756
export type SortOptions = {
27252757
location_bias?: ((GeoInfoWithBias) | null);
2758+
mmr?: ((MmrOptions) | null);
27262759
/**
27272760
* Recency Bias lets you determine how much of an effect the recency of chunks will have on the search results. If not specified, this defaults to 0.0. We recommend setting this to 1.0 for a gentle reranking of the results, >3.0 for a strong reranking of the results.
27282761
*/

frontends/search/src/components/FilterModal.tsx

+2
Original file line number | Diff line number | Diff line change
@@ -339,6 +339,8 @@ export const FilterItem = (props: FilterItemProps) => {
339339
"location",
340340
"metadata",
341341
"num_value",
342+
"group_tracking_ids",
343+
"group_ids",
342344
"tracking_ids",
343345
"ids",
344346
]}

server/src/data/models.rs

+10-3
Original file line number | Diff line number | Diff line change
@@ -4204,14 +4204,20 @@ pub struct LocationPolygon {
42044204
#[allow(clippy::large_enum_variant)]
42054205
#[derive(Serialize, Deserialize, Debug, Clone, ToSchema)]
42064206
#[serde(untagged)]
4207+
/// Filters can be constructed using either fields on the chunk objects, ids or tracking ids of chunks, and finally ids or tracking ids of groups.
42074208
pub enum ConditionType {
4209+
#[schema(title = "FieldCondition")]
42084210
Field(FieldCondition),
4209-
HasID(HasIDCondition),
4211+
#[schema(title = "HasChunkIDCondition")]
4212+
HasChunkId(HasChunkIDCondition),
42104213
}
42114214

42124215
#[derive(Serialize, Deserialize, Debug, Clone, ToSchema)]
4213-
pub struct HasIDCondition {
4216+
/// HasChunkIDCondition is a JSON object which can be used to filter chunks by their ids or tracking ids. This is useful for when you want to filter chunks by their ids or tracking ids.
4217+
pub struct HasChunkIDCondition {
4218+
/// Ids of the chunks to apply a match_any condition with. Only chunks with one of these ids will be returned.
42144219
pub ids: Option<Vec<uuid::Uuid>>,
4220+
/// Tracking ids of the chunks to apply a match_any condition with. Only chunks with one of these tracking ids will be returned.
42154221
pub tracking_ids: Option<Vec<String>>,
42164222
}
42174223

@@ -4226,8 +4232,9 @@ pub struct HasIDCondition {
42264232
"lt": 1.0
42274233
}
42284234
}))]
4235+
/// FieldCondition is a JSON object which can be used to filter chunks by a field. This is useful for when you want to filter chunks by arbitrary metadata. To access fields inside of the metadata that you provide with the card, prefix the field name with `metadata.`.
42294236
pub struct FieldCondition {
4230-
/// Field is the name of the field to filter on. The field value will be used to check for an exact substring match on the metadata values for each existing chunk. This is useful for when you want to filter chunks by arbitrary metadata. To access fields inside of the metadata that you provide with the card, prefix the field name with `metadata.`.
4237+
/// Field is the name of the field to filter on. Commonly used fields are `timestamp`, `link`, `tag_set`, `location`, `num_value`, `group_ids`, and `group_tracking_ids`. The field value will be used to check for an exact substring match on the metadata values for each existing chunk. This is useful for when you want to filter chunks by arbitrary metadata. To access fields inside of the metadata that you provide with the card, prefix the field name with `metadata.`.
42314238
pub field: String,
42324239
/// Match any lets you pass in an array of values that will return results if any of the items match. The match value will be used to check for an exact substring match on the metadata values for each existing chunk. If both match_all and match_any are provided, the match_any condition will be used.
42334240
#[serde(alias = "match")]

server/src/handlers/chunk_handler.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -948,7 +948,7 @@ pub async fn update_chunk_by_tracking_id(
948948
}
949949
]
950950
}))]
951-
/// Filters is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.
951+
/// ChunkFilter is a JSON object which can be used to filter chunks. This is useful for when you want to filter chunks by arbitrary metadata. Unlike with tag filtering, there is a performance hit for filtering on metadata.
952952
pub struct ChunkFilter {
953953
/// Only one of these field conditions has to match for the chunk to be included in the result set.
954954
pub should: Option<Vec<ConditionType>>,

server/src/lib.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -519,7 +519,7 @@ impl Modify for SecurityAddon {
519519
data::models::ContentChunkMetadata,
520520
data::models::ChunkMetadataStringTagSet,
521521
data::models::ConditionType,
522-
data::models::HasIDCondition,
522+
data::models::HasChunkIDCondition,
523523
data::models::DistanceMetric,
524524
data::models::PublicDatasetOptions,
525525
data::models::Invitation,

server/src/operators/search_operator.rs

+5-5
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ use super::typo_operator::correct_query;
1717
use crate::data::models::{
1818
convert_to_date_time, ChunkGroup, ChunkGroupAndFileId, ChunkMetadata,
1919
ChunkMetadataStringTagSet, ChunkMetadataTypes, ConditionType, ContentChunkMetadata, Dataset,
20-
DatasetConfiguration, HasIDCondition, MmrOptions, QdrantChunkMetadata, QdrantSortBy,
20+
DatasetConfiguration, HasChunkIDCondition, MmrOptions, QdrantChunkMetadata, QdrantSortBy,
2121
QueryTypes, ReRankOptions, RedisPool, ScoreChunk, ScoreChunkDTO, SearchMethod,
2222
SlimChunkMetadata, SortByField, SortBySearchType, SortOptions, UnifiedId,
2323
};
@@ -157,7 +157,7 @@ async fn convert_group_tracking_ids_to_group_ids(
157157
}
158158

159159
pub async fn get_qdrant_ids_from_condition(
160-
cond: HasIDCondition,
160+
cond: HasChunkIDCondition,
161161
pool: web::Data<Pool>,
162162
) -> Result<Vec<String>, ServiceError> {
163163
if let Some(ids) = cond.ids {
@@ -220,7 +220,7 @@ pub async fn assemble_qdrant_filter(
220220
filter.should.push(condition);
221221
}
222222
}
223-
ConditionType::HasID(cond) => {
223+
ConditionType::HasChunkId(cond) => {
224224
filter.should.push(Condition::has_id(
225225
get_qdrant_ids_from_condition(cond, pool.clone()).await?,
226226
));
@@ -245,7 +245,7 @@ pub async fn assemble_qdrant_filter(
245245
filter.must.push(condition);
246246
}
247247
}
248-
ConditionType::HasID(cond) => {
248+
ConditionType::HasChunkId(cond) => {
249249
filter.must.push(Condition::has_id(
250250
get_qdrant_ids_from_condition(cond, pool.clone()).await?,
251251
));
@@ -270,7 +270,7 @@ pub async fn assemble_qdrant_filter(
270270
filter.must_not.push(condition);
271271
}
272272
}
273-
ConditionType::HasID(cond) => {
273+
ConditionType::HasChunkId(cond) => {
274274
filter.must_not.push(Condition::has_id(
275275
get_qdrant_ids_from_condition(cond, pool.clone()).await?,
276276
));

0 commit comments

Comments
 (0)