Skip to content

Commit 962ff9c

Browse files
densumesh authored and skeptrunedev committed
feature: readd recency bias for groups
1 parent c1bd516 commit 962ff9c

File tree

1 file changed

+62
-40
lines changed

1 file changed

+62
-40
lines changed

server/src/operators/search_operator.rs

+62-40
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,9 @@ use super::typo_operator::correct_query;
1717
use crate::data::models::{
1818
convert_to_date_time, ChunkGroup, ChunkGroupAndFileId, ChunkMetadata,
1919
ChunkMetadataStringTagSet, ChunkMetadataTypes, ConditionType, ContentChunkMetadata, Dataset,
20-
DatasetConfiguration, GeoInfoWithBias, HasIDCondition, QdrantChunkMetadata, QdrantSortBy,
21-
QueryTypes, ReRankOptions, RedisPool, ScoreChunk, ScoreChunkDTO, SearchMethod,
22-
SlimChunkMetadata, SortByField, SortBySearchType, SortOptions, UnifiedId,
20+
DatasetConfiguration, HasIDCondition, QdrantChunkMetadata, QdrantSortBy, QueryTypes,
21+
ReRankOptions, RedisPool, ScoreChunk, ScoreChunkDTO, SearchMethod, SlimChunkMetadata,
22+
SortByField, SortBySearchType, SortOptions, UnifiedId,
2323
};
2424
use crate::handlers::chunk_handler::{
2525
AutocompleteReqPayload, ChunkFilter, CountChunkQueryResponseBody, CountChunksReqPayload,
@@ -1630,12 +1630,15 @@ pub fn rerank_chunks(
16301630

16311631
pub fn rerank_groups(
16321632
groups: Vec<GroupScoreChunk>,
1633-
tag_weights: Option<HashMap<String, f32>>,
1634-
use_weights: Option<bool>,
1635-
query_location: Option<GeoInfoWithBias>,
1633+
sort_options: Option<SortOptions>,
16361634
) -> Vec<GroupScoreChunk> {
16371635
let mut reranked_groups = Vec::new();
1638-
if use_weights.unwrap_or(true) {
1636+
if sort_options.is_none() {
1637+
return groups;
1638+
}
1639+
1640+
let sort_options = sort_options.unwrap();
1641+
if sort_options.use_weights.unwrap_or(true) {
16391642
groups.into_iter().for_each(|mut group| {
16401643
let first_chunk = group.metadata.get_mut(0).unwrap();
16411644
if first_chunk.metadata[0].metadata().weight == 0.0 {
@@ -1649,8 +1652,55 @@ pub fn rerank_groups(
16491652
reranked_groups = groups;
16501653
}
16511654

1652-
if query_location.is_some() && query_location.unwrap().bias > 0.0 {
1653-
let info_with_bias = query_location.unwrap();
1655+
if sort_options.recency_bias.is_some() && sort_options.recency_bias.unwrap() > 0.0 {
1656+
let recency_weight = sort_options.recency_bias.unwrap();
1657+
let min_timestamp = reranked_groups
1658+
.iter()
1659+
.filter_map(|group| group.metadata[0].metadata[0].metadata().time_stamp)
1660+
.min();
1661+
let max_timestamp = reranked_groups
1662+
.iter()
1663+
.filter_map(|group| group.metadata[0].metadata[0].metadata().time_stamp)
1664+
.max();
1665+
let max_score = reranked_groups
1666+
.iter()
1667+
.map(|group| group.metadata[0].score)
1668+
.max_by(|a, b| a.partial_cmp(b).unwrap());
1669+
let min_score = reranked_groups
1670+
.iter()
1671+
.map(|group| group.metadata[0].score)
1672+
.min_by(|a, b| a.partial_cmp(b).unwrap());
1673+
1674+
if let (Some(min), Some(max)) = (min_timestamp, max_timestamp) {
1675+
let min_duration = chrono::Utc::now().signed_duration_since(min.and_utc());
1676+
let max_duration = chrono::Utc::now().signed_duration_since(max.and_utc());
1677+
1678+
reranked_groups = reranked_groups
1679+
.iter_mut()
1680+
.map(|group| {
1681+
let first_chunk = group.metadata.get_mut(0).unwrap();
1682+
if let Some(time_stamp) = first_chunk.metadata[0].metadata().time_stamp {
1683+
let duration =
1684+
chrono::Utc::now().signed_duration_since(time_stamp.and_utc());
1685+
let normalized_recency_score = (duration.num_seconds() as f32
1686+
- min_duration.num_seconds() as f32)
1687+
/ (max_duration.num_seconds() as f32
1688+
- min_duration.num_seconds() as f32);
1689+
1690+
let normalized_chunk_score = (first_chunk.score - min_score.unwrap_or(0.0))
1691+
/ (max_score.unwrap_or(1.0) - min_score.unwrap_or(0.0));
1692+
1693+
first_chunk.score = (normalized_chunk_score * (1.0 / recency_weight) as f64)
1694+
+ (recency_weight * normalized_recency_score) as f64
1695+
}
1696+
group.clone()
1697+
})
1698+
.collect::<Vec<GroupScoreChunk>>();
1699+
}
1700+
}
1701+
1702+
if sort_options.location_bias.is_some() && sort_options.location_bias.unwrap().bias > 0.0 {
1703+
let info_with_bias = sort_options.location_bias.unwrap();
16541704
let query_location = info_with_bias.location;
16551705
let location_bias = info_with_bias.bias;
16561706
let distances = reranked_groups
@@ -1688,7 +1738,7 @@ pub fn rerank_groups(
16881738
.collect::<Vec<GroupScoreChunk>>();
16891739
}
16901740

1691-
if let Some(tag_weights) = tag_weights {
1741+
if let Some(tag_weights) = sort_options.tag_weights {
16921742
reranked_groups = reranked_groups
16931743
.iter_mut()
16941744
.map(|group| {
@@ -2545,21 +2595,7 @@ pub async fn search_over_groups_query(
25452595

25462596
timer.add("fetched from postgres");
25472597

2548-
result_chunks.group_chunks = rerank_groups(
2549-
result_chunks.group_chunks,
2550-
data.sort_options
2551-
.as_ref()
2552-
.map(|d| d.tag_weights.clone())
2553-
.unwrap_or_default(),
2554-
data.sort_options
2555-
.as_ref()
2556-
.map(|d| d.use_weights)
2557-
.unwrap_or_default(),
2558-
data.sort_options
2559-
.as_ref()
2560-
.map(|d| d.location_bias)
2561-
.unwrap_or_default(),
2562-
);
2598+
result_chunks.group_chunks = rerank_groups(result_chunks.group_chunks, data.sort_options);
25632599

25642600
result_chunks.corrected_query = corrected_query.map(|c| c.query);
25652601

@@ -2771,21 +2807,7 @@ pub async fn hybrid_search_over_groups(
27712807
});
27722808
}
27732809

2774-
reranked_chunks = rerank_groups(
2775-
reranked_chunks,
2776-
data.sort_options
2777-
.as_ref()
2778-
.map(|d| d.tag_weights.clone())
2779-
.unwrap_or_default(),
2780-
data.sort_options
2781-
.as_ref()
2782-
.map(|d| d.use_weights)
2783-
.unwrap_or_default(),
2784-
data.sort_options
2785-
.as_ref()
2786-
.map(|d| d.location_bias)
2787-
.unwrap_or_default(),
2788-
);
2810+
reranked_chunks = rerank_groups(reranked_chunks, data.sort_options);
27892811

27902812
let result_chunks = DeprecatedSearchOverGroupsResponseBody {
27912813
group_chunks: reranked_chunks,

0 commit comments

Comments
 (0)