Skip to content

Commit

Permalink
feature: group search with pagefind
Browse files Browse the repository at this point in the history
  • Loading branch information
cdxker committed Dec 17, 2024
1 parent 0f840b8 commit 8b5f373
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 35 deletions.
1 change: 1 addition & 0 deletions clients/search-component/example/src/routes/ecommerce.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ export default function ECommerce() {
tags={defaultTags}
floatingButtonPosition={floatingButtonPosition}
showFloatingButton={showFloatingButton}
debounceMs={500}
/>
</>
) : (
Expand Down
68 changes: 52 additions & 16 deletions clients/search-component/src/utils/hooks/modal-context.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
import {
countChunks,
countChunksWithPagefind,
groupSearchWithPagefind,
groupSearchWithTrieve,
searchWithPagefind,
searchWithTrieve,
Expand Down Expand Up @@ -152,29 +153,31 @@ const ModalContext = createContext<{
currentGroup: ChunkGroup | null;
setCurrentGroup: React.Dispatch<React.SetStateAction<ChunkGroup | null>>;
tagCounts: CountChunkQueryResponseBody[];
pagefind?: any;
}>({
props: defaultProps,
trieveSDK: (() => {}) as unknown as TrieveSDK,
trieveSDK: (() => { }) as unknown as TrieveSDK,
query: "",
results: [],
loadingResults: false,
open: false,
inputRef: { current: null },
modalRef: { current: null },
mode: "search",
setMode: () => {},
setOpen: () => {},
setQuery: () => {},
setResults: () => {},
setMode: () => { },
setOpen: () => { },
setQuery: () => { },
setResults: () => { },
requestID: "",
setRequestID: () => {},
setLoadingResults: () => {},
setCurrentTag: () => {},
setRequestID: () => { },
setLoadingResults: () => { },
setCurrentTag: () => { },
currentTag: "all",
currentGroup: null,
setCurrentGroup: () => {},
setCurrentGroup: () => { },
tagCounts: [],
setContextProps: () => {},
setContextProps: () => { },
pagefind: null,
});

const ModalProvider = ({
Expand Down Expand Up @@ -202,6 +205,7 @@ const ModalProvider = ({
const [currentTag, setCurrentTag] = useState(
props.tags?.find((t) => t.selected)?.tag || "all"
);
const [pagefind, setPagefind] = useState<any>(undefined);

const [currentGroup, setCurrentGroup] = useState<ChunkGroup | null>(null);

Expand All @@ -219,7 +223,7 @@ const ModalProvider = ({

try {
setLoadingResults(true);
if (props.useGroupSearch) {
if (props.useGroupSearch && !props.pagefindOptions?.usePagefind) {
const results = await groupSearchWithTrieve({
query: query,
searchOptions: props.searchOptions,
Expand All @@ -241,10 +245,31 @@ const ModalProvider = ({

setResults(Array.from(groupMap.values()));
setRequestID(results.requestID);
} else if (props.pagefindOptions?.usePagefind) {
} else if (props.useGroupSearch && props.pagefindOptions?.usePagefind) {

const results = await groupSearchWithPagefind(
pagefind,
query,
props.datasetId,
currentTag !== "all" ? currentTag : undefined
);
console.log("pagefindGMdd", results);
const groupMap = new Map<string, GroupChunk[]>();
results.groups.forEach((group) => {
const title = group.chunks[0].chunk.metadata?.title;
if (groupMap.has(title)) {
groupMap.get(title)?.push(group);
} else {
groupMap.set(title, [group]);
}
});
console.log("pagefindGM", groupMap);
setResults(Array.from(groupMap.values()));

} else if (!props.useGroupSearch && props.pagefindOptions?.usePagefind) {
const results = await searchWithPagefind(
pagefind,
query,
props.pagefindOptions,
props.datasetId,
currentTag !== "all" ? currentTag : undefined
);
Expand Down Expand Up @@ -280,10 +305,9 @@ const ModalProvider = ({
}
if (props.tags?.length) {
if (props.pagefindOptions?.usePagefind) {
let filterCounts = await countChunksWithPagefind(
const filterCounts = await countChunksWithPagefind(
pagefind,
query,
props.pagefindOptions,
props.datasetId,
props.tags
);
console.log("fildas", filterCounts, props.tags);
Expand Down Expand Up @@ -320,6 +344,18 @@ const ModalProvider = ({
}));
}, [onLoadProps]);

// Loads the dataset's Pagefind bundle and stores the module in state so the
// search helpers can reuse it instead of importing it on every query.
useEffect(() => {
  const options = props.pagefindOptions;
  if (!options?.usePagefind) {
    return;
  }

  // Prevents a late-resolving import from writing state after unmount or
  // after the bundle URL inputs have changed.
  let cancelled = false;
  const pagefind_base_url = `${options.cdnBaseUrl}/${props.datasetId}`;

  import(`${pagefind_base_url}/pagefind.js`)
    .then((loaded) => {
      if (cancelled) {
        return;
      }
      setPagefind(loaded);
      // NOTE(review): presumably pre-loads filter data so the first tag count
      // is fast — confirm against the Pagefind API docs.
      return loaded.filters();
    })
    .catch((error) => {
      // Without this handler a missing or unreachable bundle surfaces as an
      // unhandled promise rejection.
      console.error("Failed to load pagefind bundle", error);
    });

  return () => {
    cancelled = true;
  };
}, [
  props.pagefindOptions?.usePagefind,
  props.pagefindOptions?.cdnBaseUrl,
  props.datasetId,
]);

useEffect(() => {
props.onOpenChange?.(open);
}, [open]);
Expand Down
107 changes: 92 additions & 15 deletions clients/search-component/src/utils/trieve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import {
SearchResponseBody,
TrieveSDK,
} from "trieve-ts-sdk";
import { Chunk, GroupSearchResults, Props, SearchResults } from "./types";
import { Chunk, ChunkWithHighlights, GroupChunk, GroupSearchResults, Props, SearchResults } from "./types";
import { defaultHighlightOptions, highlightText } from "./highlight";
import { ModalTypes, PagefindOptions } from "./hooks/modal-context";

Expand Down Expand Up @@ -308,15 +308,11 @@ export const getAllChunksForGroup = async (
};

export const searchWithPagefind = async (
pagefind: any,
query: string,
pagefindOptions: PagefindOptions,
datasetId: string,
tag?: string,
) => {
const pagefind_base_url = `${pagefindOptions.cdnBaseUrl}/${datasetId}`;

const pagefind = await import(`${pagefind_base_url}/pagefind.js`);
console.log("searchWithPagefind B");
const response = await pagefind.search(query,
tag && {
filters: {
Expand Down Expand Up @@ -356,10 +352,93 @@ export const searchWithPagefind = async (
return pagefindResultsMappedToTrieve;
};

export const countChunksWithPagefind = async (
/**
 * Runs a Pagefind query and reshapes the hits into Trieve-style group results.
 *
 * Hits are bucketed by the `group_ids` value stored in each record's Pagefind
 * metadata. At most 20 hits are consumed, and bucketing stops early once 10
 * distinct groups exist.
 *
 * @param pagefind  Loaded Pagefind module (the object exposing `search`).
 * @param query     Search text forwarded to `pagefind.search`.
 * @param datasetId Dataset id stamped onto every synthesized chunk and group.
 * @param tag       Optional tag; when set, results are filtered on `tag_set`.
 * @returns Groups in first-seen order; `requestID` is always an empty string.
 */
export const groupSearchWithPagefind = async (
  pagefind: any,
  query: string,
  datasetId: string,
  tag?: string,
): Promise<GroupSearchResults> => {
  const maxChunks = 20;
  const maxGroups = 10;

  const response = await pagefind.search(
    query,
    tag ? { filters: { tag_set: tag } } : undefined,
  );

  // Only the first `maxChunks` hits can ever be used below, so avoid fetching
  // the fragment data for the rest.
  const hits: any[] = (response?.results ?? []).slice(0, maxChunks);
  const results = await Promise.all(hits.map((hit: any) => hit.data()));

  const groupMap = new Map<string, ChunkWithHighlights[]>();

  for (let index = 0; index < results.length; index++) {
    const result = results[index];
    const meta = result.meta ?? {};

    // `image_urls` is only present in the metadata when the indexed chunk had
    // images, so it must not be split unconditionally.
    const imageUrls =
      typeof meta.image_urls === "string" && meta.image_urls !== ""
        ? meta.image_urls.split(", ")
        : [];

    const chunkWithHighlights = {
      chunk: {
        chunk_html: result.content,
        link: result.url,
        metadata: result.meta,
        created_at: "",
        dataset_id: datasetId,
        id: index.toString(),
        image_urls: imageUrls,
        location: null,
        num_value: null,
        tag_set: meta.tag_set,
        time_stamp: null,
        tracking_id: null,
        updated_at: "",
        weight: 0,
      },
      highlights: [],
    } as unknown as ChunkWithHighlights;

    // NOTE(review): the key is the raw `group_ids` metadata string; a chunk
    // that belongs to several groups is keyed by the combined value.
    const groupId = String(meta.group_ids ?? "");
    const bucket = groupMap.get(groupId);
    if (bucket) {
      bucket.push(chunkWithHighlights);
    } else {
      groupMap.set(groupId, [chunkWithHighlights]);
    }

    if (groupMap.size >= maxGroups) {
      break;
    }
  }

  // Array.from over the Map avoids relying on iterator-helper support
  // (`Map.prototype.entries().forEach`), which older runtimes do not provide.
  const groups: GroupChunk[] = Array.from(
    groupMap,
    ([groupId, chunks]) =>
      ({
        chunks: chunks,
        group: {
          created_at: "",
          dataset_id: datasetId,
          description: "",
          id: groupId,
          metadata: null,
          name: "",
          tag_set: "",
          tracking_id: null,
          updated_at: "",
        },
        requestID: "",
      }) as unknown as GroupChunk,
  );

  return {
    groups: groups,
    requestID: "",
  } as unknown as GroupSearchResults;
};

export const countChunksWithPagefind = async (
pagefind: any,
query: string,
tags: {
tag: string;
label?: string;
Expand All @@ -368,14 +447,12 @@ export const countChunksWithPagefind = async (
icon?: () => JSX.Element;
}[]
): Promise<CountChunkQueryResponseBody[]> => {
const pagefind_base_url = `${pagefindOptions.cdnBaseUrl}/${datasetId}`;

const pagefind = await import(`${pagefind_base_url}/pagefind.js`);
console.log("filters load");
console.log("filters");
let queryParam: string | null = query;
if (query.trim() === "") {
queryParam = null;
}

await pagefind.filters();
const response = await pagefind.search(query);
const response = await pagefind.search(queryParam);

const tag_set = response.filters.tag_set;
console.log("tag_set", response);
Expand Down
20 changes: 20 additions & 0 deletions server/src/data/models.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1013,6 +1013,7 @@ pub struct QdrantChunkMetadata {
pub image_urls: Option<Vec<String>>,
pub tag_set: Option<Vec<String>>,
pub num_value: Option<f64>,
pub group_ids: Option<Vec<uuid::Uuid>>,
}

impl From<SearchResult> for QdrantChunkMetadata {
Expand Down Expand Up @@ -1169,6 +1170,24 @@ impl From<SearchResult> for QdrantChunkMetadata {
}) => Some(*num_value),
_ => None,
};
// Read the optional `group_ids` payload entry. Anything other than a list
// yields `None`; within a list, entries that are not strings or do not parse
// as UUIDs are dropped.
let group_ids: Option<Vec<uuid::Uuid>> = search_result
    .payload
    .get("group_ids")
    .and_then(|value| match &value.kind {
        Some(Kind::ListValue(list)) => Some(list),
        _ => None,
    })
    .map(|list| {
        list.iter()
            .filter_map(|entry| match &entry.kind {
                Some(Kind::StringValue(raw)) => uuid::Uuid::parse_str(raw).ok(),
                _ => None,
            })
            .collect()
    });

QdrantChunkMetadata {
link,
Expand All @@ -1183,6 +1202,7 @@ impl From<SearchResult> for QdrantChunkMetadata {
image_urls: images_urls,
tag_set,
num_value,
group_ids,
}
}
}
Expand Down
15 changes: 11 additions & 4 deletions server/src/operators/pagefind_operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,17 @@ pub async fn build_index_for_dataset_id(
sort_keys.insert("time_stamp".to_string(), time_stamp.to_string());
}

// Expose the chunk's group ids both as a Pagefind filter and as record
// metadata. Metadata values are single strings, so the ids are joined with
// ", " (the delimiter already used for `image_urls`); collecting the id
// strings directly into a `String` would run them together with no separator.
if let Some(group_ids) = &payload.group_ids {
    filters.insert(
        "group_ids".to_string(),
        group_ids.iter().map(|id| id.to_string()).collect(),
    );
    meta_keys.insert(
        "group_ids".to_string(),
        group_ids
            .iter()
            .map(|id| id.to_string())
            .collect::<Vec<_>>()
            .join(", "),
    );
}

if let Some(image_urls) = payload.image_urls {
meta_keys.insert("image_urls".to_string(), image_urls.join(", "));
}
Expand Down Expand Up @@ -157,7 +168,3 @@ pub async fn build_index_for_dataset_id(

Ok(())
}




0 comments on commit 8b5f373

Please sign in to comment.