diff --git a/clients/search-component/example/src/routes/ecommerce.tsx b/clients/search-component/example/src/routes/ecommerce.tsx index 221be1ffed..06c0e1fcf8 100644 --- a/clients/search-component/example/src/routes/ecommerce.tsx +++ b/clients/search-component/example/src/routes/ecommerce.tsx @@ -96,6 +96,7 @@ export default function ECommerce() { tags={defaultTags} floatingButtonPosition={floatingButtonPosition} showFloatingButton={showFloatingButton} + debounceMs={500} /> ) : ( diff --git a/clients/search-component/src/utils/hooks/modal-context.tsx b/clients/search-component/src/utils/hooks/modal-context.tsx index d796047152..4b8dcc68d9 100644 --- a/clients/search-component/src/utils/hooks/modal-context.tsx +++ b/clients/search-component/src/utils/hooks/modal-context.tsx @@ -15,6 +15,7 @@ import { import { countChunks, countChunksWithPagefind, + groupSearchWithPagefind, groupSearchWithTrieve, searchWithPagefind, searchWithTrieve, @@ -155,9 +156,10 @@ const ModalContext = createContext<{ currentGroup: ChunkGroup | null; setCurrentGroup: React.Dispatch>; tagCounts: CountChunkQueryResponseBody[]; + pagefind?: any; }>({ props: defaultProps, - trieveSDK: (() => {}) as unknown as TrieveSDK, + trieveSDK: (() => { }) as unknown as TrieveSDK, query: "", results: [], loadingResults: false, @@ -165,19 +167,20 @@ const ModalContext = createContext<{ inputRef: { current: null }, modalRef: { current: null }, mode: "search", - setMode: () => {}, - setOpen: () => {}, - setQuery: () => {}, - setResults: () => {}, + setMode: () => { }, + setOpen: () => { }, + setQuery: () => { }, + setResults: () => { }, requestID: "", - setRequestID: () => {}, - setLoadingResults: () => {}, - setCurrentTag: () => {}, + setRequestID: () => { }, + setLoadingResults: () => { }, + setCurrentTag: () => { }, currentTag: "all", currentGroup: null, - setCurrentGroup: () => {}, + setCurrentGroup: () => { }, tagCounts: [], - setContextProps: () => {}, + setContextProps: () => { }, + pagefind: null, }); const ModalProvider = ({ @@ -205,6 +208,7 @@ const ModalProvider = ({ const [currentTag, setCurrentTag] = useState( props.tags?.find((t) => t.selected)?.tag || "all" ); + const [pagefind, setPagefind] = useState(undefined); const [currentGroup, setCurrentGroup] = useState(null); @@ -222,7 +226,7 @@ const ModalProvider = ({ try { setLoadingResults(true); - if (props.useGroupSearch) { + if (props.useGroupSearch && !props.pagefindOptions?.usePagefind) { const results = await groupSearchWithTrieve({ query: query, searchOptions: props.searchOptions, @@ -244,10 +248,31 @@ const ModalProvider = ({ setResults(Array.from(groupMap.values())); setRequestID(results.requestID); - } else if (props.pagefindOptions?.usePagefind) { + } else if (props.useGroupSearch && props.pagefindOptions?.usePagefind) { + + const results = await groupSearchWithPagefind( + pagefind, + query, + props.datasetId, + currentTag !== "all" ? currentTag : undefined + ); + console.log("pagefindGMdd", results); + const groupMap = new Map(); + results.groups.forEach((group) => { + const title = group.chunks[0].chunk.metadata?.title; + if (groupMap.has(title)) { + groupMap.get(title)?.push(group); + } else { + groupMap.set(title, [group]); + } + }); + console.log("pagefindGM", groupMap); + setResults(Array.from(groupMap.values())); + + } else if (!props.useGroupSearch && props.pagefindOptions?.usePagefind) { const results = await searchWithPagefind( + pagefind, query, - props.pagefindOptions, props.datasetId, currentTag !== "all" ? currentTag : undefined ); @@ -283,10 +308,9 @@ const ModalProvider = ({ } if (props.tags?.length) { if (props.pagefindOptions?.usePagefind) { - let filterCounts = await countChunksWithPagefind( + const filterCounts = await countChunksWithPagefind( + pagefind, query, - props.pagefindOptions, - props.datasetId, props.tags ); console.log("fildas", filterCounts, props.tags); @@ -323,6 +347,18 @@ const ModalProvider = ({ })); }, [onLoadProps]); + useEffect(() => { + if (props.pagefindOptions?.usePagefind) { + const pagefind_base_url = `${props?.pagefindOptions.cdnBaseUrl}/${props.datasetId}`; + import(`${pagefind_base_url}/pagefind.js`).then((pagefind) => { + setPagefind(pagefind) + pagefind.filters().then(() => { + console.log("pagefind filters loaded"); + }) + }); + } + }, []); + useEffect(() => { props.onOpenChange?.(open); }, [open]); diff --git a/clients/search-component/src/utils/trieve.ts b/clients/search-component/src/utils/trieve.ts index 40b608a74c..213e40a8be 100644 --- a/clients/search-component/src/utils/trieve.ts +++ b/clients/search-component/src/utils/trieve.ts @@ -5,7 +5,7 @@ import { SearchResponseBody, TrieveSDK, } from "trieve-ts-sdk"; -import { Chunk, GroupSearchResults, Props, SearchResults } from "./types"; +import { Chunk, ChunkWithHighlights, GroupChunk, GroupSearchResults, Props, SearchResults } from "./types"; import { defaultHighlightOptions, highlightText } from "./highlight"; import { ModalTypes, PagefindOptions } from "./hooks/modal-context"; @@ -308,15 +308,11 @@ export const getAllChunksForGroup = async ( }; export const searchWithPagefind = async ( + pagefind: any, query: string, - pagefindOptions: PagefindOptions, datasetId: string, tag?: string, ) => { - const pagefind_base_url = `${pagefindOptions.cdnBaseUrl}/${datasetId}`; - - const pagefind = await import(`${pagefind_base_url}/pagefind.js`); - console.log("searchWithPagefind B"); const response = await pagefind.search(query, tag && { filters: { @@ -356,10 +352,93 @@ export const searchWithPagefind = async ( return pagefindResultsMappedToTrieve; }; -export const countChunksWithPagefind = async ( +export const groupSearchWithPagefind = async ( + pagefind: any, query: string, - pagefindOptions: PagefindOptions, datasetId: string, + tag?: string, +): Promise => { + const response = await pagefind.search(query, + tag && { + filters: { + tag_set: tag + } + } + ); + + const results = await Promise.all(response.results.map(async (result: any) => { + return await result.data(); + })); + + const groupMap = new Map(); + + let i = 0; + for (const result of results) { + const chunkWithHighlights = { + chunk: { + chunk_html: result.content, + link: result.url, + metadata: result.meta, + created_at: "", + dataset_id: datasetId, + id: i.toString(), + image_urls: result.meta.image_urls.split(", "), + location: null, + num_value: null, + tag_set: result.meta.tag_set, + time_stamp: null, + tracking_id: null, + updated_at: "", + weight: 0 + + }, + highlights: [] + } as unknown as ChunkWithHighlights; + + const group = result.meta.group_ids; + if (groupMap.has(group)) { + groupMap.get(group)?.push(chunkWithHighlights); + } else { + groupMap.set(group, [chunkWithHighlights]); + } + i++; + + if (groupMap.size >= 10 || i >= 20) { + break; + } + } + + console.log("groupMap", groupMap); + const groups: GroupChunk[] = []; + groupMap.entries().forEach(([group_id, chunks]) => { + console.log("group_id", group_id); + groups.push({ + chunks: chunks, + group: { + created_at: "", + dataset_id: datasetId, + description: "", + id: group_id, + metadata: null, + name: "", + tag_set: "", + tracking_id: null, + updated_at: "" + }, + requestID: "", + } as unknown as GroupChunk + ); + }); + + return { + groups: groups, + requestID: "", + } as unknown as GroupSearchResults; +}; + +export const countChunksWithPagefind = async ( + pagefind: any, + query: string, tags: { tag: string; label?: string; @@ -368,14 +447,12 @@ export const countChunksWithPagefind = async ( icon?: () => JSX.Element; }[] ): Promise => { - const pagefind_base_url = `${pagefindOptions.cdnBaseUrl}/${datasetId}`; - - const pagefind = await import(`${pagefind_base_url}/pagefind.js`); - console.log("filters load"); - console.log("filters"); + let queryParam: string | null = query; + if (query.trim() === "") { + queryParam = null; + } - await pagefind.filters(); - const response = await pagefind.search(query); + const response = await pagefind.search(queryParam); const tag_set = response.filters.tag_set; console.log("tag_set", response); diff --git a/server/src/data/models.rs b/server/src/data/models.rs index 10b07601b3..962ec04226 100644 --- a/server/src/data/models.rs +++ b/server/src/data/models.rs @@ -1013,6 +1013,7 @@ pub struct QdrantChunkMetadata { pub image_urls: Option>, pub tag_set: Option>, pub num_value: Option, + pub group_ids: Option>, } impl From for QdrantChunkMetadata { @@ -1169,6 +1170,24 @@ impl From for QdrantChunkMetadata { }) => Some(*num_value), _ => None, }; + let group_ids: Option> = match search_result.payload.get("group_ids") { + Some(qdrant::Value { + kind: Some(Kind::ListValue(group_ids)), + .. + }) => Some( + group_ids + .iter() + .filter_map(|id| match id { + qdrant::Value { + kind: Some(Kind::StringValue(id)), + .. + } => uuid::Uuid::parse_str(id).ok(), + _ => None + }) + .collect(), + ), + _ => None, + }; QdrantChunkMetadata { link, @@ -1183,6 +1202,7 @@ impl From for QdrantChunkMetadata { image_urls: images_urls, tag_set, num_value, + group_ids, } } } diff --git a/server/src/operators/pagefind_operator.rs b/server/src/operators/pagefind_operator.rs index 7c43984ed2..5189bc4e7e 100644 --- a/server/src/operators/pagefind_operator.rs +++ b/server/src/operators/pagefind_operator.rs @@ -86,6 +86,17 @@ pub async fn build_index_for_dataset_id( sort_keys.insert("time_stamp".to_string(), time_stamp.to_string()); } + if let Some(group_ids) = payload.group_ids.clone() { + filters.insert( + "group_ids".to_string(), + group_ids.iter().map(|i| i.to_string()).collect(), + ); + meta_keys.insert( + "group_ids".to_string(), + group_ids.iter().map(|i| i.to_string()).collect(), + ); + } + if let Some(image_urls) = payload.image_urls { meta_keys.insert("image_urls".to_string(), image_urls.join(", ")); } @@ -157,7 +168,3 @@ pub async fn build_index_for_dataset_id( Ok(()) } - - - -