diff --git a/apps/mobile/src/lib/search/index.ts b/apps/mobile/src/lib/search/index.ts index 6ee486d..10e05b4 100644 --- a/apps/mobile/src/lib/search/index.ts +++ b/apps/mobile/src/lib/search/index.ts @@ -82,6 +82,10 @@ export const rehydrateOramaDb = async (): Promise => { entry.examples, z.array(ExampleSchema) ); + const morphologyResult = safeJsonParse( + entry.morphology, + MorphologySchema + ); if ( !( @@ -94,9 +98,14 @@ export const rehydrateOramaDb = async (): Promise => { continue; } + const morphology = morphologyResult.ok + ? morphologyResult.value + : undefined; + documents.push({ id: entry.id, word: entry.word, + word_exact: entry.word, translation: entry.translation, created_at: entry.created_at ?? undefined, created_at_timestamp_ms: entry.created_at_timestamp_ms ?? undefined, @@ -106,6 +115,30 @@ export const rehydrateOramaDb = async (): Promise => { type: entry.type ?? undefined, root: rootResult.value ?? undefined, tags: tagsResult.value ?? undefined, + morphology: morphology + ? { + ism: morphology.ism + ? { + singular: morphology.ism.singular, + plurals: morphology.ism.plurals?.map((p) => p.word), + singular_exact: morphology.ism.singular, + plurals_exact: morphology.ism.plurals?.map((p) => p.word), + } + : undefined, + verb: morphology.verb + ? { + past_tense: morphology.verb.past_tense, + present_tense: morphology.verb.present_tense, + masadir: morphology.verb.masadir?.map((m) => m.word), + past_tense_exact: morphology.verb.past_tense, + present_tense_exact: morphology.verb.present_tense, + masadir_exact: morphology.verb.masadir?.map( + (m) => m.word + ), + } + : undefined, + } + : undefined, }); } @@ -229,9 +262,14 @@ export const hydrateOramaDb = async (): Promise< continue; } + const morphology = morphologyResult.ok + ? morphologyResult.value + : undefined; + documents.push({ id: entry.id, word: entry.word, + word_exact: entry.word, translation: entry.translation, created_at: entry.created_at ?? 
undefined, created_at_timestamp_ms: entry.created_at_timestamp_ms ?? undefined, @@ -241,6 +279,30 @@ export const hydrateOramaDb = async (): Promise< type: entry.type ?? undefined, root: rootResult.value ?? undefined, tags: tagsResult.value ?? undefined, + morphology: morphology + ? { + ism: morphology.ism + ? { + singular: morphology.ism.singular, + plurals: morphology.ism.plurals?.map((p) => p.word), + singular_exact: morphology.ism.singular, + plurals_exact: morphology.ism.plurals?.map((p) => p.word), + } + : undefined, + verb: morphology.verb + ? { + past_tense: morphology.verb.past_tense, + present_tense: morphology.verb.present_tense, + masadir: morphology.verb.masadir?.map((m) => m.word), + past_tense_exact: morphology.verb.past_tense, + present_tense_exact: morphology.verb.present_tense, + masadir_exact: morphology.verb.masadir?.map( + (m) => m.word + ), + } + : undefined, + } + : undefined, }); } diff --git a/apps/web/src/atoms/suggested-tags.ts b/apps/web/src/atoms/suggested-tags.ts new file mode 100644 index 0000000..9014bb3 --- /dev/null +++ b/apps/web/src/atoms/suggested-tags.ts @@ -0,0 +1,16 @@ +import { atomWithStorage } from "jotai/utils"; + +export const suggestedTagsAtom = atomWithStorage( + "suggested-tags", + [], + { + getItem: (key) => { + const val = sessionStorage.getItem(key); + return val ? 
JSON.parse(val) : []; + }, + setItem: (key, val) => { + sessionStorage.setItem(key, JSON.stringify(val)); + }, + removeItem: (key) => sessionStorage.removeItem(key), + } +); diff --git a/apps/web/src/components/TagsCombobox.tsx b/apps/web/src/components/TagsCombobox.tsx new file mode 100644 index 0000000..98589af --- /dev/null +++ b/apps/web/src/components/TagsCombobox.tsx @@ -0,0 +1,381 @@ +import { cn } from "@bahar/design-system"; +import { useDebounce } from "@uidotdev/usehooks"; +import { Check, ChevronDown, X } from "lucide-react"; +import { + type KeyboardEvent, + type ReactNode, + startTransition, + useCallback, + useEffect, + useRef, + useState, +} from "react"; + +export interface TagsComboboxProps { + /** Selected tag values */ + value: string[]; + onValueChange: (value: string[]) => void; + + /** Query function to fetch options */ + queryFn: (search: string) => Promise; + + /** Extract the string value from each option */ + getOptionValue: (option: T) => string; + + /** Extract display label (defaults to value) */ + getOptionLabel?: (option: T) => string; + placeholder?: string; + + /** Allow creating new tags not in options */ + allowCreate?: boolean; + + /** Custom render for "create new" option */ + renderCreateOption?: (inputValue: string) => ReactNode; + debounceMs?: number; + disabled?: boolean; + + /** Class name for root element */ + className?: string; +} + +export function TagsCombobox({ + value, + onValueChange, + queryFn, + getOptionValue, + getOptionLabel, + placeholder = "Search...", + allowCreate = true, + renderCreateOption, + debounceMs = 500, + disabled = false, + className, +}: TagsComboboxProps) { + const [inputValue, setInputValue] = useState(""); + const [isOpen, setIsOpen] = useState(false); + const [highlightedIndex, setHighlightedIndex] = useState(-1); + const [options, setOptions] = useState([]); + const [isLoading, setIsLoading] = useState(false); + + const debouncedInputValue = useDebounce(inputValue, debounceMs); + + const 
isSearching = inputValue !== debouncedInputValue || isLoading; + + const containerRef = useRef(null); + const inputRef = useRef(null); + const listRef = useRef(null); + + const trimmedInput = inputValue.trim(); + const showCreateOption = + allowCreate && + trimmedInput !== "" && + !options.some((o) => getOptionValue(o) === trimmedInput) && + !value.includes(trimmedInput); + + const totalOptions = options.length + (showCreateOption ? 1 : 0); + + useEffect(() => { + let cancelled = false; + setIsLoading(true); + + const fetchOptions = async () => { + try { + const results = await queryFn(debouncedInputValue); + if (!cancelled) { + startTransition(() => { + setOptions(results); + setIsLoading(false); + }); + } + } catch (_error) { + if (!cancelled) { + startTransition(() => { + setOptions([]); + setIsLoading(false); + }); + } + } + }; + + fetchOptions(); + + return () => { + cancelled = true; + }; + }, [debouncedInputValue, queryFn]); + + useEffect(() => { + const handleClickOutside = (event: MouseEvent) => { + if ( + containerRef.current && + !containerRef.current.contains(event.target as Node) + ) { + setIsOpen(false); + setHighlightedIndex(-1); + setInputValue(""); + } + }; + + document.addEventListener("mousedown", handleClickOutside); + return () => document.removeEventListener("mousedown", handleClickOutside); + }, []); + + // Reset highlight when input changes + useEffect(() => { + setHighlightedIndex(-1); + }, [inputValue]); + + const selectOption = useCallback( + (optionValue: string) => { + if (value.includes(optionValue)) { + onValueChange(value.filter((v) => v !== optionValue)); + } else { + onValueChange([...value, optionValue]); + } + setInputValue(""); + setHighlightedIndex(-1); + inputRef.current?.focus(); + }, + [value, onValueChange] + ); + + const removeTag = useCallback( + (tagValue: string) => { + onValueChange(value.filter((v) => v !== tagValue)); + inputRef.current?.focus(); + }, + [value, onValueChange] + ); + + const handleKeyDown = 
useCallback( + (event: KeyboardEvent) => { + if (disabled) return; + + switch (event.key) { + case "ArrowDown": + event.preventDefault(); + setIsOpen(true); + setHighlightedIndex((prev) => + prev < totalOptions - 1 ? prev + 1 : prev + ); + break; + + case "ArrowUp": + event.preventDefault(); + setHighlightedIndex((prev) => (prev > 0 ? prev - 1 : prev)); + break; + + case "Enter": + event.preventDefault(); + if (highlightedIndex >= 0 && highlightedIndex < totalOptions) { + if (highlightedIndex < options.length) { + selectOption(getOptionValue(options[highlightedIndex])); + } else if (showCreateOption) { + selectOption(trimmedInput); + } + } else if (showCreateOption && trimmedInput) { + selectOption(trimmedInput); + } + break; + + case "Escape": + setIsOpen(false); + setHighlightedIndex(-1); + break; + + case "Backspace": + if (inputValue === "" && value.length > 0) { + removeTag(value[value.length - 1]); + } + break; + } + }, + [ + disabled, + totalOptions, + highlightedIndex, + options, + showCreateOption, + trimmedInput, + inputValue, + selectOption, + removeTag, + value, + getOptionValue, + ] + ); + + const getLabel = (option: T) => + getOptionLabel ? getOptionLabel(option) : getOptionValue(option); + + return ( +
+ {/* Anchor - Tags + Input */} + {/* biome-ignore lint/a11y/useKeyWithClickEvents: keyboard events handled by input */} + {/* biome-ignore lint/a11y/noStaticElementInteractions: click delegates to input */} + {/* biome-ignore lint/a11y/noNoninteractiveElementInteractions: click delegates to input */} +
{ + if (!disabled) { + inputRef.current?.focus(); + } + }} + > + {value.map((tagValue) => ( + + ))} + + { + // Delay closing to allow clicks on dropdown options to register + if (!containerRef.current?.contains(e.relatedTarget as Node)) { + setTimeout(() => { + setIsOpen(false); + setHighlightedIndex(-1); + setInputValue(""); + }, 150); + } + }} + onChange={(e) => { + setInputValue(e.target.value); + setIsOpen(true); + }} + onFocus={() => setIsOpen(true)} + onKeyDown={handleKeyDown} + placeholder={value.length === 0 ? placeholder : ""} + ref={inputRef} + spellCheck={false} + type="text" + value={inputValue} + /> + + +
+ + {/* Dropdown Content */} + {isOpen && (totalOptions > 0 || isSearching) && ( +
+ {isSearching ? ( +
+ Searching... +
+ ) : ( +
+ {options.map((option, index) => { + const optionValue = getOptionValue(option); + const isHighlighted = index === highlightedIndex; + const isSelected = value.includes(optionValue); + + return ( +
selectOption(optionValue)} + onKeyDown={(e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + selectOption(optionValue); + } + }} + onMouseEnter={() => setHighlightedIndex(index)} + role="option" + tabIndex={0} + > + {getLabel(option)} + {isSelected && ( + + + + )} +
+ ); + })} + + {/* Create new option */} + {showCreateOption && ( +
selectOption(trimmedInput)} + onKeyDown={(e) => { + if (e.key === "Enter" || e.key === " ") { + e.preventDefault(); + selectOption(trimmedInput); + } + }} + onMouseEnter={() => setHighlightedIndex(options.length)} + role="option" + tabIndex={0} + > + {renderCreateOption ? ( + renderCreateOption(trimmedInput) + ) : ( + <>Add "{trimmedInput}" + )} +
+ )} +
+ )} +
+ )} +
+ ); +} diff --git a/apps/web/src/components/features/dictionary/add/TagsFormSection.tsx b/apps/web/src/components/features/dictionary/add/TagsFormSection.tsx index 0acff6f..860fb01 100644 --- a/apps/web/src/components/features/dictionary/add/TagsFormSection.tsx +++ b/apps/web/src/components/features/dictionary/add/TagsFormSection.tsx @@ -1,4 +1,3 @@ -import { Badge } from "@bahar/web-ui/components/badge"; import { Card, CardContent, @@ -6,25 +5,38 @@ import { CardHeader, CardTitle, } from "@bahar/web-ui/components/card"; +import { t } from "@lingui/core/macro"; import { Trans } from "@lingui/react/macro"; -import { X } from "lucide-react"; +import { useAtomValue } from "jotai"; +import { Plus } from "lucide-react"; import { useFieldArray, useFormContext } from "react-hook-form"; -import { Autocomplete } from "@/components/Autocomplete"; +import { suggestedTagsAtom } from "@/atoms/suggested-tags"; +import { TagsCombobox } from "@/components/TagsCombobox"; +import { dictionaryEntriesTable } from "@/lib/db/operations/dictionary-entries"; import type { FormSchema } from "@/lib/schemas/dictionary"; import type { z } from "@/lib/zod"; export const TagsFormSection = () => { const form = useFormContext>(); + const suggestedTags = useAtomValue(suggestedTagsAtom); - const { - fields: tagFields, - append: appendTag, - remove: removeTag, - } = useFieldArray({ + const { fields: tagFields, replace: replaceTags } = useFieldArray({ name: "tags", control: form.control, }); + const selectedTags = tagFields.map((field) => field.name); + + const availableSuggestions = suggestedTags.filter( + (tag) => !selectedTags.includes(tag) + ); + + const addTag = (tag: string) => { + if (!selectedTags.includes(tag)) { + replaceTags([...tagFields, { name: tag }]); + } + }; + return ( @@ -37,30 +49,54 @@ export const TagsFormSection = () => { - -
    - {tagFields.map((field, index) => { - return ( - removeTag(index)} - variant="secondary" - > - {field.name} - - - - ); - })} -
- - field.name)} - onClick={(val) => { - appendTag({ name: val }); + + option.tag} + onValueChange={(value) => { + replaceTags(value.map((item) => ({ name: item }))); }} + placeholder={t`Search for a tag...`} + queryFn={dictionaryEntriesTable.tags.query} + renderCreateOption={(value) => Add tag {value}} + value={selectedTags} /> + + {availableSuggestions.length > 0 && ( +
+
+

+ Recently used +

+ + +
+ +
+ {availableSuggestions.map((tag) => ( + + ))} +
+
+ )}
); diff --git a/apps/web/src/hooks/db/index.ts b/apps/web/src/hooks/db/index.ts index 1e74ddd..a1ba31f 100644 --- a/apps/web/src/hooks/db/index.ts +++ b/apps/web/src/hooks/db/index.ts @@ -1,11 +1,12 @@ import { insert, remove, update } from "@orama/orama"; import { useMutation } from "@tanstack/react-query"; +import { useSetAtom } from "jotai"; import { createEmptyCard } from "ts-fsrs"; +import { suggestedTagsAtom } from "@/atoms/suggested-tags"; import { dictionaryEntriesTable } from "@/lib/db/operations/dictionary-entries"; import { flashcardsTable } from "@/lib/db/operations/flashcards"; import { queryClient } from "@/lib/query"; -import { getOramaDb } from "@/lib/search"; -import { nullToUndefined } from "@/lib/utils"; +import { getOramaDb, toOramaDocument } from "@/lib/search"; import { useSearch } from "../useSearch"; /** @@ -13,6 +14,7 @@ import { useSearch } from "../useSearch"; */ export const useAddDictionaryEntry = () => { const { reset } = useSearch(); + const setSuggestedTags = useSetAtom(suggestedTagsAtom); const { mutateAsync } = useMutation({ mutationFn: dictionaryEntriesTable.addWord.mutation, onSuccess: async () => { @@ -35,6 +37,10 @@ export const useAddDictionaryEntry = () => { const newWord = await mutateAsync(params, opts); + if (params.word.tags && params.word.tags.length > 0) { + setSuggestedTags(params.word.tags); + } + const formattedEmptyCard = { ...emptyFlashcard, due: emptyFlashcard.due.toISOString(), @@ -57,7 +63,7 @@ export const useAddDictionaryEntry = () => { }), ]); - insert(getOramaDb(), nullToUndefined(newWord)); + insert(getOramaDb(), toOramaDocument(newWord)); reset(); }, }; @@ -112,7 +118,7 @@ export const useEditDictionaryEntry = () => { ) => { const updatedWord = await mutateAsync(params, opts); - update(getOramaDb(), updatedWord.id, nullToUndefined(updatedWord)); + update(getOramaDb(), updatedWord.id, toOramaDocument(updatedWord)); reset(); }, }; diff --git a/apps/web/src/hooks/useSearch.ts b/apps/web/src/hooks/useSearch.ts 
index d2bf997..1d231ad 100644 --- a/apps/web/src/hooks/useSearch.ts +++ b/apps/web/src/hooks/useSearch.ts @@ -1,25 +1,20 @@ -import type { SelectDictionaryEntry } from "@bahar/drizzle-user-db-schemas"; -import { - type InternalTypedDocument, - search as oramaSearch, - type Result, - type Results, - type SearchParams, -} from "@orama/orama"; +import { type SearchLanguage, searchDictionary } from "@bahar/search/database"; +import type { DictionaryDocument } from "@bahar/search/schema"; +import type { InternalTypedDocument, Result, Results } from "@orama/orama"; import { atom, useAtom, useSetAtom } from "jotai"; import { useCallback, useEffect, useMemo, useState } from "react"; import { getOramaDb } from "@/lib/search"; -import { detectLanguage, stripArabicDiacritics } from "@/lib/utils"; +import { detectLanguage } from "@/lib/utils"; const SEARCH_RESULTS_PER_PAGE = 20; const searchResultsMetadataAtom = atom>, + Results>, "hits" > | null>(null); const hitsAtom = atom< - Result>[] | null + Result>[] | null >(null); const offsetAtom = atom(0); @@ -38,44 +33,14 @@ export const useSearch = () => { const search = useCallback( ( - params: Omit< - SearchParams>, - "limit" | "mode" - > = {}, - language: "arabic" | "english" = "english" + params: { term?: string; offset?: number } = {}, + language: SearchLanguage = "english" ) => { - const tolerance = (() => { - if (!params.term) return 0; - - const len = stripArabicDiacritics(params.term).length; - - if (len <= 2) return 0; - if (len <= 3) return 1; - - return 2; - })(); - - // Orama's search function is sync by default, - // but it's typed as sync or async since some plugins - // can make it async. We cast type to sync return type - // so it's easier to work with. - return oramaSearch( - getOramaDb(), - { - ...params, - mode: "fulltext", - limit: SEARCH_RESULTS_PER_PAGE, - properties: params.term - ? 
["word", "translation", "definition", "tags"] - : undefined, - boost: { - word: 10, - translation: 10, - }, - tolerance, - }, - language - ) as Results>; + return searchDictionary(getOramaDb(), params.term ?? "", { + limit: SEARCH_RESULTS_PER_PAGE, + offset: params.offset, + language, + }) as Results>; }, [] ); @@ -120,7 +85,7 @@ export const useSearch = () => { ? ({ hits, ...searchResultsMetadata, - } as Results>) + } as Results>) : undefined, /** @@ -139,12 +104,7 @@ export const useSearch = () => { * Custom hook that wraps orama's search to implement infinite scrolling * functionality and exposes helper methods for interacting with the results. */ -export const useInfiniteScroll = ( - params: Omit< - SearchParams>, - "limit" | "offset" | "mode" - > = {} -) => { +export const useInfiniteScroll = (params: { term?: string } = {}) => { const { search } = useSearch(); const [hasMore, setHasMore] = useState(true); @@ -212,7 +172,7 @@ export const useInfiniteScroll = ( useEffect(() => { if (!(hits && searchResultsMetadata)) return; - if (hits.length === searchResultsMetadata.count) { + if (hits.length >= searchResultsMetadata.count) { setHasMore(false); } }, [hits, searchResultsMetadata]); @@ -227,7 +187,7 @@ export const useInfiniteScroll = ( ? 
({ ...searchResultsMetadata, hits, - } as Results>) + } as Results>) : undefined, }; }; diff --git a/apps/web/src/lib/search/index.ts b/apps/web/src/lib/search/index.ts index e454070..a2791cd 100644 --- a/apps/web/src/lib/search/index.ts +++ b/apps/web/src/lib/search/index.ts @@ -5,6 +5,7 @@ import { MorphologySchema, type RawDictionaryEntry, RootLettersSchema, + type SelectDictionaryEntry, TagsSchema, } from "@bahar/drizzle-user-db-schemas"; import { err, ok } from "@bahar/result"; @@ -12,7 +13,7 @@ import { createDictionaryDatabase, insertDocuments, } from "@bahar/search/database"; -import type { DictionaryOrama } from "@bahar/search/schema"; +import type { DictionaryDocument, DictionaryOrama } from "@bahar/search/schema"; import * as Sentry from "@sentry/react"; import { z } from "zod"; import { ensureDb } from "../db"; @@ -21,6 +22,55 @@ let oramaDb = createDictionaryDatabase(); export const getOramaDb = () => oramaDb; +/** + * Transforms a SelectDictionaryEntry into a DictionaryDocument for Orama. + * Handles the morphology transformation (plurals/masadir are {word: string}[] in DB but string[] in Orama). + */ +export const toOramaDocument = ( + entry: SelectDictionaryEntry +): DictionaryDocument => { + const morphology = entry.morphology; + + return { + id: entry.id, + word: entry.word, + word_exact: entry.word, + translation: entry.translation, + created_at: entry.created_at ?? undefined, + created_at_timestamp_ms: entry.created_at_timestamp_ms ?? undefined, + updated_at: entry.updated_at ?? undefined, + updated_at_timestamp_ms: entry.updated_at_timestamp_ms ?? undefined, + definition: entry.definition ?? undefined, + type: entry.type ?? undefined, + root: entry.root ?? undefined, + tags: entry.tags ?? undefined, + antonyms: entry.antonyms ?? undefined, + examples: entry.examples ?? undefined, + morphology: morphology + ? { + ism: morphology.ism + ? 
{ + singular: morphology.ism.singular, + plurals: morphology.ism.plurals?.map((p) => p.word), + singular_exact: morphology.ism.singular, + plurals_exact: morphology.ism.plurals?.map((p) => p.word), + } + : undefined, + verb: morphology.verb + ? { + past_tense: morphology.verb.past_tense, + present_tense: morphology.verb.present_tense, + masadir: morphology.verb.masadir?.map((m) => m.word), + past_tense_exact: morphology.verb.past_tense, + present_tense_exact: morphology.verb.present_tense, + masadir_exact: morphology.verb.masadir?.map((m) => m.word), + } + : undefined, + } + : undefined, + }; +}; + let isOramaHydrated = false; /** @@ -121,9 +171,14 @@ export const hydrateOramaDb = async () => { return null; } + const morphology = morphologyResult.ok + ? morphologyResult.value + : undefined; + return { id: entry.id, word: entry.word, + word_exact: entry.word, translation: entry.translation, created_at_timestamp_ms: entry.created_at_timestamp_ms ?? undefined, updated_at_timestamp_ms: entry.updated_at_timestamp_ms ?? undefined, @@ -131,6 +186,32 @@ export const hydrateOramaDb = async () => { type: entry.type ?? undefined, root: rootResult.value ?? undefined, tags: tagsResult.value ?? undefined, + morphology: morphology + ? { + ism: morphology.ism + ? { + singular: morphology.ism.singular, + plurals: morphology.ism.plurals?.map((p) => p.word), + singular_exact: morphology.ism.singular, + plurals_exact: morphology.ism.plurals?.map( + (p) => p.word + ), + } + : undefined, + verb: morphology.verb + ? 
{ + past_tense: morphology.verb.past_tense, + present_tense: morphology.verb.present_tense, + masadir: morphology.verb.masadir?.map((m) => m.word), + past_tense_exact: morphology.verb.past_tense, + present_tense_exact: morphology.verb.present_tense, + masadir_exact: morphology.verb.masadir?.map( + (m) => m.word + ), + } + : undefined, + } + : undefined, }; }) .filter((entry) => entry !== null); @@ -195,14 +276,23 @@ export const rehydrateOramaDb = async () => { .map((entry) => { const rootResult = safeJsonParse(entry.root, RootLettersSchema); const tagsResult = safeJsonParse(entry.tags, TagsSchema); + const morphologyResult = safeJsonParse( + entry.morphology, + MorphologySchema + ); if (!(rootResult.ok && tagsResult.ok)) { return null; } + const morphology = morphologyResult.ok + ? morphologyResult.value + : undefined; + return { id: entry.id, word: entry.word, + word_exact: entry.word, translation: entry.translation, created_at_timestamp_ms: entry.created_at_timestamp_ms ?? undefined, updated_at_timestamp_ms: entry.updated_at_timestamp_ms ?? undefined, @@ -210,6 +300,32 @@ export const rehydrateOramaDb = async () => { type: entry.type ?? undefined, root: rootResult.value ?? undefined, tags: tagsResult.value ?? undefined, + morphology: morphology + ? { + ism: morphology.ism + ? { + singular: morphology.ism.singular, + plurals: morphology.ism.plurals?.map((p) => p.word), + singular_exact: morphology.ism.singular, + plurals_exact: morphology.ism.plurals?.map( + (p) => p.word + ), + } + : undefined, + verb: morphology.verb + ? 
{ + past_tense: morphology.verb.past_tense, + present_tense: morphology.verb.present_tense, + masadir: morphology.verb.masadir?.map((m) => m.word), + past_tense_exact: morphology.verb.past_tense, + present_tense_exact: morphology.verb.present_tense, + masadir_exact: morphology.verb.masadir?.map( + (m) => m.word + ), + } + : undefined, + } + : undefined, }; }) .filter((entry) => entry !== null); diff --git a/apps/web/src/lib/utils.ts b/apps/web/src/lib/utils.ts index 519ba9b..f1d2d9f 100644 --- a/apps/web/src/lib/utils.ts +++ b/apps/web/src/lib/utils.ts @@ -38,11 +38,9 @@ export const nullToUndefined = (obj: T): NullToUndefined => { ) as NullToUndefined; } - // eslint-disable-next-line @typescript-eslint/no-explicit-any const result: any = {}; for (const key in obj) { if (Object.hasOwn(obj, key)) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any const value = (obj as any)[key]; if (value === null) { result[key] = undefined; diff --git a/biome.jsonc b/biome.jsonc index 0c93f1f..2595ff5 100644 --- a/biome.jsonc +++ b/biome.jsonc @@ -54,13 +54,13 @@ "useConsistentTypeDefinitions": "off", "useBlockStatements": "off", "noNonNullAssertion": "off", + "useAtIndex": "off", + "useDefaultSwitchClause": "off", "useConsistentBuiltinInstantiation": "info", "useConsistentMemberAccessibility": "info", "useReadonlyClassProperties": "info", - "useDefaultSwitchClause": "info", "noExportedImports": "info", "noNestedTernary": "info", - "useAtIndex": "info", "noParameterProperties": "info" }, "suspicious": { diff --git a/packages/search/src/arabic.ts b/packages/search/src/arabic.ts index 15d37b6..dc04f8e 100644 --- a/packages/search/src/arabic.ts +++ b/packages/search/src/arabic.ts @@ -20,20 +20,25 @@ export const normalizeArabicHamza = (text: string): string => { /** * Normalizes weak letters (حروف العلة: ا و ي) to alif (ا) - * Example: "عمارة" → "عاااة", "كتوب" → "كتاب" + * + * WARNING: This is extremely aggressive and destroys word distinctiveness. 
+ * Example: "طاحون" → "طاحان", "كتوب" → "كتاب" + * + * @deprecated Not used in search normalization - too destructive */ export const normalizeArabicWeakLetters = (text: string): string => { return text.replace(/[اوي]/g, "ا"); }; /** - * Applies all Arabic normalization transformations for search matching. - * Combines: diacritics removal + hamza normalization + weak letter normalization + * Applies Arabic normalization transformations for search matching. + * Combines: diacritics removal + hamza normalization + * + * Note: Weak letter normalization is intentionally excluded as it's too + * aggressive and destroys word distinctiveness (e.g., "طاحون" → "طاحان"). */ export const normalizeArabicForSearch = (text: string): string => { - return normalizeArabicWeakLetters( - normalizeArabicHamza(stripArabicDiacritics(text)) - ); + return normalizeArabicHamza(stripArabicDiacritics(text)); }; /** diff --git a/packages/search/src/database.ts b/packages/search/src/database.ts index 520326a..55719c3 100644 --- a/packages/search/src/database.ts +++ b/packages/search/src/database.ts @@ -4,13 +4,16 @@ import { create, + type InternalTypedDocument, insert, insertMultiple, + type Results, remove, search, update, } from "@orama/orama"; import { pluginQPS } from "@orama/plugin-qps"; +import { stripArabicDiacritics } from "./arabic"; import { type DictionaryDocument, type DictionaryOrama, @@ -18,6 +21,8 @@ import { } from "./schema"; import { multiLanguageTokenizer } from "./tokenizer"; +type SearchResults = Results>; + /** * Formats elapsed time for Orama internal logging */ @@ -92,7 +97,52 @@ export const removeDocument = (db: DictionaryOrama, id: string) => { type SearchableProperties = keyof typeof dictionarySchema; /** - * Searches the Orama database + * Exact match fields - searched first with low tolerance + */ +export const EXACT_PROPERTIES: SearchableProperties[] = [ + "word_exact", + "morphology.ism.singular_exact", + "morphology.ism.plurals_exact", + 
"morphology.verb.past_tense_exact", + "morphology.verb.present_tense_exact", + "morphology.verb.masadir_exact", +]; + +/** + * Normalized fields - searched with higher tolerance for fuzzy matching + */ +export const NORMALIZED_PROPERTIES: SearchableProperties[] = [ + "word", + "translation", + "definition", + "tags", + "morphology.ism.plurals", + "morphology.ism.singular", + "morphology.verb.masadir", + "morphology.verb.past_tense", + "morphology.verb.present_tense", +]; + +/** + * Boost configuration for normalized field search + */ +export const NORMALIZED_BOOST = { + word: 10, + translation: 10, + "morphology.ism.plurals": 10, + "morphology.ism.singular": 10, + "morphology.verb.masadir": 10, + "morphology.verb.past_tense": 10, + "morphology.verb.present_tense": 10, +} as const; + +export type SearchLanguage = "arabic" | "english"; + +/** + * Searches the Orama database using two-pass search for better relevance: + * 1. First pass: exact fields with low tolerance (precise matching) + * 2. Second pass: normalized fields with higher tolerance (fuzzy matching) + * Results are merged with exact matches prioritized. */ export const searchDictionary = ( db: DictionaryOrama, @@ -101,12 +151,71 @@ export const searchDictionary = ( limit?: number; offset?: number; properties?: SearchableProperties[]; + language?: SearchLanguage; } ) => { - return search(db, { - term, - limit: options?.limit ?? 10, - offset: options?.offset ?? 0, - properties: options?.properties, - }); + const limit = options?.limit ?? 10; + const offset = options?.offset ?? 0; + const language = options?.language; + + if (!term) { + return search( + db, + { + term, + limit, + offset, + }, + language + ); + } + + const termLen = stripArabicDiacritics(term).length; + + // Fetch enough results to cover offset + limit for proper pagination + const fetchLimit = offset + limit; + + // Pass 1: Exact match search (tolerance 0-1) + const exactTolerance = termLen <= 4 ? 
0 : 1; + const exactResults = search( + db, + { + term, + mode: "fulltext", + limit: fetchLimit, + properties: EXACT_PROPERTIES, + tolerance: exactTolerance, + }, + language + ) as SearchResults; + + // Pass 2: Fuzzy search on normalized fields + const fuzzyTolerance = termLen <= 2 ? 0 : termLen <= 4 ? 1 : 2; + const fuzzyResults = search( + db, + { + term, + mode: "fulltext", + limit: fetchLimit, + properties: NORMALIZED_PROPERTIES, + boost: NORMALIZED_BOOST, + tolerance: fuzzyTolerance, + }, + language + ) as SearchResults; + + // Merge results: exact matches first, then fuzzy (deduplicated) + const exactIds = new Set(exactResults.hits.map((h) => h.id)); + const mergedHits = [ + ...exactResults.hits, + ...fuzzyResults.hits.filter((h) => !exactIds.has(h.id)), + ].slice(offset, offset + limit); + + const estimatedCount = Math.max(exactResults.count, fuzzyResults.count); + + return { + elapsed: exactResults.elapsed, + count: estimatedCount, + hits: mergedHits, + } as SearchResults; }; diff --git a/packages/search/src/schema.ts b/packages/search/src/schema.ts index 1776e27..6b34e25 100644 --- a/packages/search/src/schema.ts +++ b/packages/search/src/schema.ts @@ -2,28 +2,63 @@ * Orama schema for dictionary entries */ -import type { - Antonym, - Example, - Morphology, -} from "@bahar/drizzle-user-db-schemas"; +import type { Antonym, Example } from "@bahar/drizzle-user-db-schemas"; import type { Orama } from "@orama/orama"; /** * Schema definition for dictionary entries in Orama * Only these fields will be indexed for search + * + * Fields ending in `_exact` store the original Arabic text without normalization, + * allowing exact matches to rank higher than normalized/fuzzy matches. 
+ */ export const dictionarySchema = { created_at_timestamp_ms: "number", updated_at_timestamp_ms: "number", + + // Normalized fields (stemmed, diacritics stripped, hamza normalized; weak letters are left intact) word: "string", translation: "string", definition: "string", type: "enum", root: "string[]", tags: "string[]", + "morphology.ism.singular": "string", + "morphology.ism.plurals": "string[]", + "morphology.verb.past_tense": "string", + "morphology.verb.present_tense": "string", + "morphology.verb.masadir": "string[]", + + // Exact fields (only diacritics stripped, no other normalization) + word_exact: "string", + "morphology.ism.singular_exact": "string", + "morphology.ism.plurals_exact": "string[]", + "morphology.verb.past_tense_exact": "string", + "morphology.verb.present_tense_exact": "string", + "morphology.verb.masadir_exact": "string[]", } as const; +/** + * Morphology structure flattened for Orama indexing. + * Includes both normalized and exact variants. + */ +export interface IndexedMorphology { + ism?: { + singular?: string; + plurals?: string[]; + singular_exact?: string; + plurals_exact?: string[]; + }; + verb?: { + past_tense?: string; + present_tense?: string; + masadir?: string[]; + past_tense_exact?: string; + present_tense_exact?: string; + masadir_exact?: string[]; + }; +} + /** * Document type for dictionary entries in Orama * Fields not in dictionarySchema are stored but not indexed @@ -31,6 +66,7 @@ export const dictionarySchema = { export interface DictionaryDocument { id: string; word: string; + word_exact?: string; translation: string; created_at?: string; created_at_timestamp_ms?: number; @@ -40,7 +76,7 @@ export interface DictionaryDocument { type?: string; root?: string[]; tags?: string[]; - morphology?: Morphology; + morphology?: IndexedMorphology; antonyms?: Antonym[]; examples?: Example[]; } diff --git a/packages/search/src/tokenizer.ts b/packages/search/src/tokenizer.ts index bdecd85..f4f4c8c 100644 --- a/packages/search/src/tokenizer.ts +++ 
b/packages/search/src/tokenizer.ts @@ -4,7 +4,7 @@ import { tokenizer as defaultTokenizer } from "@orama/orama/components"; import { stemmer as arabicStemmer } from "@orama/stemmers/arabic"; -import { normalizeArabicForSearch } from "./arabic"; +import { normalizeArabicForSearch, stripArabicDiacritics } from "./arabic"; export type OramaLanguage = "arabic" | "english"; @@ -20,6 +20,16 @@ export const arabicTokenizer = defaultTokenizer.createTokenizer({ stemmerSkipProperties: ["tags"], }); +/** + * Arabic tokenizer for exact fields - no stemming, only diacritic stripping. + * This preserves hamza variants and weak letters for exact matching. + */ +export const arabicExactTokenizer = defaultTokenizer.createTokenizer({ + language: "arabic", + stemming: false, + stopWords: false, +}); + /** * English tokenizer with stemming */ @@ -35,21 +45,41 @@ export const englishTokenizer = defaultTokenizer.createTokenizer({ */ const ENGLISH_PROPS = ["translation"]; +/** + * Properties that should use exact Arabic tokenization (no normalization beyond diacritics) + */ +const EXACT_PROPS = [ + "word_exact", + "morphology.ism.singular_exact", + "morphology.ism.plurals_exact", + "morphology.verb.past_tense_exact", + "morphology.verb.present_tense_exact", + "morphology.verb.masadir_exact", +]; + /** * Multi-language tokenizer that delegates to Arabic or English tokenizers - * based on the property name and language parameter + * based on the property name and language parameter. + * + * For `*_exact` fields, uses minimal normalization (diacritics only) to enable + * exact matching that ranks higher than fuzzy/normalized matches. 
*/ export const multiLanguageTokenizer = { language: "multi" as const, normalizationCache: new Map(), tokenize(raw: string, language: string, prop?: string): string[] { - const normalizedRaw = normalizeArabicForSearch(raw); + if (prop && EXACT_PROPS.includes(prop)) { + const diacriticsStripped = stripArabicDiacritics(raw); + return arabicExactTokenizer.tokenize(diacriticsStripped, "arabic", prop); + } if (prop && ENGLISH_PROPS.includes(prop)) { return englishTokenizer.tokenize(raw, "english", prop); } + const normalizedRaw = normalizeArabicForSearch(raw); + if (prop && !language && !ENGLISH_PROPS.includes(prop)) { return arabicTokenizer.tokenize(normalizedRaw, "arabic", prop); } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b59ffb6..be303bd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -3530,7 +3530,7 @@ packages: peerDependencies: elysia: '>=1.4.19' dependencies: - elysia: 1.4.19(@sinclair/typebox@0.34.45)(exact-mirror@0.2.5)(file-type@21.2.0)(openapi-types@12.1.3)(typescript@5.9.2) + elysia: 1.4.19(@sinclair/typebox@0.34.45)(exact-mirror@0.2.5)(file-type@21.2.0)(openapi-types@12.1.3)(typescript@5.8.3) dev: false /@emmetio/abbreviation@2.3.3: @@ -5529,7 +5529,7 @@ packages: react-hook-form: ^7.55.0 dependencies: '@standard-schema/utils': 0.3.0 - react-hook-form: 7.56.4(react@19.1.0) + react-hook-form: 7.56.4(react@19.0.0) dev: false /@img/colour@1.0.0: @@ -6449,7 +6449,6 @@ packages: transitivePeerDependencies: - supports-color - typescript - dev: true /@lingui/babel-plugin-lingui-macro@5.3.2(babel-plugin-macros@3.1.0)(typescript@5.9.2): resolution: {integrity: sha512-NdXrq8aZlPjN4jeA/LkSLNyx5vPGmrW+r2ywMNQDPQPVP28Hq8c3hF9SQc1t7hwBorGQ3qzIQ7i2Vm6Y8PnjQw==} @@ -6578,7 +6577,6 @@ packages: jiti: 1.21.7 transitivePeerDependencies: - typescript - dev: true /@lingui/conf@5.3.2(typescript@5.9.2): resolution: {integrity: sha512-c0Dfovr9BLuwAnY5GADxKcwBUQdVl0Jo/JUa3cumIXFhHzZGb78kfhCHjWWQdX8+WQD8qzSl/YkVDbxhcQJGmg==} @@ -6622,7 +6620,7 @@ packages: optional: 
true dependencies: '@babel/runtime': 7.26.7 - '@lingui/babel-plugin-lingui-macro': 5.3.2(babel-plugin-macros@3.1.0)(typescript@5.9.2) + '@lingui/babel-plugin-lingui-macro': 5.3.2(babel-plugin-macros@3.1.0)(typescript@5.8.3) '@lingui/message-utils': 5.3.2 babel-plugin-macros: 3.1.0 unraw: 3.0.0 @@ -12151,7 +12149,6 @@ packages: parse-json: 5.2.0 path-type: 4.0.0 typescript: 5.8.3 - dev: true /cosmiconfig@8.3.6(typescript@5.9.2): resolution: {integrity: sha512-kcZ6+W5QzcJ3P1Mt+83OUv/oHFqZHIx8DuxG6eZ5RGMERoLqp4BuGjhHLYGK+Kf5XVkQvqBSmAy/nGWN3qDgEA==} @@ -12757,31 +12754,6 @@ packages: typescript: 5.8.3 dev: false - /elysia@1.4.19(@sinclair/typebox@0.34.45)(exact-mirror@0.2.5)(file-type@21.2.0)(openapi-types@12.1.3)(typescript@5.9.2): - resolution: {integrity: sha512-DZb9y8FnWyX5IuqY44SvqAV0DjJ15NeCWHrLdgXrKgTPDPsl3VNwWHqrEr9bmnOCpg1vh6QUvAX/tcxNj88jLA==} - peerDependencies: - '@sinclair/typebox': '>= 0.34.0 < 1' - '@types/bun': '>= 1.2.0' - exact-mirror: '>= 0.0.9' - file-type: '>= 20.0.0' - openapi-types: '>= 12.0.0' - typescript: '>= 5.0.0' - peerDependenciesMeta: - '@types/bun': - optional: true - typescript: - optional: true - dependencies: - '@sinclair/typebox': 0.34.45 - cookie: 1.1.1 - exact-mirror: 0.2.5(@sinclair/typebox@0.34.45) - fast-decode-uri-component: 1.0.1 - file-type: 21.2.0 - memoirist: 0.4.0 - openapi-types: 12.1.3 - typescript: 5.9.2 - dev: false - /emittery@0.13.1: resolution: {integrity: sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==} engines: {node: '>=12'}