BEGIN;

-- Hybrid search function combining text and vector search.
--
-- Runs search_by_text and search_by_vector with an enlarged intermediate
-- limit (k * 4), merges the two candidate sets, normalizes each score list
-- with dbsf_normalize, and returns the k rows with the smallest fused
-- distance:
--
--     distance = 1.0 - ((1 - alpha) * norm_text_score + alpha * norm_embedding_score)
--
-- Parameters:
--   developer_id     passed through to both searches and echoed in each result row
--   query_text       text query forwarded to search_by_text
--   query_embedding  query vector forwarded to search_by_vector
--   owner_types      forwarded unchanged to both searches
--   owner_ids        forwarded unchanged to both searches
--   k                maximum number of rows returned; must be > 0
--   alpha            weight of the embedding score; the text score gets 1 - alpha;
--                    must be within [0, 1]
--   confidence       forwarded to search_by_vector
--   metadata_filter  forwarded unchanged to both searches
--   search_language  forwarded to search_by_text
--
-- Raises an exception when k <= 0 or when alpha lies outside [0, 1].
CREATE OR REPLACE FUNCTION search_hybrid (
    developer_id UUID,
    query_text text,
    query_embedding vector (1024),
    owner_types TEXT[],
    owner_ids UUID[],
    k integer DEFAULT 3,
    alpha float DEFAULT 0.7, -- Weight for embedding results
    confidence float DEFAULT 0.5,
    metadata_filter jsonb DEFAULT NULL,
    search_language text DEFAULT 'english'
) RETURNS SETOF doc_search_result AS $$
DECLARE
    text_weight float;
    embedding_weight float;
    intermediate_limit integer;
BEGIN
    -- Input validation
    IF k <= 0 THEN
        RAISE EXCEPTION 'k must be greater than 0';
    END IF;

    -- The two weights are alpha and (1 - alpha); outside [0, 1] one of them
    -- would become negative and invert that component's contribution.
    IF alpha < 0.0 OR alpha > 1.0 THEN
        RAISE EXCEPTION 'alpha must be between 0 and 1';
    END IF;

    text_weight := 1.0 - alpha;
    embedding_weight := alpha;
    -- Get more intermediate results than final to allow for better fusion
    intermediate_limit := k * 4;

    RETURN QUERY
    WITH text_results AS (
        SELECT
            t.doc_id,
            t.index,
            t.title,
            t.content,
            t.distance,
            t.embedding,
            t.metadata,
            t.owner_type,
            t.owner_id
        FROM search_by_text(
            search_hybrid.developer_id,
            query_text,
            owner_types,
            owner_ids,
            search_language,
            intermediate_limit, -- Use larger intermediate limit
            metadata_filter
        ) AS t
    ),
    embedding_results AS (
        SELECT
            e.doc_id,
            e.index,
            e.title,
            e.content,
            e.distance,
            e.embedding,
            e.metadata,
            e.owner_type,
            e.owner_id
        FROM search_by_vector(
            search_hybrid.developer_id,
            query_embedding,
            owner_types,
            owner_ids,
            intermediate_limit, -- Use larger intermediate limit
            confidence,
            metadata_filter
        ) AS e
    ),
    -- Candidate rows from either search. UNION (not UNION ALL) is deliberate:
    -- distance is left out of the column list so that a row found by both
    -- searches collapses into a single candidate.
    all_results AS (
        SELECT
            t.doc_id,
            t.title,
            t.content,
            t.metadata,
            t.embedding,
            t.index,
            t.owner_type,
            t.owner_id
        FROM text_results AS t
        UNION
        SELECT
            e.doc_id,
            e.title,
            e.content,
            e.metadata,
            e.embedding,
            e.index,
            e.owner_type,
            e.owner_id
        FROM embedding_results AS e
    ),
    -- Attach the raw score from each search (0.0 when that search did not
    -- return the document) and give every row a unique position that is used
    -- to line rows up with the normalized score arrays below.
    scores AS (
        SELECT
            r.doc_id,
            r.title,
            r.content,
            r.metadata,
            r.embedding,
            r.index,
            r.owner_type,
            r.owner_id,
            COALESCE(t.distance, 0.0) AS text_score,
            COALESCE(e.distance, 0.0) AS embedding_score,
            CAST(ROW_NUMBER() OVER (ORDER BY r.doc_id, r.index) AS integer) AS score_position
        FROM all_results AS r
        -- NOTE(review): these joins match on doc_id only. If a search can return
        -- several rows for one doc_id (e.g. differing index), rows are multiplied
        -- here -- confirm whether index should be part of the join key.
        LEFT JOIN text_results AS t
            ON r.doc_id = t.doc_id
        LEFT JOIN embedding_results AS e
            ON r.doc_id = e.doc_id
    ),
    -- Normalize each score list as a whole, then read every row's normalized
    -- value back by its position. The same score_position drives both the
    -- aggregation order and the array lookup, so the mapping stays correct
    -- even when doc_id is not unique.
    -- Assumes dbsf_normalize returns a 1-based array with one element per
    -- input element, in input order -- TODO confirm against its definition.
    normalized_scores AS (
        SELECT
            s.doc_id,
            s.title,
            s.content,
            s.metadata,
            s.embedding,
            s.index,
            s.owner_type,
            s.owner_id,
            n.normalized_text_scores[s.score_position] AS norm_text_score,
            n.normalized_embedding_scores[s.score_position] AS norm_embedding_score
        FROM scores AS s
        CROSS JOIN (
            SELECT
                dbsf_normalize(
                    array_agg(sc.text_score ORDER BY sc.score_position)
                ) AS normalized_text_scores,
                dbsf_normalize(
                    array_agg(sc.embedding_score ORDER BY sc.score_position)
                ) AS normalized_embedding_scores
            FROM scores AS sc
        ) AS n
    )
    SELECT
        search_hybrid.developer_id,
        ns.doc_id,
        ns.index,
        ns.title,
        ns.content,
        1.0 - (text_weight * ns.norm_text_score + embedding_weight * ns.norm_embedding_score) AS distance,
        ns.embedding,
        ns.metadata,
        ns.owner_type,
        ns.owner_id
    FROM normalized_scores AS ns
    -- Secondary keys make the LIMIT deterministic when fused distances tie.
    ORDER BY distance ASC, ns.doc_id, ns.index
    LIMIT k;
END;
$$ LANGUAGE plpgsql;

COMMENT ON FUNCTION search_hybrid IS 'Hybrid search combining text and vector search using Distribution-Based Score Fusion (DBSF)';

COMMIT;
0 commit comments