Skip to content

Commit e1d7b53

Browse files
committed
use trigram tokenizer instead of filter to maintain ordering
1 parent 2236db8 commit e1d7b53

File tree

3 files changed

+19
-18
lines changed

3 files changed

+19
-18
lines changed

Diff for: indexer.go

+7 -10
Original file line number | Diff line number | Diff line change
@@ -308,20 +308,12 @@ const indexSettings = `
308308
"routing_partition_size": 3
309309
},
310310
"analysis": {
311-
"filter": {
312-
"trigrams_filter": {
313-
"type": "ngram",
314-
"min_gram": 3,
315-
"max_gram": 3
316-
}
317-
},
318311
"analyzer": {
319312
"trigrams": {
320313
"type": "custom",
321-
"tokenizer": "standard",
314+
"tokenizer": "trigram",
322315
"filter": [
323-
"lowercase",
324-
"trigrams_filter"
316+
"lowercase"
325317
]
326318
},
327319
"locations": {
@@ -344,6 +336,11 @@ const indexSettings = `
344336
"type": "pattern",
345337
"pattern": "(.* > )?([^>]+)",
346338
"group": 2
339+
},
340+
"trigram": {
341+
"type" : "ngram",
342+
"min_gram" : 3,
343+
"max_gram" : 3
347344
}
348345
},
349346
"normalizer": {

Diff for: indexer_test.go

+10 -6
Original file line number | Diff line number | Diff line change
@@ -88,21 +88,25 @@ func TestIndexing(t *testing.T) {
8888
// urn substring query
8989
query = elastic.NewNestedQuery("urns", elastic.NewBoolQuery().Must(
9090
elastic.NewMatchQuery("urns.scheme", "tel"),
91-
elastic.NewMatchQuery("urns.path", "779")))
92-
assertQuery(t, client, physicalName, query, []int64{1, 3, 4, 5, 6, 7})
91+
elastic.NewMatchPhraseQuery("urns.path", "779")))
92+
assertQuery(t, client, physicalName, query, []int64{1, 3, 4, 7})
9393

9494
// urn substring query with more characters (77911)
9595
query = elastic.NewNestedQuery("urns", elastic.NewBoolQuery().Must(
9696
elastic.NewMatchQuery("urns.scheme", "tel"),
97-
elastic.NewMatchQuery("urns.path", "779"),
98-
elastic.NewMatchQuery("urns.path", "791"),
99-
elastic.NewMatchQuery("urns.path", "911")))
97+
elastic.NewMatchPhraseQuery("urns.path", "77911")))
10098
assertQuery(t, client, physicalName, query, []int64{1})
10199

100+
// urn substring query with more characters (600055)
101+
query = elastic.NewNestedQuery("urns", elastic.NewBoolQuery().Must(
102+
elastic.NewMatchQuery("urns.scheme", "tel"),
103+
elastic.NewMatchPhraseQuery("urns.path", "600055")))
104+
assertQuery(t, client, physicalName, query, []int64{6})
105+
102106
// match a contact with multiple tel urns
103107
query = elastic.NewNestedQuery("urns", elastic.NewBoolQuery().Must(
104108
elastic.NewMatchQuery("urns.scheme", "tel"),
105-
elastic.NewMatchQuery("urns.path", "222")))
109+
elastic.NewMatchPhraseQuery("urns.path", "222")))
106110
assertQuery(t, client, physicalName, query, []int64{1})
107111

108112
// text query

Diff for: testdb.sql

+2 -2
Original file line number | Diff line number | Diff line change
@@ -115,8 +115,8 @@ INSERT INTO contacts_contacturn(id, contact_id, scheme, org_id, priority, path,
115115
(3, 2, 'tel', 1, 50, '+12067793333', NULL, 'tel:+12067793333'),
116116
(4, 3, 'tel', 1, 50, '+12067794444', NULL, 'tel:+12067794444'),
117117
(5, 4, 'tel', 1, 50, '+12067795555', NULL, 'tel:+12067795555'),
118-
(6, 5, 'tel', 1, 50, '+12067796666', NULL, 'tel:+12067796666'),
119-
(7, 6, 'tel', 2, 50, '+12067797777', NULL, 'tel:+12067797777'),
118+
(6, 5, 'tel', 1, 50, '+12060000556', NULL, 'tel:+12067796666'),
119+
(7, 6, 'tel', 2, 50, '+12060005577', NULL, 'tel:+12067797777'),
120120
(8, 7, 'tel', 2, 50, '+12067798888', NULL, 'tel:+12067798888'),
121121
(9, 8, 'viber', 2, 90, 'viberpath==', NULL, 'viber:viberpath=='),
122122
(10, 9, 'facebook', 2, 90, 1000001, 'funguy', 'facebook:1000001'),

0 commit comments

Comments
 (0)