Skip to content

Commit 97dba37

Browse files
committed
add prefix name querying, add modified_on_mu for sorting/building
1 parent 619b894 commit 97dba37

File tree

3 files changed

+40
-15
lines changed

3 files changed

+40
-15
lines changed

Diff for: indexer.go

+25-6
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,8 @@ func MapIndexAlias(elasticURL string, alias string, newIndex string) error {
308308

309309
const contactQuery = `
310310
SELECT org_id, id, modified_on, is_active, row_to_json(t) FROM(
311-
SELECT id, org_id, uuid, name, language, is_stopped, is_blocked, is_active, created_on, modified_on,
311+
SELECT id, org_id, uuid, name, language, is_stopped, is_blocked, is_active, created_on, modified_on,
312+
EXTRACT(EPOCH FROM modified_on) * 1000000 as modified_on_mu,
312313
(
313314
SELECT array_to_json(array_agg(row_to_json(u))) FROM (
314315
SELECT scheme, path
@@ -366,7 +367,15 @@ const indexSettings = `
366367
"lowercase",
367368
"trim"
368369
]
369-
}
370+
},
371+
"prefix": {
372+
"type": "custom",
373+
"tokenizer": "standard",
374+
"filter": [
375+
"lowercase",
376+
"prefix_filter"
377+
]
378+
}
370379
},
371380
"tokenizer": {
372381
"location_tokenizer": {
@@ -386,7 +395,14 @@ const indexSettings = `
386395
"char_filter": [],
387396
"filter": ["lowercase"]
388397
}
389-
}
398+
},
399+
"filter": {
400+
"prefix_filter": {
401+
"type": "edge_ngram",
402+
"min_gram": 1,
403+
"max_gram": 8
404+
}
405+
}
390406
}
391407
},
392408
@@ -478,10 +494,13 @@ const indexSettings = `
478494
},
479495
"modified_on": {
480496
"type": "date"
481-
},
497+
},
498+
"modified_on_mu": {
499+
"type": "long"
500+
},
482501
"name": {
483502
"type": "text",
484-
"analyzer": "simple",
503+
"analyzer": "prefix",
485504
"fields": {
486505
"keyword": {
487506
"type": "keyword",
@@ -497,7 +516,7 @@ const indexSettings = `
497516
`
498517

499518
// gets our last modified contact
500-
const lastModifiedQuery = `{ "sort": [{ "modified_on": "desc" }]}`
519+
const lastModifiedQuery = `{ "sort": [{ "modified_on_mu": "desc" }]}`
501520

502521
// indexes a contact
503522
const indexCommand = `{ "index": { "_id": %d, "_type": "_doc", "_version": %d, "_version_type": "external", "_routing": %d} }`

Diff for: indexer_test.go

+13-7
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
_ "github.com/lib/pq"
1414
"github.com/olivere/elastic"
15+
"github.com/sirupsen/logrus"
1516
"github.com/stretchr/testify/assert"
1617
)
1718

@@ -37,6 +38,8 @@ func setup(t *testing.T) (*sql.DB, *elastic.Client) {
3738
assert.NoError(t, err)
3839
}
3940

41+
logrus.SetLevel(logrus.DebugLevel)
42+
4043
return db, client
4144
}
4245

@@ -66,7 +69,10 @@ func TestIndexing(t *testing.T) {
6669

6770
time.Sleep(2 * time.Second)
6871

69-
assertQuery(t, client, physicalName, elastic.NewMatchQuery("name", "JOHn"), []int64{5})
72+
assertQuery(t, client, physicalName, elastic.NewMatchQuery("name", "JOHn").Analyzer("standard"), []int64{5})
73+
74+
// prefix on name matches both john and joanne, but not ajodi
75+
assertQuery(t, client, physicalName, elastic.NewMatchQuery("name", "JO").Analyzer("standard"), []int64{5, 7})
7076
assertQuery(t, client, physicalName, elastic.NewTermQuery("name.keyword", "JOHN DOE"), []int64{5})
7177

7278
assertQuery(t, client, physicalName, elastic.NewMatchQuery("language", "eng"), []int64{1})
@@ -201,10 +207,10 @@ func TestIndexing(t *testing.T) {
201207
// map our index over
202208
err = MapIndexAlias(elasticURL, indexName, physicalName)
203209
assert.NoError(t, err)
204-
time.Sleep(2 * time.Second)
210+
time.Sleep(5 * time.Second)
205211

206212
// try a test query to check it worked
207-
assertQuery(t, client, indexName, elastic.NewMatchQuery("name", "john"), []int64{5})
213+
assertQuery(t, client, indexName, elastic.NewMatchQuery("name", "john").Analyzer("standard"), []int64{5})
208214

209215
// look up our mapping
210216
physical := FindPhysicalIndexes(elasticURL, indexName)
@@ -223,9 +229,9 @@ func TestIndexing(t *testing.T) {
223229
err = MapIndexAlias(elasticURL, indexName, newIndex)
224230
assert.NoError(t, err)
225231

226-
time.Sleep(2 * time.Second)
232+
time.Sleep(5 * time.Second)
227233

228-
assertQuery(t, client, newIndex, elastic.NewMatchQuery("name", "john"), []int64{5})
234+
assertQuery(t, client, newIndex, elastic.NewMatchQuery("name", "john").Analyzer("standard"), []int64{5})
229235

230236
// update our database, removing one contact, updating another
231237
dbUpdate, err := ioutil.ReadFile("testdb_update.sql")
@@ -238,10 +244,10 @@ func TestIndexing(t *testing.T) {
238244
assert.Equal(t, 1, added)
239245
assert.Equal(t, 1, deleted)
240246

241-
time.Sleep(2 * time.Second)
247+
time.Sleep(5 * time.Second)
242248

243249
// should only match new john, old john is gone
244-
assertQuery(t, client, indexName, elastic.NewMatchQuery("name", "john"), []int64{3})
250+
assertQuery(t, client, indexName, elastic.NewMatchQuery("name", "john").Analyzer("standard"), []int64{3})
245251

246252
// 3 is no longer in our group
247253
assertQuery(t, client, indexName, elastic.NewMatchQuery("groups", "529bac39-550a-4d6f-817c-1833f3449007"), []int64{1})

Diff for: testdb.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,9 @@ INSERT INTO contacts_contact(id, is_active, created_by_id, created_on, modified_
9898
'{ "05bca1cd-e322-4837-9595-86d0d85e5adb": {"text": "9", "decimal": 9 }, "e0eac267-463a-4c00-9732-cab62df07b16": { "text": "2018-04-06T18:37:59+00:00", "datetime": "2018-04-06T18:37:59+00:00"}}'),
9999
(5, TRUE, -1, '2015-03-27 07:39:28.955051+00', -1, '2015-03-27 07:39:28.955051+00', 1, FALSE, 'John Doe', FALSE, NULL, '51762bba-01a2-4c4e-b5cd-b182d0405cd4', FALSE,
100100
'{ "e0eac267-463a-4c00-9732-cab62df07b16": { "text": "2030-04-06T18:37:59+00:00", "datetime": "2030-04-06T18:37:59+00:00"}}'),
101-
(6, TRUE, -1, '2015-10-30 19:42:27.001837+00', -1, '2015-10-30 19:42:27.001837+00', 2, FALSE, NULL, FALSE, NULL, '3e814add-e614-41f7-8b5d-a07f670a698f', FALSE,
101+
(6, TRUE, -1, '2015-10-30 19:42:27.001837+00', -1, '2015-10-30 19:42:27.001837+00', 2, FALSE, 'Ajodi Dane', FALSE, NULL, '3e814add-e614-41f7-8b5d-a07f670a698f', FALSE,
102102
'{ "22d11697-edba-4186-b084-793e3b876379": { "text": "USA > Washington", "state": "USA > Washington"} }'),
103-
(7, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', 2, FALSE, NULL, FALSE, NULL, '7051dff0-0a27-49d7-af1f-4494239139e6', FALSE,
103+
(7, TRUE, -1, '2017-11-10 21:11:59.890662+00', -1, '2017-11-10 21:11:59.890662+00', 2, FALSE, 'Joanne Stone', FALSE, NULL, '7051dff0-0a27-49d7-af1f-4494239139e6', FALSE,
104104
'{ "22d11697-edba-4186-b084-793e3b876379": { "text": "USA > Colorado", "state": "USA > Colorado"} }'),
105105
(8, TRUE, -1, '2015-03-27 13:39:43.995812+00', -1, '2015-03-27 13:39:43.995812+00', 2, FALSE, NULL, FALSE, NULL, 'b46f6e18-95b4-4984-9926-dded047f4eb3', FALSE,
106106
'{ "fcab2439-861c-4832-aa54-0c97f38f24ab": { "text": "USA > Washington > King County", "district": "USA > Washington > King County"} }'),

0 commit comments

Comments
 (0)