Skip to content

Commit fdef0af

Browse files
authored
Merge pull request #9 from nyaruka/update_location_keyword_type
Update location index spec
2 parents 5c74175 + e0dfb98 commit fdef0af

File tree

2 files changed

+83
-61
lines changed

2 files changed

+83
-61
lines changed

Diff for: indexer.go

+77 -55
Original file line number | Diff line number | Diff line change
@@ -342,33 +342,65 @@ func MapIndexAlias(elasticURL string, alias string, newIndex string) error {
342342
}
343343

344344
const contactQuery = `
345-
SELECT org_id, id, modified_on, is_active, row_to_json(t) FROM(
346-
SELECT id, org_id, uuid, name, language, is_stopped, is_blocked, is_active, created_on, modified_on,
347-
EXTRACT(EPOCH FROM modified_on) * 1000000 as modified_on_mu,
348-
(
349-
SELECT array_to_json(array_agg(row_to_json(u))) FROM (
345+
SELECT org_id, id, modified_on, is_active, row_to_json(t) FROM (
346+
SELECT
347+
id, org_id, uuid, name, language, is_stopped, is_blocked, is_active, created_on, modified_on,
348+
EXTRACT(EPOCH FROM modified_on) * 1000000 as modified_on_mu,
349+
(
350+
SELECT array_to_json(array_agg(row_to_json(u)))
351+
FROM (
350352
SELECT scheme, path
351353
FROM contacts_contacturn
352-
WHERE contact_id=contacts_contact.id
353-
) u
354-
) as urns,
355-
(
356-
SELECT jsonb_agg(f.value) FROM (
357-
SELECT value||jsonb_build_object('field', key) as value from jsonb_each(contacts_contact.fields)
358-
) as f
359-
) as fields,
360-
(
361-
SELECT array_to_json(array_agg(g.uuid)) FROM (
362-
SELECT contacts_contactgroup.uuid
363-
FROM contacts_contactgroup_contacts, contacts_contactgroup
364-
WHERE contact_id=contacts_contact.id AND contacts_contactgroup_contacts.contactgroup_id = contacts_contactgroup.id
365-
) g
366-
) as groups
367-
FROM contacts_contact
368-
WHERE is_test = FALSE AND modified_on >= $1
369-
ORDER BY modified_on ASC
370-
LIMIT 10000
371-
) t
354+
WHERE contact_id = contacts_contact.id
355+
) u
356+
) as urns,
357+
(
358+
SELECT jsonb_agg(f.value)
359+
FROM (
360+
select case
361+
when value ? 'ward'
362+
then jsonb_build_object(
363+
'ward_keyword', (regexp_matches(value ->> 'ward', '(.* > )?([^>]+)'))[2]
364+
)
365+
else '{}' :: jsonb
366+
end || district_value.value as value
367+
FROM (
368+
select case
369+
when value ? 'district'
370+
then jsonb_build_object(
371+
'district_keyword', (regexp_matches(value ->> 'district', '(.* > )?([^>]+)'))[2]
372+
)
373+
else '{}' :: jsonb
374+
end || state_value.value as value
375+
FROM (
376+
377+
select case
378+
when value ? 'state'
379+
then jsonb_build_object(
380+
'state_keyword', (regexp_matches(value ->> 'state', '(.* > )?([^>]+)'))[2]
381+
)
382+
else '{}' :: jsonb
383+
end ||
384+
jsonb_build_object('field', key) || value as value
385+
from jsonb_each(contacts_contact.fields)
386+
) state_value
387+
) as district_value
388+
) as f
389+
) as fields,
390+
(
391+
SELECT array_to_json(array_agg(g.uuid))
392+
FROM (
393+
SELECT contacts_contactgroup.uuid
394+
FROM contacts_contactgroup_contacts, contacts_contactgroup
395+
WHERE contact_id = contacts_contact.id AND
396+
contacts_contactgroup_contacts.contactgroup_id = contacts_contactgroup.id
397+
) g
398+
) as groups
399+
FROM contacts_contact
400+
WHERE is_test = FALSE AND modified_on >= $1
401+
ORDER BY modified_on ASC
402+
LIMIT 10000
403+
) t;
372404
`
373405

374406
// settings and mappings for our index
@@ -396,13 +428,6 @@ const indexSettings = `
396428
"word_delimiter"
397429
]
398430
},
399-
"locations_keyword": {
400-
"tokenizer": "location_tokenizer",
401-
"filter": [
402-
"lowercase",
403-
"trim"
404-
]
405-
},
406431
"prefix": {
407432
"type": "custom",
408433
"tokenizer": "standard",
@@ -436,7 +461,7 @@ const indexSettings = `
436461
"lowercase": {
437462
"type": "custom",
438463
"char_filter": [],
439-
"filter": ["lowercase"]
464+
"filter": ["lowercase", "trim"]
440465
}
441466
},
442467
"filter": {
@@ -479,34 +504,31 @@ const indexSettings = `
479504
},
480505
"state": {
481506
"type": "text",
482-
"analyzer": "locations",
483-
"fields": {
484-
"keyword": {
485-
"type": "text",
486-
"analyzer": "locations_keyword"
487-
}
488-
}
507+
"analyzer": "locations"
489508
},
509+
"state_keyword": {
510+
"type": "keyword",
511+
"normalizer": "lowercase",
512+
"ignore_above": 64
513+
},
490514
"district": {
491515
"type": "text",
492-
"analyzer": "locations",
493-
"fields": {
494-
"keyword": {
495-
"type": "text",
496-
"analyzer": "locations_keyword"
497-
}
498-
}
516+
"analyzer": "locations"
499517
},
518+
"district_keyword": {
519+
"type": "keyword",
520+
"normalizer": "lowercase",
521+
"ignore_above": 64
522+
},
500523
"ward": {
501524
"type": "text",
502-
"analyzer": "locations",
503-
"fields": {
504-
"keyword": {
505-
"type": "text",
506-
"analyzer": "locations_keyword"
507-
}
508-
}
509-
}
525+
"analyzer": "locations"
526+
},
527+
"ward_keyword": {
528+
"type": "keyword",
529+
"normalizer": "lowercase",
530+
"ignore_above": 64
531+
}
510532
}
511533
},
512534
"urns": {

Diff for: indexer_test.go

+6 -6
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@ func setup(t *testing.T) (*sql.DB, *elastic.Client) {
3030
_, err = db.Exec(string(testDB))
3131
assert.NoError(t, err)
3232

33-
client, err := elastic.NewClient(elastic.SetTraceLog(log.New(os.Stdout, "", log.LstdFlags)))
33+
client, err := elastic.NewClient(elastic.SetURL(elasticURL), elastic.SetTraceLog(log.New(os.Stdout, "", log.LstdFlags)))
3434
assert.NoError(t, err)
3535

3636
existing := FindPhysicalIndexes(elasticURL, indexName)
@@ -164,13 +164,13 @@ func TestIndexing(t *testing.T) {
164164

165165
query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must(
166166
elastic.NewMatchQuery("fields.field", "22d11697-edba-4186-b084-793e3b876379"),
167-
elastic.NewMatchQuery("fields.state.keyword", " washington")))
167+
elastic.NewMatchQuery("fields.state_keyword", " washington")))
168168
assertQuery(t, client, physicalName, query, []int64{6})
169169

170170
// doesn't include country
171171
query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must(
172172
elastic.NewMatchQuery("fields.field", "22d11697-edba-4186-b084-793e3b876379"),
173-
elastic.NewMatchQuery("fields.state.keyword", "usa")))
173+
elastic.NewMatchQuery("fields.state_keyword", "usa")))
174174
assertQuery(t, client, physicalName, query, []int64{})
175175

176176
query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must(
@@ -192,7 +192,7 @@ func TestIndexing(t *testing.T) {
192192

193193
query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must(
194194
elastic.NewMatchQuery("fields.field", "fcab2439-861c-4832-aa54-0c97f38f24ab"),
195-
elastic.NewMatchQuery("fields.district.keyword", "King County")))
195+
elastic.NewMatchQuery("fields.district_keyword", "King County")))
196196
assertQuery(t, client, physicalName, query, []int64{8})
197197

198198
// ward query
@@ -203,13 +203,13 @@ func TestIndexing(t *testing.T) {
203203

204204
query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must(
205205
elastic.NewMatchQuery("fields.field", "a551ade4-e5a0-4d83-b185-53b515ad2f2a"),
206-
elastic.NewMatchQuery("fields.ward.keyword", "central district")))
206+
elastic.NewMatchQuery("fields.ward_keyword", "central district")))
207207
assertQuery(t, client, physicalName, query, []int64{9})
208208

209209
// no substring though on keyword
210210
query = elastic.NewNestedQuery("fields", elastic.NewBoolQuery().Must(
211211
elastic.NewMatchQuery("fields.field", "a551ade4-e5a0-4d83-b185-53b515ad2f2a"),
212-
elastic.NewMatchQuery("fields.ward.keyword", "district")))
212+
elastic.NewMatchQuery("fields.ward_keyword", "district")))
213213
assertQuery(t, client, physicalName, query, []int64{})
214214

215215
// group query

0 commit comments

Comments
 (0)