Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BREAKING] Language sorting on Indexed data. #4316

Merged
merged 8 commits into from
Feb 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dgraph/cmd/bulk/mapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ func (m *mapper) addIndexMapEntries(nq gql.NQuad, de *pb.DirectedEdge) {
x.Check(err)

// Extract tokens.
toks, err := tok.BuildTokens(schemaVal.Value, tok.GetLangTokenizer(toker, nq.Lang))
toks, err := tok.BuildTokens(schemaVal.Value, tok.GetTokenizerForLang(toker, nq.Lang))
x.Check(err)

// Store index posting.
Expand Down
2 changes: 1 addition & 1 deletion dgraph/cmd/live/batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func (l *loader) conflictKeysForNQuad(nq *api.NQuad) ([]uint64, error) {
if err != nil {
errs = append(errs, err.Error())
}
toks, err := tok.BuildTokens(schemaVal.Value, tok.GetLangTokenizer(token, nq.Lang))
toks, err := tok.BuildTokens(schemaVal.Value, tok.GetTokenizerForLang(token, nq.Lang))
if err != nil {
errs = append(errs, err.Error())
}
Expand Down
26 changes: 22 additions & 4 deletions posting/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func indexTokens(info *indexMutationInfo) ([]string, error) {

var tokens []string
for _, it := range info.tokenizers {
toks, err := tok.BuildTokens(sv.Value, tok.GetLangTokenizer(it, lang))
toks, err := tok.BuildTokens(sv.Value, tok.GetTokenizerForLang(it, lang))
if err != nil {
return tokens, err
}
Expand Down Expand Up @@ -439,13 +439,18 @@ func (l *List) AddMutationWithIndex(ctx context.Context, edge *pb.DirectedEdge,
}

// deleteTokensFor deletes the index for the given attribute and tokenizer.
func deleteTokensFor(attr, tokenizerName string) error {
func deleteTokensFor(attr, tokenizerName string, hasLang bool) error {
pk := x.ParsedKey{Attr: attr}
prefix := pk.IndexPrefix()
tokenizer, ok := tok.GetTokenizer(tokenizerName)
if !ok {
return errors.Errorf("Could not find valid tokenizer for %s", tokenizerName)
}
if hasLang {
// We only need the identifier of the language-aware ExactTokenizer here.
// It is the same for every language.
tokenizer = tok.GetTokenizerForLang(tokenizer, "en")
}
prefix = append(prefix, tokenizer.Identifier())
if err := pstore.DropPrefix(prefix); err != nil {
return err
Expand Down Expand Up @@ -785,7 +790,13 @@ func rebuildIndex(ctx context.Context, rb *IndexRebuild) error {
glog.Infof("Deleting index for attr %s and tokenizers %s", rb.Attr,
rebuildInfo.tokenizersToDelete)
for _, tokenizer := range rebuildInfo.tokenizersToDelete {
if err := deleteTokensFor(rb.Attr, tokenizer); err != nil {
if err := deleteTokensFor(rb.Attr, tokenizer, false); err != nil {
return err
}
if tokenizer != "exact" {
continue
}
if err := deleteTokensFor(rb.Attr, tokenizer, true); err != nil {
return err
}
}
Expand All @@ -804,7 +815,13 @@ func rebuildIndex(ctx context.Context, rb *IndexRebuild) error {
rebuildInfo.tokenizersToRebuild)
// Before rebuilding, the existing index needs to be deleted.
for _, tokenizer := range rebuildInfo.tokenizersToRebuild {
if err := deleteTokensFor(rb.Attr, tokenizer); err != nil {
if err := deleteTokensFor(rb.Attr, tokenizer, false); err != nil {
return err
}
if tokenizer != "exact" {
continue
}
if err := deleteTokensFor(rb.Attr, tokenizer, true); err != nil {
return err
}
}
Expand All @@ -824,6 +841,7 @@ func rebuildIndex(ctx context.Context, rb *IndexRebuild) error {
Value: p.Value,
Tid: types.TypeID(p.ValType),
}
edge.Lang = string(p.LangTag)

for {
err := txn.addIndexMutations(ctx, &indexMutationInfo{
Expand Down
14 changes: 13 additions & 1 deletion query/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,10 +236,12 @@ type Speaker {
}

name : string @index(term, exact, trigram) @count @lang .
name_lang : string @lang .
name_lang : string @lang .
lang_type : string @index(exact) .
name_lang_index : string @index(exact) @lang .
alt_name : [string] @index(term, exact, trigram) @count .
alias : string @index(exact, term, fulltext) .
alias_lang : string @index(exact) @lang .
abbr : string .
dob : dateTime @index(year) .
dob_day : dateTime @index(day) .
Expand Down Expand Up @@ -370,9 +372,13 @@ func populateCluster() {
<10007> <name> "Elizabeth" .
<10101> <name_lang> "zon"@sv .
<10101> <name_lang> "öffnen"@de .
<10101> <name_lang_index> "zon"@sv .
<10101> <name_lang_index> "öffnen"@de .
<10101> <lang_type> "Test" .
<10102> <name_lang> "öppna"@sv .
<10102> <name_lang> "zumachen"@de .
<10102> <name_lang_index> "öppna"@sv .
<10102> <name_lang_index> "zumachen"@de .
<10102> <lang_type> "Test" .
<11000> <name> "Baz Luhrmann"@en .
<11001> <name> "Strictly Ballroom"@en .
Expand Down Expand Up @@ -514,6 +520,12 @@ func populateCluster() {
<31> <alias> "Allan Matt" .
<101> <alias> "John Oliver" .

<23> <alias_lang> "Zambo Alice"@en .
<24> <alias_lang> "John Alice"@en .
<25> <alias_lang> "Bob Joe"@en .
<31> <alias_lang> "Allan Matt"@en .
<101> <alias_lang> "John Oliver"@en .

<1> <bin_data> "YmluLWRhdGE=" .

<1> <graduation> "1932-01-01" .
Expand Down
16 changes: 13 additions & 3 deletions query/query1_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,16 +200,26 @@ func TestToFastJSONOrderLang(t *testing.T) {
query := `
{
me(func: uid(0x01)) {
friend(first:2, orderdesc: alias@en) {
alias
friend(first: 2, orderdesc: alias_lang@en) {
alias_lang@en
}
}
}
`

js := processQueryNoErr(t, query)
require.JSONEq(t,
`{"data": {"me":[{"friend":[{"alias":"Zambo Alice"},{"alias":"John Oliver"}]}]}}`,
`{
"data": {
"me": [{
"friend": [{
"alias_lang@en": "Zambo Alice"
}, {
"alias_lang@en": "John Oliver"
}]
}]
}
}`,
js)
}

Expand Down
167 changes: 167 additions & 0 deletions query/query2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -907,6 +907,173 @@ func TestLanguageOrderNonIndexed2(t *testing.T) {
js)
}

// TestLanguageOrderIndexed1 sorts the nodes matched by eq(lang_type, "Test")
// in ascending order of name_lang_index@de and expects the node whose German
// value is "öffnen" to be returned before the one whose value is "zumachen".
func TestLanguageOrderIndexed1(t *testing.T) {
	const want = `{
"data": {
"q": [{
"name_lang_index@de": "öffnen",
"name_lang_index@sv": "zon"
}, {
"name_lang_index@de": "zumachen",
"name_lang_index@sv": "öppna"
}]
}
}`
	req := `
{
q(func:eq(lang_type, "Test"), orderasc: name_lang_index@de) {
name_lang_index@de
name_lang_index@sv
}
}
`

	got := processQueryNoErr(t, req)
	require.JSONEq(t, want, got)
}

// TestLanguageOrderIndexed2 sorts the nodes matched by eq(lang_type, "Test")
// in ascending order of name_lang_index@sv and expects the node whose Swedish
// value is "zon" to be returned before the one whose value is "öppna".
func TestLanguageOrderIndexed2(t *testing.T) {
	const want = `{
"data": {
"q": [{
"name_lang_index@de": "öffnen",
"name_lang_index@sv": "zon"
}, {
"name_lang_index@de": "zumachen",
"name_lang_index@sv": "öppna"
}]
}
}`
	req := `
{
q(func:eq(lang_type, "Test"), orderasc: name_lang_index@sv) {
name_lang_index@de
name_lang_index@sv
}
}
`

	got := processQueryNoErr(t, req)
	require.JSONEq(t, want, got)
}

// TestLanguageOrderIndexed3 orders by name_lang_index without any language
// tag and expects the query to return an empty result list.
func TestLanguageOrderIndexed3(t *testing.T) {
	const want = `{
"data": {
"q": []
}
}`
	req := `
{
q(func:eq(lang_type, "Test"), orderasc: name_lang_index) {
name_lang_index@de
name_lang_index@sv
}
}
`

	got := processQueryNoErr(t, req)
	require.JSONEq(t, want, got)
}

// TestLanguageOrderIndexed4 orders by name_lang_index@hi, a language tag that
// differs from the @de and @sv tags requested in the query body, and expects
// the query to return an empty result list.
func TestLanguageOrderIndexed4(t *testing.T) {
	const want = `{
"data": {
"q": []
}
}`
	req := `
{
q(func:eq(lang_type, "Test"), orderasc: name_lang_index@hi) {
name_lang_index@de
name_lang_index@sv
}
}
`

	got := processQueryNoErr(t, req)
	require.JSONEq(t, want, got)
}

// TestLanguageOrderIndexed5 sorts the nodes matched by eq(lang_type, "Test")
// in descending order of name_lang_index@de and expects the node whose German
// value is "zumachen" to be returned before the one whose value is "öffnen".
func TestLanguageOrderIndexed5(t *testing.T) {
	const want = `{
"data": {
"q": [{
"name_lang_index@de": "zumachen",
"name_lang_index@sv": "öppna"
}, {
"name_lang_index@de": "öffnen",
"name_lang_index@sv": "zon"
}]
}
}`
	req := `
{
q(func:eq(lang_type, "Test"), orderdesc: name_lang_index@de) {
name_lang_index@de
name_lang_index@sv
}
}
`

	got := processQueryNoErr(t, req)
	require.JSONEq(t, want, got)
}

// TestLanguageOrderIndexed6 sorts the nodes matched by eq(lang_type, "Test")
// in descending order of name_lang_index@sv and expects the node whose Swedish
// value is "öppna" to be returned before the one whose value is "zon".
func TestLanguageOrderIndexed6(t *testing.T) {
	const want = `{
"data": {
"q": [{
"name_lang_index@de": "zumachen",
"name_lang_index@sv": "öppna"
}, {
"name_lang_index@de": "öffnen",
"name_lang_index@sv": "zon"
}]
}
}`
	req := `
{
q(func:eq(lang_type, "Test"), orderdesc: name_lang_index@sv) {
name_lang_index@de
name_lang_index@sv
}
}
`

	got := processQueryNoErr(t, req)
	require.JSONEq(t, want, got)
}

// TestLanguageOrderIndexedPaginationOffset combines ascending ordering on
// name_lang_index@sv with first: 1 and offset: 1, and expects only the node
// whose Swedish value is "öppna" to be returned.
func TestLanguageOrderIndexedPaginationOffset(t *testing.T) {
	const want = `{
"data": {
"q": [{
"name_lang_index@de": "zumachen",
"name_lang_index@sv": "öppna"
}]
}
}`
	req := `
{
q(func:eq(lang_type, "Test"), orderasc: name_lang_index@sv, first: 1, offset: 1) {
name_lang_index@de
name_lang_index@sv
}
}
`

	got := processQueryNoErr(t, req)
	require.JSONEq(t, want, got)
}

// Test sorting / ordering by dob.
func TestToFastJSONOrderDesc_pawan(t *testing.T) {

Expand Down
20 changes: 10 additions & 10 deletions query/query4_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -636,19 +636,19 @@ func TestHasOrderDesc(t *testing.T) {
"data": {
"q": [
{
"name": ""
"name": "name"
},
{
"name": ""
"name": "expand"
},
{
"name": "Badger"
"name": "Shoreline Amphitheater"
},
{
"name": "name"
"name": "School B"
},
{
"name": "expand"
"name": "School A"
}
]
}
Expand All @@ -665,19 +665,19 @@ func TestHasOrderDescOffset(t *testing.T) {
"data": {
"q": [
{
"name": "Shoreline Amphitheater"
"name": "San Mateo School District"
},
{
"name": "School B"
"name": "San Mateo High School"
},
{
"name": "School A"
"name": "San Mateo County"
},
{
"name": "San Mateo School District"
"name": "San Carlos Airport"
},
{
"name": "San Mateo High School"
"name": "San Carlos"
}
]
}
Expand Down
4 changes: 2 additions & 2 deletions tok/langbase.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ var langBaseCache struct {
m map[string]string
}

// langBase returns the BCP47 base of a language.
// LangBase returns the BCP47 base of a language.
// If the confidence of the matching is better than none, we return that base.
// Otherwise, we return "en" (English) which is a good default.
func langBase(lang string) string {
func LangBase(lang string) string {
if lang == "" {
return enBase // default to this
}
Expand Down
Loading