From 916518fd4fcdc00ea809d409f13692a038f2d1e5 Mon Sep 17 00:00:00 2001 From: Animesh Chandra Pathak Date: Tue, 14 Jan 2020 16:24:46 +0530 Subject: [PATCH] Support filtering on nonindexed predicate (#4531) --- query/common_test.go | 29 ++++- query/query0_test.go | 264 +++++++++++++++++++++++++++++++++++++++++++ query/query1_test.go | 17 +-- worker/task.go | 65 +++++++---- 4 files changed, 341 insertions(+), 34 deletions(-) diff --git a/query/common_test.go b/query/common_test.go index b434531cd7d..9d57519ab39 100644 --- a/query/common_test.go +++ b/query/common_test.go @@ -256,7 +256,6 @@ value : string @index(trigram) . full_name : string @index(hash) . nick_name : string @index(term) . royal_title : string @index(hash, term, fulltext) @lang . -noindex_name : string . school : [uid] @count . lossy : string @index(term) @lang . occupations : [string] @index(term) . @@ -288,6 +287,11 @@ boss : uid . newfriend : [uid] . owner : [uid] . noconflict_pred : string @noconflict . +noindex_name : string . +noindex_age : int . +noindex_dob : datetime . +noindex_alive : bool . +noindex_salary : float . language : [string] . ` @@ -390,6 +394,29 @@ func populateCluster() { <1> "Michonne's large name for hashing" . <1> "Michonne's name not indexed" . + <2> "King Lear's name not indexed" . + <3> "Margaret's name not indexed" . + <4> "Leonard's name not indexed" . + + <1> "21" . + <2> "22" . + <3> "23" . + <4> "24" . + + <1> "1810-11-01" . + <2> "1710-11-01" . + <3> "1610-11-01" . + <4> "1510-11-01" . + + <1> "true" . + <2> "false" . + <3> "false" . + <4> "true" . + + <1> "501.23" . + <2> "589.04" . + <3> "459.47" . + <4> "967.68" . <1> <23> . <1> <24> . diff --git a/query/query0_test.go b/query/query0_test.go index 8e9a5f7ad63..d1f0cd31555 100644 --- a/query/query0_test.go +++ b/query/query0_test.go @@ -2674,6 +2674,270 @@ func TestCountUidWithAlias(t *testing.T) { js) } +func TestFilterNonIndexedPredicate(t *testing.T) { + t.Parallel() + tests := []struct { + name string + query string + result string + }{ + { + `Test ge filter on non-indexed string`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(ge(noindex_name, "Leonard's name not indexed")) { + noindex_name + } + } + `, + `{"data":{"me":[{"noindex_name":"Michonne's name not indexed"},{"noindex_name":"Margaret's name not indexed"},{"noindex_name":"Leonard's name not indexed"}]}}`, + }, + { + `Test gt filter on non-indexed string`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(gt(noindex_name, "Leonard's name not indexed")) { + noindex_name + } + } + `, + `{"data":{"me":[{"noindex_name":"Michonne's name not indexed"},{"noindex_name":"Margaret's name not indexed"}]}}`, + }, + { + `Test le filter on non-indexed string`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(le(noindex_name, "Leonard's name not indexed")) { + noindex_name + } + } + `, + `{"data":{"me":[{"noindex_name":"King Lear's name not indexed"},{"noindex_name":"Leonard's name not indexed"}]}}`, + }, + { + `Test lt filter on non-indexed string`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(lt(noindex_name, "Leonard's name not indexed")){ + noindex_name + } + }, + `, + `{"data":{"me":[{"noindex_name":"King Lear's name not indexed"}]}}`, + }, + { + `Test eq filter on non-indexed string`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_name, "King Lear's name not indexed")) { + noindex_name + } + } + `, + `{"data":{"me":[{"noindex_name":"King Lear's name not indexed"}]}}`, + }, + { + `Test ge filter on non-indexed int`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(ge(noindex_age, "22")) { + noindex_age + } + } + `, + `{"data":{"me":[{"noindex_age":22},{"noindex_age":23},{"noindex_age":24}]}}`, + }, + { + `Test gt filter on non-indexed int`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(gt(noindex_age, "22")) { + noindex_age + } + } + `, + `{"data":{"me":[{"noindex_age":23},{"noindex_age":24}]}}`, + }, + { + `Test le filter on non-indexed int`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(le(noindex_age, "22")) { + noindex_age + } + } + `, + `{"data":{"me":[{"noindex_age":21},{"noindex_age":22}]}}`, + }, + { + `Test lt filter on non-indexed int`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(lt(noindex_age, "22")){ + noindex_age + } + }, + `, + `{"data":{"me":[{"noindex_age":21}]}}`, + }, + { + `Test eq filter on non-indexed int`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_age, "22")) { + noindex_age + } + } + `, + `{"data":{"me":[{"noindex_age":22}]}}`, + }, + { + `Test ge filter on non-indexed datetime`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(ge(noindex_dob, "1610-11-01")) { + noindex_dob + } + } + `, + `{"data":{"me":[{"noindex_dob":"1810-11-01T00:00:00Z"},{"noindex_dob":"1710-11-01T00:00:00Z"},{"noindex_dob":"1610-11-01T00:00:00Z"}]}}`, + }, + { + `Test gt filter on non-indexed datetime`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(gt(noindex_dob, "1610-11-01")) { + noindex_dob + } + } + `, + `{"data":{"me":[{"noindex_dob":"1810-11-01T00:00:00Z"},{"noindex_dob":"1710-11-01T00:00:00Z"}]}}`, + }, + { + `Test le filter on non-indexed datetime`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(le(noindex_dob, "1610-11-01")) { + noindex_dob + } + } + `, + `{"data":{"me":[{"noindex_dob":"1610-11-01T00:00:00Z"},{"noindex_dob":"1510-11-01T00:00:00Z"}]}}`, + }, + { + `Test lt filter on non-indexed datetime`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(lt(noindex_dob, "1610-11-01")){ + noindex_dob + } + }, + `, + `{"data":{"me":[{"noindex_dob":"1510-11-01T00:00:00Z"}]}}`, + }, + { + `Test eq filter on non-indexed datetime`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_dob, "1610-11-01")) { + noindex_dob + } + } + `, + `{"data":{"me":[{"noindex_dob":"1610-11-01T00:00:00Z"}]}}`, + }, + { + `Test ge filter on non-indexed float`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(ge(noindex_salary, "589.04")) { + noindex_salary + } + } + `, + `{"data":{"me":[{"noindex_salary":589.040000},{"noindex_salary":967.680000}]}}`, + }, + { + `Test gt filter on non-indexed float`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(gt(noindex_salary, "589.04")) { + noindex_salary + } + } + `, + `{"data":{"me":[{"noindex_salary":967.680000}]}}`, + }, + { + `Test le filter on non-indexed float`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(le(noindex_salary, "589.04")) { + noindex_salary + } + } + `, + `{"data":{"me":[{"noindex_salary":501.230000},{"noindex_salary":589.040000},{"noindex_salary":459.470000}]}}`, + }, + { + `Test lt filter on non-indexed float`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(lt(noindex_salary, "589.04")){ + noindex_salary + } + }, + `, + `{"data":{"me":[{"noindex_salary":501.230000},{"noindex_salary":459.470000}]}}`, + }, + { + `Test eq filter on non-indexed float`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_salary, "589.04")) { + noindex_salary + } + } + `, + `{"data":{"me":[{"noindex_salary":589.040000}]}}`, + }, + { + `Test eq filter on non-indexed bool`, + ` + { + me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_alive, true)) { + uid + noindex_name + noindex_alive + } + } + `, + `{"data":{"me":[{"uid":"0x1","noindex_name":"Michonne's name not indexed","noindex_alive":true},{"uid":"0x4","noindex_name":"Leonard's name not indexed","noindex_alive":true}]}}`, + }, + { + `Test filtering of non indexed predicate inside query`, + ` + { + me(func: uid(0x01)) { + friend @filter(ge(survival_rate, 1.6)) { + name + survival_rate + } + } + } + `, + `{"data":{"me":[{"friend":[{"name":"Rick Grimes","survival_rate":1.600000},{"name":"Glenn Rhee","survival_rate":1.600000},{"name":"Daryl Dixon","survival_rate":1.600000},{"name":"Andrea","survival_rate":1.600000}]}]}}`, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + js := processQueryNoErr(t, tc.query) + require.JSONEq(t, js, tc.result) + }) + } +} + var client *dgo.Dgraph func TestMain(m *testing.M) { diff --git a/query/query1_test.go b/query/query1_test.go index 52731e18595..f09d14f6f70 100644 --- a/query/query1_test.go +++ b/query/query1_test.go @@ -81,22 +81,17 @@ func TestSchemaBlock5(t *testing.T) { require.JSONEq(t, `{"data":{"schema":[{"predicate":"name","type":"string","index":true,"tokenizer":["term","exact","trigram"],"count":true,"lang":true}]}}`, js) } -func TestFilterNonIndexedPredicateFail(t *testing.T) { - - // filtering on non indexing predicate fails +func TestNonIndexedPredicateAtRoot(t *testing.T) { query := ` - { - me(func: uid(0x01)) { - friend @filter(le(survival_rate, 30)) { - uid - name - age - } - } + { + me(func: ge(noindex_name, "Michonne")) { + noindex_name } + } ` _, err := processQuery(context.Background(), t, query) require.Error(t, err) + require.Contains(t, err.Error(), "Predicate noindex_name is not indexed") } func TestMultipleSamePredicateInBlockFail(t *testing.T) { diff --git a/worker/task.go b/worker/task.go index ccb7f6f252d..938e3f60ad3 100644 --- a/worker/task.go +++ b/worker/task.go @@ -269,9 +269,17 @@ func parseFuncTypeHelper(name string) (FuncType, string) { } } -func needsIndex(fnType FuncType) bool { +func needsIndex(fnType FuncType, uidList *pb.List) bool { switch fnType { - case compareAttrFn, geoFn, fullTextSearchFn, standardFn, matchFn: + case compareAttrFn: + if uidList != nil { + // UidList is not nil means this is a filter. Filter predicate is not indexed, so + // instead of fetching values by index key, we will fetch value by data key + // (from uid and predicate) and apply filter on values. + return false + } + return true + case geoFn, fullTextSearchFn, standardFn, matchFn: return true } return false @@ -852,7 +860,7 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint return nil, errors.Errorf("Predicate %s doesn't have reverse edge", attr) } - if needsIndex(srcFn.fnType) && !schema.State().IsIndexed(q.Attr) { + if needsIndex(srcFn.fnType, q.UidList) && !schema.State().IsIndexed(q.Attr) { return nil, errors.Errorf("Predicate %s is not indexed", q.Attr) } @@ -913,21 +921,21 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint if srcFn.fnType == compareScalarFn && srcFn.isFuncAtRoot { span.Annotate(nil, "handleCompareScalarFunction") - if err := qs.handleCompareScalarFunction(funcArgs{q, gid, srcFn, out}); err != nil { + if err := qs.handleCompareScalarFunction(args); err != nil { return nil, err } } if srcFn.fnType == regexFn { span.Annotate(nil, "handleRegexFunction") - if err := qs.handleRegexFunction(ctx, funcArgs{q, gid, srcFn, out}); err != nil { + if err := qs.handleRegexFunction(ctx, args); err != nil { return nil, err } } if srcFn.fnType == matchFn { span.Annotate(nil, "handleMatchFunction") - if err := qs.handleMatchFunction(ctx, funcArgs{q, gid, srcFn, out}); err != nil { + if err := qs.handleMatchFunction(ctx, args); err != nil { return nil, err } } @@ -936,7 +944,7 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint // request and filter the uids only if the tokenizer IsLossy. if srcFn.fnType == compareAttrFn && len(srcFn.tokens) > 0 { span.Annotate(nil, "handleCompareFunction") - if err := qs.handleCompareFunction(ctx, funcArgs{q, gid, srcFn, out}); err != nil { + if err := qs.handleCompareFunction(ctx, args); err != nil { return nil, err } } @@ -944,7 +952,7 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint // If geo filter, do value check for correctness. if srcFn.geoQuery != nil { span.Annotate(nil, "handleGeoFunction") - if err := qs.filterGeoFunction(ctx, funcArgs{q, gid, srcFn, out}); err != nil { + if err := qs.filterGeoFunction(ctx, args); err != nil { return nil, err } } @@ -953,7 +961,7 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint // for hasFn as filtering for it has already been done in handleHasFunction. if srcFn.fnType != hasFn && needsStringFiltering(srcFn, q.Langs, attr) { span.Annotate(nil, "filterStringFunction") - if err := qs.filterStringFunction(funcArgs{q, gid, srcFn, out}); err != nil { + if err := qs.filterStringFunction(args); err != nil { return nil, err } } @@ -1122,8 +1130,8 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e return err } - // Only if the tokenizer that we used IsLossy, then we need to fetch - // and compare the actual values. + // Only if the tokenizer that we used IsLossy + // then we need to fetch and compare the actual values. span.Annotatef(nil, "Tokenizer: %s, Lossy: %t", tokenizer.Name(), tokenizer.IsLossy()) if tokenizer.IsLossy() { // Need to evaluate inequality for entries in the first bucket. @@ -1136,12 +1144,11 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e rowsToFilter := 0 switch { case arg.srcFn.fname == eq: - // If fn is eq, we could have multiple arguments and hence multiple rows - // to filter. + // If fn is eq, we could have multiple arguments and hence multiple rows to filter. rowsToFilter = len(arg.srcFn.tokens) case arg.srcFn.tokens[0] == arg.srcFn.ineqValueToken: // If operation is not eq and ineqValueToken equals first token, - // then we need to filter first row.. + // then we need to filter first row. rowsToFilter = 1 } isList := schema.State().IsList(attr) @@ -1506,10 +1513,15 @@ func matchRegex(value types.Val, regex *cregexp.Regexp) bool { } type functionContext struct { - tokens []string - geoQuery *types.GeoQueryData - intersectDest bool - ineqValue types.Val + tokens []string + geoQuery *types.GeoQueryData + intersectDest bool + ineqValue types.Val + // eqTokens is used by compareAttr functions. It stores values corresponding to each + // function argument. There could be multiple arguments to `eq` function but only one for + // other compareAttr functions. + // TODO(@Animesh): change field names which could explain their uses better. Check if we + // really need all of ineqValue, eqTokens, tokens eqTokens []types.Val ineqValueToken string n int @@ -1558,6 +1570,7 @@ func parseSrcFn(q *pb.Query) (*functionContext, error) { fnType, f := parseFuncType(q.SrcFunc) attr := q.Attr fc := &functionContext{fnType: fnType, fname: f} + isIndexedAttr := schema.State().IsIndexed(attr) var err error t, err := schema.State().TypeOf(attr) @@ -1600,6 +1613,11 @@ func parseSrcFn(q *pb.Query) (*functionContext, error) { return nil, errors.Errorf("Got error: %v while running: %v", err, q.SrcFunc) } + fc.eqTokens = append(fc.eqTokens, fc.ineqValue) + if !isIndexedAttr { + // In case of non-indexed predicate we won't have any tokens. + continue + } // Get tokens ge / le ineqValueToken. if tokens, fc.ineqValueToken, err = getInequalityTokens(q.ReadTs, attr, f, fc.ineqValue); err != nil { @@ -1609,15 +1627,18 @@ func parseSrcFn(q *pb.Query) (*functionContext, error) { continue } fc.tokens = append(fc.tokens, tokens...) - fc.eqTokens = append(fc.eqTokens, fc.ineqValue) } - // Number of index keys is more than no. of uids to filter, so its better to fetch data keys - // directly and compare. Lets make tokens empty. + // In case of non-indexed predicate, there won't be any tokens. We will fetch value + // from data keys. + // If number of index keys is more than no. of uids to filter, so its better to fetch values + // from data keys directly and compare. Lets make tokens empty. // We don't do this for eq because eq could have multiple arguments and we would have to // compare the value with all of them. Also eq would usually have less arguments, hence we // won't be fetching many index keys. - if q.UidList != nil && len(fc.tokens) > len(q.UidList.Uids) && fc.fname != eq { + if q.UidList != nil && !isIndexedAttr { + fc.n = len(q.UidList.Uids) + } else if q.UidList != nil && len(fc.tokens) > len(q.UidList.Uids) && fc.fname != eq { fc.tokens = fc.tokens[:0] fc.n = len(q.UidList.Uids) } else {