From 916518fd4fcdc00ea809d409f13692a038f2d1e5 Mon Sep 17 00:00:00 2001
From: Animesh Chandra Pathak <animesh@dgraph.io>
Date: Tue, 14 Jan 2020 16:24:46 +0530
Subject: [PATCH] Support filtering on nonindexed predicate (#4531)

---
 query/common_test.go |  29 ++++-
 query/query0_test.go | 264 +++++++++++++++++++++++++++++++++++++++++++
 query/query1_test.go |  17 +--
 worker/task.go       |  65 +++++++----
 4 files changed, 341 insertions(+), 34 deletions(-)
diff --git a/query/common_test.go b/query/common_test.go
index b434531cd7d..9d57519ab39 100644
--- a/query/common_test.go
+++ b/query/common_test.go
@@ -256,7 +256,6 @@ value                          : string @index(trigram) .
 full_name                      : string @index(hash) .
 nick_name                      : string @index(term) .
 royal_title                    : string @index(hash, term, fulltext) @lang .
-noindex_name                   : string .
 school                         : [uid] @count .
 lossy                          : string @index(term) @lang .
 occupations                    : [string] @index(term) .
@@ -288,6 +287,11 @@ boss                           : uid .
 newfriend                      : [uid] .
 owner                          : [uid] .
 noconflict_pred                : string @noconflict .
+noindex_name                   : string .
+noindex_age                    : int .
+noindex_dob                    : datetime .
+noindex_alive                  : bool .
+noindex_salary                 : float .
 language                       : [string] .
 `
 
@@ -390,6 +394,29 @@ func populateCluster() {
 		<1> <full_name> "Michonne's large name for hashing" .
 
 		<1> <noindex_name> "Michonne's name not indexed" .
+		<2> <noindex_name> "King Lear's name not indexed" .
+		<3> <noindex_name> "Margaret's name not indexed" .
+		<4> <noindex_name> "Leonard's name not indexed" .
+
+		<1> <noindex_age> "21" .
+		<2> <noindex_age> "22" .
+		<3> <noindex_age> "23" .
+		<4> <noindex_age> "24" .
+
+		<1> <noindex_dob> "1810-11-01" .
+		<2> <noindex_dob> "1710-11-01" .
+		<3> <noindex_dob> "1610-11-01" .
+		<4> <noindex_dob> "1510-11-01" .
+
+		<1> <noindex_alive> "true" .
+		<2> <noindex_alive> "false" .
+		<3> <noindex_alive> "false" .
+		<4> <noindex_alive> "true" .
+
+		<1> <noindex_salary> "501.23" .
+		<2> <noindex_salary> "589.04" .
+		<3> <noindex_salary> "459.47" .
+		<4> <noindex_salary> "967.68" .
 
 		<1> <friend> <23> .
 		<1> <friend> <24> .
diff --git a/query/query0_test.go b/query/query0_test.go
index 8e9a5f7ad63..d1f0cd31555 100644
--- a/query/query0_test.go
+++ b/query/query0_test.go
@@ -2674,6 +2674,270 @@ func TestCountUidWithAlias(t *testing.T) {
 		js)
 }
 
+func TestFilterNonIndexedPredicate(t *testing.T) {
+	t.Parallel()
+	tests := []struct {
+		name   string
+		query  string
+		result string
+	}{
+		{
+			`Test ge filter on non-indexed string`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(ge(noindex_name, "Leonard's name not indexed")) {
+					noindex_name
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_name":"Michonne's name not indexed"},{"noindex_name":"Margaret's name not indexed"},{"noindex_name":"Leonard's name not indexed"}]}}`,
+		},
+		{
+			`Test gt filter on non-indexed string`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(gt(noindex_name, "Leonard's name not indexed")) {
+					noindex_name
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_name":"Michonne's name not indexed"},{"noindex_name":"Margaret's name not indexed"}]}}`,
+		},
+		{
+			`Test le filter on non-indexed string`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(le(noindex_name, "Leonard's name not indexed")) {
+					noindex_name
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_name":"King Lear's name not indexed"},{"noindex_name":"Leonard's name not indexed"}]}}`,
+		},
+		{
+			`Test lt filter on non-indexed string`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(lt(noindex_name, "Leonard's name not indexed")){
+					noindex_name
+				}
+			},
+			`,
+			`{"data":{"me":[{"noindex_name":"King Lear's name not indexed"}]}}`,
+		},
+		{
+			`Test eq filter on non-indexed string`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_name, "King Lear's name not indexed")) {
+					noindex_name
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_name":"King Lear's name not indexed"}]}}`,
+		},
+		{
+			`Test ge filter on non-indexed int`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(ge(noindex_age, "22")) {
+					noindex_age
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_age":22},{"noindex_age":23},{"noindex_age":24}]}}`,
+		},
+		{
+			`Test gt filter on non-indexed int`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(gt(noindex_age, "22")) {
+					noindex_age
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_age":23},{"noindex_age":24}]}}`,
+		},
+		{
+			`Test le filter on non-indexed int`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(le(noindex_age, "22")) {
+					noindex_age
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_age":21},{"noindex_age":22}]}}`,
+		},
+		{
+			`Test lt filter on non-indexed int`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(lt(noindex_age, "22")){
+					noindex_age
+				}
+			},
+			`,
+			`{"data":{"me":[{"noindex_age":21}]}}`,
+		},
+		{
+			`Test eq filter on non-indexed int`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_age, "22")) {
+					noindex_age
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_age":22}]}}`,
+		},
+		{
+			`Test ge filter on non-indexed datetime`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(ge(noindex_dob, "1610-11-01")) {
+					noindex_dob
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_dob":"1810-11-01T00:00:00Z"},{"noindex_dob":"1710-11-01T00:00:00Z"},{"noindex_dob":"1610-11-01T00:00:00Z"}]}}`,
+		},
+		{
+			`Test gt filter on non-indexed datetime`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(gt(noindex_dob, "1610-11-01")) {
+					noindex_dob
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_dob":"1810-11-01T00:00:00Z"},{"noindex_dob":"1710-11-01T00:00:00Z"}]}}`,
+		},
+		{
+			`Test le filter on non-indexed datetime`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(le(noindex_dob, "1610-11-01")) {
+					noindex_dob
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_dob":"1610-11-01T00:00:00Z"},{"noindex_dob":"1510-11-01T00:00:00Z"}]}}`,
+		},
+		{
+			`Test lt filter on non-indexed datetime`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(lt(noindex_dob, "1610-11-01")){
+					noindex_dob
+				}
+			},
+			`,
+			`{"data":{"me":[{"noindex_dob":"1510-11-01T00:00:00Z"}]}}`,
+		},
+		{
+			`Test eq filter on non-indexed datetime`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_dob, "1610-11-01")) {
+					noindex_dob
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_dob":"1610-11-01T00:00:00Z"}]}}`,
+		},
+		{
+			`Test ge filter on non-indexed float`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(ge(noindex_salary, "589.04")) {
+					noindex_salary
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_salary":589.040000},{"noindex_salary":967.680000}]}}`,
+		},
+		{
+			`Test gt filter on non-indexed float`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(gt(noindex_salary, "589.04")) {
+					noindex_salary
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_salary":967.680000}]}}`,
+		},
+		{
+			`Test le filter on non-indexed float`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(le(noindex_salary, "589.04")) {
+					noindex_salary
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_salary":501.230000},{"noindex_salary":589.040000},{"noindex_salary":459.470000}]}}`,
+		},
+		{
+			`Test lt filter on non-indexed float`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(lt(noindex_salary, "589.04")){
+					noindex_salary
+				}
+			},
+			`,
+			`{"data":{"me":[{"noindex_salary":501.230000},{"noindex_salary":459.470000}]}}`,
+		},
+		{
+			`Test eq filter on non-indexed float`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_salary, "589.04")) {
+					noindex_salary
+				}
+			}
+			`,
+			`{"data":{"me":[{"noindex_salary":589.040000}]}}`,
+		},
+		{
+			`Test eq filter on non-indexed bool`,
+			`
+			{
+				me(func: uid(1, 2, 3, 4)) @filter(eq(noindex_alive, true)) {
+					uid
+					noindex_name
+					noindex_alive
+				}
+			}
+			`,
+			`{"data":{"me":[{"uid":"0x1","noindex_name":"Michonne's name not indexed","noindex_alive":true},{"uid":"0x4","noindex_name":"Leonard's name not indexed","noindex_alive":true}]}}`,
+		},
+		{
+			`Test filtering of non indexed predicate inside query`,
+			`
+			{
+				me(func: uid(0x01)) {
+					friend @filter(ge(survival_rate, 1.6)) {
+						name
+						survival_rate
+					}
+				}
+			}
+			`,
+			`{"data":{"me":[{"friend":[{"name":"Rick Grimes","survival_rate":1.600000},{"name":"Glenn Rhee","survival_rate":1.600000},{"name":"Daryl Dixon","survival_rate":1.600000},{"name":"Andrea","survival_rate":1.600000}]}]}}`,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			js := processQueryNoErr(t, tc.query)
+			require.JSONEq(t, js, tc.result)
+		})
+	}
+}
+
 var client *dgo.Dgraph
 
 func TestMain(m *testing.M) {
diff --git a/query/query1_test.go b/query/query1_test.go
index 52731e18595..f09d14f6f70 100644
--- a/query/query1_test.go
+++ b/query/query1_test.go
@@ -81,22 +81,17 @@ func TestSchemaBlock5(t *testing.T) {
 	require.JSONEq(t, `{"data":{"schema":[{"predicate":"name","type":"string","index":true,"tokenizer":["term","exact","trigram"],"count":true,"lang":true}]}}`, js)
 }
 
-func TestFilterNonIndexedPredicateFail(t *testing.T) {
-
-	// filtering on non indexing predicate fails
+func TestNonIndexedPredicateAtRoot(t *testing.T) {
 	query := `
-		{
-			me(func: uid(0x01)) {
-				friend @filter(le(survival_rate, 30)) {
-					uid
-					name
-					age
-				}
-			}
+	{
+		me(func: ge(noindex_name, "Michonne")) {
+			noindex_name
 		}
+	}
 	`
 	_, err := processQuery(context.Background(), t, query)
 	require.Error(t, err)
+	require.Contains(t, err.Error(), "Predicate noindex_name is not indexed")
 }
 
 func TestMultipleSamePredicateInBlockFail(t *testing.T) {
diff --git a/worker/task.go b/worker/task.go
index ccb7f6f252d..938e3f60ad3 100644
--- a/worker/task.go
+++ b/worker/task.go
@@ -269,9 +269,17 @@ func parseFuncTypeHelper(name string) (FuncType, string) {
 	}
 }
 
-func needsIndex(fnType FuncType) bool {
+func needsIndex(fnType FuncType, uidList *pb.List) bool {
 	switch fnType {
-	case compareAttrFn, geoFn, fullTextSearchFn, standardFn, matchFn:
+	case compareAttrFn:
+		if uidList != nil {
+			// UidList is not nil means this is a filter. Filter predicate is not indexed, so
+			// instead of fetching values by index key, we will fetch value by data key
+			// (from uid and predicate) and apply filter on values.
+			return false
+		}
+		return true
+	case geoFn, fullTextSearchFn, standardFn, matchFn:
 		return true
 	}
 	return false
@@ -852,7 +860,7 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint
 		return nil, errors.Errorf("Predicate %s doesn't have reverse edge", attr)
 	}
 
-	if needsIndex(srcFn.fnType) && !schema.State().IsIndexed(q.Attr) {
+	if needsIndex(srcFn.fnType, q.UidList) && !schema.State().IsIndexed(q.Attr) {
 		return nil, errors.Errorf("Predicate %s is not indexed", q.Attr)
 	}
 
@@ -913,21 +921,21 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint
 
 	if srcFn.fnType == compareScalarFn && srcFn.isFuncAtRoot {
 		span.Annotate(nil, "handleCompareScalarFunction")
-		if err := qs.handleCompareScalarFunction(funcArgs{q, gid, srcFn, out}); err != nil {
+		if err := qs.handleCompareScalarFunction(args); err != nil {
 			return nil, err
 		}
 	}
 
 	if srcFn.fnType == regexFn {
 		span.Annotate(nil, "handleRegexFunction")
-		if err := qs.handleRegexFunction(ctx, funcArgs{q, gid, srcFn, out}); err != nil {
+		if err := qs.handleRegexFunction(ctx, args); err != nil {
 			return nil, err
 		}
 	}
 
 	if srcFn.fnType == matchFn {
 		span.Annotate(nil, "handleMatchFunction")
-		if err := qs.handleMatchFunction(ctx, funcArgs{q, gid, srcFn, out}); err != nil {
+		if err := qs.handleMatchFunction(ctx, args); err != nil {
 			return nil, err
 		}
 	}
@@ -936,7 +944,7 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint
 	// request and filter the uids only if the tokenizer IsLossy.
 	if srcFn.fnType == compareAttrFn && len(srcFn.tokens) > 0 {
 		span.Annotate(nil, "handleCompareFunction")
-		if err := qs.handleCompareFunction(ctx, funcArgs{q, gid, srcFn, out}); err != nil {
+		if err := qs.handleCompareFunction(ctx, args); err != nil {
 			return nil, err
 		}
 	}
@@ -944,7 +952,7 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint
 	// If geo filter, do value check for correctness.
 	if srcFn.geoQuery != nil {
 		span.Annotate(nil, "handleGeoFunction")
-		if err := qs.filterGeoFunction(ctx, funcArgs{q, gid, srcFn, out}); err != nil {
+		if err := qs.filterGeoFunction(ctx, args); err != nil {
 			return nil, err
 		}
 	}
@@ -953,7 +961,7 @@ func (qs *queryState) helpProcessTask(ctx context.Context, q *pb.Query, gid uint
 	// for hasFn as filtering for it has already been done in handleHasFunction.
 	if srcFn.fnType != hasFn && needsStringFiltering(srcFn, q.Langs, attr) {
 		span.Annotate(nil, "filterStringFunction")
-		if err := qs.filterStringFunction(funcArgs{q, gid, srcFn, out}); err != nil {
+		if err := qs.filterStringFunction(args); err != nil {
 			return nil, err
 		}
 	}
@@ -1122,8 +1130,8 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e
 		return err
 	}
 
-	// Only if the tokenizer that we used IsLossy, then we need to fetch
-	// and compare the actual values.
+	// Only if the tokenizer that we used IsLossy
+	// then we need to fetch and compare the actual values.
 	span.Annotatef(nil, "Tokenizer: %s, Lossy: %t", tokenizer.Name(), tokenizer.IsLossy())
 	if tokenizer.IsLossy() {
 		// Need to evaluate inequality for entries in the first bucket.
@@ -1136,12 +1144,11 @@ func (qs *queryState) handleCompareFunction(ctx context.Context, arg funcArgs) e
 		rowsToFilter := 0
 		switch {
 		case arg.srcFn.fname == eq:
-			// If fn is eq, we could have multiple arguments and hence multiple rows
-			// to filter.
+			// If fn is eq, we could have multiple arguments and hence multiple rows to filter.
 			rowsToFilter = len(arg.srcFn.tokens)
 		case arg.srcFn.tokens[0] == arg.srcFn.ineqValueToken:
 			// If operation is not eq and ineqValueToken equals first token,
-			// then we need to filter first row..
+			// then we need to filter first row.
 			rowsToFilter = 1
 		}
 		isList := schema.State().IsList(attr)
@@ -1506,10 +1513,15 @@ func matchRegex(value types.Val, regex *cregexp.Regexp) bool {
 }
 
 type functionContext struct {
-	tokens         []string
-	geoQuery       *types.GeoQueryData
-	intersectDest  bool
-	ineqValue      types.Val
+	tokens        []string
+	geoQuery      *types.GeoQueryData
+	intersectDest bool
+	ineqValue     types.Val
+	// eqTokens is used by compareAttr functions. It stores values corresponding to each
+	// function argument. There could be multiple arguments to `eq` function but only one for
+	// other compareAttr functions.
+	// TODO(@Animesh): change field names which could explain their uses better. Check if we
+	// really need all of ineqValue, eqTokens, tokens
 	eqTokens       []types.Val
 	ineqValueToken string
 	n              int
@@ -1558,6 +1570,7 @@ func parseSrcFn(q *pb.Query) (*functionContext, error) {
 	fnType, f := parseFuncType(q.SrcFunc)
 	attr := q.Attr
 	fc := &functionContext{fnType: fnType, fname: f}
+	isIndexedAttr := schema.State().IsIndexed(attr)
 	var err error
 
 	t, err := schema.State().TypeOf(attr)
@@ -1600,6 +1613,11 @@ func parseSrcFn(q *pb.Query) (*functionContext, error) {
 				return nil, errors.Errorf("Got error: %v while running: %v", err,
 					q.SrcFunc)
 			}
+			fc.eqTokens = append(fc.eqTokens, fc.ineqValue)
+			if !isIndexedAttr {
+				// In case of non-indexed predicate we won't have any tokens.
+				continue
+			}
 			// Get tokens ge / le ineqValueToken.
 			if tokens, fc.ineqValueToken, err = getInequalityTokens(q.ReadTs, attr, f,
 				fc.ineqValue); err != nil {
@@ -1609,15 +1627,18 @@ func parseSrcFn(q *pb.Query) (*functionContext, error) {
 				continue
 			}
 			fc.tokens = append(fc.tokens, tokens...)
-			fc.eqTokens = append(fc.eqTokens, fc.ineqValue)
 		}
 
-		// Number of index keys is more than no. of uids to filter, so its better to fetch data keys
-		// directly and compare. Lets make tokens empty.
+		// In case of non-indexed predicate, there won't be any tokens. We will fetch value
+		// from data keys.
+		// If number of index keys is more than no. of uids to filter, so its better to fetch values
+		// from data keys directly and compare. Lets make tokens empty.
 		// We don't do this for eq because eq could have multiple arguments and we would have to
 		// compare the value with all of them. Also eq would usually have less arguments, hence we
 		// won't be fetching many index keys.
-		if q.UidList != nil && len(fc.tokens) > len(q.UidList.Uids) && fc.fname != eq {
+		if q.UidList != nil && !isIndexedAttr {
+			fc.n = len(q.UidList.Uids)
+		} else if q.UidList != nil && len(fc.tokens) > len(q.UidList.Uids) && fc.fname != eq {
 			fc.tokens = fc.tokens[:0]
 			fc.n = len(q.UidList.Uids)
 		} else {