From fbd54f3857df8b8d31f9ef1675a8933870488888 Mon Sep 17 00:00:00 2001 From: Ahsan Barkati Date: Wed, 31 Mar 2021 13:31:38 +0530 Subject: [PATCH] fix(query): Fix pagination with match functions (#7668) Fix queries involving `regexp`, `allofterms`, `alloftext` and `match` functions with pagination. These functions rely on indexes and need to fetch postings/uids for each relevant index key to generate the final result. Remove early pagination for these cases. --- query/common_test.go | 23 ++++++++++++++++++++ query/query.go | 27 +++++++++++++---------- query/query0_test.go | 52 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 12 deletions(-) diff --git a/query/common_test.go b/query/common_test.go index 34b93e65823..de2070c8d13 100644 --- a/query/common_test.go +++ b/query/common_test.go @@ -328,6 +328,10 @@ gender : string . indexpred : string @index(exact) . pred : string . pname : string . +tweet-a : string @index(trigram) . +tweet-b : string @index(term) . +tweet-c : string @index(fulltext) . +tweet-d : string @index(trigram) . ` func populateCluster() { @@ -828,6 +832,25 @@ func populateCluster() { <67> "I" . <68> "J" . <69> "K" . + + <61> "aaa" . + <62> "aaaa" . + <63> "aaaab" . + <64> "aaaabb" . + + <61> "indiana" . + <62> "indiana" . + <63> "indiana jones" . + <64> "indiana pop" . + + <61> "I am a citizen" . + <62> "I am a citizen" . + <63> "I am a citizen" . + <64> "I am a citizen of Paradis Island" . + + <61> "aaabxxx" . + <62> "aaacdxx" . + <63> "aaabcd" . `) if err != nil { panic(fmt.Sprintf("Could not able add triple to the cluster. 
Got error %v", err.Error())) diff --git a/query/query.go b/query/query.go index 9368da565a6..828ae60c8b1 100644 --- a/query/query.go +++ b/query/query.go @@ -984,19 +984,22 @@ func calculateFirstN(sg *SubGraph) int32 { // name // } // } - // - should be has function (Right now, I'm doing it for has, later it can be extended) - // { - // q(func: has(name), first:1) { - // name - // } - // } - // isSupportedFunction := sg.SrcFunc != nil && sg.SrcFunc.Name == "has" + // - should not be one of those function which fetches some results and then do further + // processing to narrow down the result. For example: allofterm will fetch the index postings + // for each term and then do an intersection. + // TODO: Look into how we can optimize queries involving these functions. + + shouldExclude := false + if sg.SrcFunc != nil { + switch sg.SrcFunc.Name { + case "regexp", "alloftext", "allofterms", "match": + shouldExclude = true + default: + shouldExclude = false + } + } - // Manish: Shouldn't all functions allow this? If we don't have a order and we don't have a - // filter, then we can respect the first N, offset Y arguments when retrieving data. - isSupportedFunction := true - if len(sg.Filters) == 0 && len(sg.Params.Order) == 0 && - isSupportedFunction { + if len(sg.Filters) == 0 && len(sg.Params.Order) == 0 && !shouldExclude { // Offset also added because, we need n results to trim the offset. 
if sg.Params.Count != 0 { count = sg.Params.Count + sg.Params.Offset diff --git a/query/query0_test.go b/query/query0_test.go index cc52c04b843..118e0f938a1 100644 --- a/query/query0_test.go +++ b/query/query0_test.go @@ -3432,6 +3432,58 @@ func TestEqFilterWithoutIndex(t *testing.T) { } +func TestMatchingWithPagination(t *testing.T) { + tests := []struct { + name string + query string + expected string + }{ + { + `Test regexp matching with pagination`, + `{ + me(func: regexp(tweet-a, /aaa.b/), first:1){ + tweet-a + } + }`, + `{"data":{"me":[{"tweet-a":"aaaab"}]}}`, + }, + { + `Test term matching with pagination`, + `{ + me(func: allofterms(tweet-b, "indiana jones"), first:1){ + tweet-b + } + }`, + `{"data":{"me":[{"tweet-b":"indiana jones"}]}}`, + }, + { + `Test full-text matching with pagination`, + `{ + me(func: alloftext(tweet-c, "I am a citizen of Paradis Island"), first:1){ + tweet-c + } + }`, + `{"data":{"me":[{"tweet-c":"I am a citizen of Paradis Island"}]}}`, + }, + { + `Test match function with pagination`, + `{ + me(func: match(tweet-d, "aaaaaa", 3), first:1) { + tweet-d + } + }`, + `{"data":{"me":[{"tweet-d":"aaabcd"}]}}`, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + result := processQueryNoErr(t, tc.query) + require.JSONEq(t, tc.expected, result) + }) + } +} + var client *dgo.Dgraph func TestMain(m *testing.M) {