|
52 | 52 | emptyValueList = pb.ValueList{Values: []*pb.TaskValue{}}
|
53 | 53 | )
|
54 | 54 |
|
55 |
| -func invokeNetworkRequest( |
56 |
| - ctx context.Context, addr string, f func(context.Context, pb.WorkerClient) (interface{}, error)) (interface{}, error) { |
| 55 | +func invokeNetworkRequest(ctx context.Context, addr string, |
| 56 | + f func(context.Context, pb.WorkerClient) (interface{}, error)) (interface{}, error) { |
57 | 57 | pl, err := conn.Get().Get(addr)
|
58 | 58 | if err != nil {
|
59 | 59 | return &emptyResult, x.Wrapf(err, "dispatchTaskOverNetwork: while retrieving connection.")
|
@@ -270,7 +270,7 @@ func parseFuncTypeHelper(name string) (FuncType, string) {
|
270 | 270 |
|
271 | 271 | func needsIndex(fnType FuncType) bool {
|
272 | 272 | switch fnType {
|
273 |
| - case CompareAttrFn, GeoFn, RegexFn, FullTextSearchFn, StandardFn: |
| 273 | + case CompareAttrFn, GeoFn, FullTextSearchFn, StandardFn: |
274 | 274 | return true
|
275 | 275 | default:
|
276 | 276 | return false
|
@@ -876,88 +876,102 @@ func (qs *queryState) handleCompareScalarFunction(arg funcArgs) error {
|
876 | 876 | }
|
877 | 877 |
|
878 | 878 | func (qs *queryState) handleRegexFunction(ctx context.Context, arg funcArgs) error {
|
| 879 | + span := otrace.FromContext(ctx) |
| 880 | + stop := x.SpanTimer(span, "handleRegexFunction") |
| 881 | + defer stop() |
| 882 | + if span != nil { |
| 883 | + span.Annotatef(nil, "Number of uids: %d. args.srcFn: %+v", arg.srcFn.n, arg.srcFn) |
| 884 | + } |
| 885 | + |
879 | 886 | attr := arg.q.Attr
|
880 | 887 | typ, err := schema.State().TypeOf(attr)
|
| 888 | + span.Annotatef(nil, "Attr: %s. Type: %s", attr, typ.Name()) |
881 | 889 | if err != nil || !typ.IsScalar() {
|
882 | 890 | return x.Errorf("Attribute not scalar: %s %v", attr, typ)
|
883 | 891 | }
|
884 | 892 | if typ != types.StringID {
|
885 | 893 | return x.Errorf("Got non-string type. Regex match is allowed only on string type.")
|
886 | 894 | }
|
887 |
| - tokenizers := schema.State().TokenizerNames(attr) |
888 |
| - var found bool |
889 |
| - for _, t := range tokenizers { |
890 |
| - if t == "trigram" { // TODO(tzdybal) - maybe just rename to 'regex' tokenizer? |
891 |
| - found = true |
892 |
| - } |
893 |
| - } |
894 |
| - if !found { |
895 |
| - return x.Errorf("Attribute %v does not have trigram index for regex matching.", attr) |
896 |
| - } |
| 895 | + useIndex := schema.State().HasTokenizer(tok.IdentTrigram, attr) |
| 896 | + span.Annotatef(nil, "Trigram index found: %t, func at root: %t", |
| 897 | + useIndex, arg.srcFn.isFuncAtRoot) |
897 | 898 |
|
898 | 899 | query := cindex.RegexpQuery(arg.srcFn.regex.Syntax)
|
899 | 900 | empty := pb.List{}
|
900 |
| - uids, err := uidsForRegex(attr, arg, query, &empty) |
| 901 | + uids := &pb.List{} |
| 902 | + |
| 903 | + // Here we determine the list of uids to match. |
| 904 | + switch { |
| 905 | + // If this is a filter eval, use the given uid list (good) |
| 906 | + case arg.q.UidList != nil && len(arg.q.UidList.Uids) != 0: |
| 907 | + uids = arg.q.UidList |
| 908 | + |
| 909 | + // Prefer to use an index (fast) |
| 910 | + case useIndex: |
| 911 | + uids, err = uidsForRegex(attr, arg, query, &empty) |
| 912 | + if err != nil { |
| 913 | + return err |
| 914 | + } |
| 915 | + |
| 916 | + // No index and at root, return error instructing user to use `has` or index. |
| 917 | + default: |
| 918 | + return x.Errorf( |
| 919 | + "Attribute %v does not have trigram index for regex matching. "+ |
| 920 | + "Please add a trigram index or use has/uid function with regexp() as filter.", |
| 921 | + attr) |
| 922 | + } |
| 923 | + |
| 924 | + arg.out.UidMatrix = append(arg.out.UidMatrix, uids) |
901 | 925 | isList := schema.State().IsList(attr)
|
902 | 926 | lang := langForFunc(arg.q.Langs)
|
903 |
| - if uids != nil { |
904 |
| - arg.out.UidMatrix = append(arg.out.UidMatrix, uids) |
905 | 927 |
|
906 |
| - filtered := &pb.List{} |
907 |
| - for _, uid := range uids.Uids { |
908 |
| - select { |
909 |
| - case <-ctx.Done(): |
910 |
| - return ctx.Err() |
911 |
| - default: |
912 |
| - } |
913 |
| - pl, err := qs.cache.Get(x.DataKey(attr, uid)) |
914 |
| - if err != nil { |
915 |
| - return err |
916 |
| - } |
| 928 | + span.Annotatef(nil, "Total uids: %d, list: %t lang: %v", len(uids.Uids), isList, lang) |
917 | 929 |
|
918 |
| - var val types.Val |
919 |
| - if lang != "" { |
920 |
| - val, err = pl.ValueForTag(arg.q.ReadTs, lang) |
921 |
| - } else if isList { |
922 |
| - vals, err := pl.AllUntaggedValues(arg.q.ReadTs) |
923 |
| - if err == posting.ErrNoValue { |
924 |
| - continue |
925 |
| - } else if err != nil { |
926 |
| - return err |
927 |
| - } |
928 |
| - for _, val := range vals { |
929 |
| - // convert data from binary to appropriate format |
930 |
| - strVal, err := types.Convert(val, types.StringID) |
931 |
| - if err == nil && matchRegex(strVal, arg.srcFn.regex) { |
932 |
| - filtered.Uids = append(filtered.Uids, uid) |
933 |
| - break |
934 |
| - } |
935 |
| - } |
| 930 | + filtered := &pb.List{} |
| 931 | + for _, uid := range uids.Uids { |
| 932 | + select { |
| 933 | + case <-ctx.Done(): |
| 934 | + return ctx.Err() |
| 935 | + default: |
| 936 | + } |
| 937 | + pl, err := qs.cache.Get(x.DataKey(attr, uid)) |
| 938 | + if err != nil { |
| 939 | + return err |
| 940 | + } |
936 | 941 |
|
937 |
| - continue |
938 |
| - } else { |
939 |
| - val, err = pl.Value(arg.q.ReadTs) |
940 |
| - } |
| 942 | + vals := make([]types.Val, 1) |
| 943 | + switch { |
| 944 | + case lang != "": |
| 945 | + vals[0], err = pl.ValueForTag(arg.q.ReadTs, lang) |
| 946 | + |
| 947 | + case isList: |
| 948 | + vals, err = pl.AllUntaggedValues(arg.q.ReadTs) |
941 | 949 |
|
| 950 | + default: |
| 951 | + vals[0], err = pl.Value(arg.q.ReadTs) |
| 952 | + } |
| 953 | + if err != nil { |
942 | 954 | if err == posting.ErrNoValue {
|
943 | 955 | continue
|
944 |
| - } else if err != nil { |
945 |
| - return err |
946 | 956 | }
|
| 957 | + return err |
| 958 | + } |
947 | 959 |
|
| 960 | + for _, val := range vals { |
948 | 961 | // convert data from binary to appropriate format
|
949 | 962 | strVal, err := types.Convert(val, types.StringID)
|
950 | 963 | if err == nil && matchRegex(strVal, arg.srcFn.regex) {
|
951 | 964 | filtered.Uids = append(filtered.Uids, uid)
|
| 965 | + // NOTE: We only add the uid once. |
| 966 | + break |
952 | 967 | }
|
953 | 968 | }
|
| 969 | + } |
954 | 970 |
|
955 |
| - for i := 0; i < len(arg.out.UidMatrix); i++ { |
956 |
| - algo.IntersectWith(arg.out.UidMatrix[i], filtered, arg.out.UidMatrix[i]) |
957 |
| - } |
958 |
| - } else { |
959 |
| - return err |
| 971 | + for i := 0; i < len(arg.out.UidMatrix); i++ { |
| 972 | + algo.IntersectWith(arg.out.UidMatrix[i], filtered, arg.out.UidMatrix[i]) |
960 | 973 | }
|
| 974 | + |
961 | 975 | return nil
|
962 | 976 | }
|
963 | 977 |
|
|
0 commit comments