Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

opt(query): Use sroar in pb.List #7864

Merged
merged 15 commits into from
Jun 16, 2021
29 changes: 22 additions & 7 deletions algo/uidlist.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,42 @@ package algo
import (
"sort"

"github.com/dgraph-io/dgraph/codec"
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/dgraph-io/sroar"
)

const jump = 32 // Jump size in InsersectWithJump.

// ApplyFilter applies a filter to our UIDList.
// TODO: ApplyFilter in this way should only happen for sorted uids. For normal
// filter, it should use Bitmap FastAnd or And.
func ApplyFilter(u *pb.List, f func(uint64, int) bool) {
out := u.Uids[:0]
for i, uid := range u.Uids {
if f(uid, i) {
out = append(out, uid)
uids := codec.GetUids(u)
var out []uint64
for i, x := range uids {
if f(x, i) {
out = append(out, x)
}
}
u.Uids = out

if len(u.SortedUids) > 0 {
u.SortedUids = out
} else {
b := sroar.NewBitmap()
b.SetMany(out)
u.Bitmap = codec.ToBytes(b)
}
}

// IndexOf performs a binary search on the uids slice and returns the index at
// which it finds the uid, else returns -1
func IndexOf(u *pb.List, uid uint64) int {
i := sort.Search(len(u.Uids), func(i int) bool { return u.Uids[i] >= uid })
if i < len(u.Uids) && u.Uids[i] == uid {
bm := codec.FromList(u)
// TODO(Ahsan): We might want bm.Rank()
uids := bm.ToArray()
i := sort.Search(len(uids), func(i int) bool { return uids[i] >= uid })
if i < len(uids) && uids[i] == uid {
return i
}
return -1
Expand Down
5 changes: 3 additions & 2 deletions algo/uidlist_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,18 @@ package algo
import (
"testing"

"github.com/dgraph-io/dgraph/codec"
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/stretchr/testify/require"
)

func newList(data []uint64) *pb.List {
return &pb.List{Uids: data}
return &pb.List{SortedUids: data}
}

func TestApplyFilterUint(t *testing.T) {
l := []uint64{1, 2, 3, 4, 5}
u := newList(l)
ApplyFilter(u, func(a uint64, idx int) bool { return (l[idx] % 2) == 1 })
require.Equal(t, []uint64{1, 3, 5}, u.Uids)
require.Equal(t, []uint64{1, 3, 5}, codec.GetUids(u))
}
58 changes: 49 additions & 9 deletions codec/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,51 @@ func ApproxLen(bitmap []byte) int {

func ToList(rm *sroar.Bitmap) *pb.List {
return &pb.List{
Uids: rm.ToArray(),
// Bitmap: ToBytes(rm),
Bitmap: ToBytes(rm),
}
}

func ToSortedList(rm *sroar.Bitmap) *pb.List {
return &pb.List{
SortedUids: rm.ToArray(),
}
}

func ListCardinality(l *pb.List) uint64 {
if l == nil {
return 0
}
if len(l.SortedUids) > 0 {
return uint64(len(l.SortedUids))
}
b := FromList(l)
return uint64(b.GetCardinality())
}

func OneUid(uid uint64) *pb.List {
bm := sroar.NewBitmap()
bm.Set(uid)
return ToList(bm)
}

func GetUids(l *pb.List) []uint64 {
if l == nil {
return []uint64{}
}
if len(l.SortedUids) > 0 {
return l.SortedUids
}
return FromList(l).ToArray()
}

func BitmapToSorted(l *pb.List) {
if l == nil {
return
}
l.SortedUids = FromList(l).ToArray()
l.Bitmap = nil
}

func And(rm *sroar.Bitmap, l *pb.List) {
rl := FromList(l)
rm.And(rl)
Expand Down Expand Up @@ -94,21 +134,21 @@ func ToBytes(bm *sroar.Bitmap) []byte {
if bm.IsEmpty() {
return nil
}
return bm.ToBuffer()
// TODO: We should not use ToBufferWithCopy always.
return bm.ToBufferWithCopy()
}

func FromList(l *pb.List) *sroar.Bitmap {
iw := sroar.NewBitmap()
if l == nil {
return iw
}

if len(l.BitmapDoNotUse) > 0 {
// Only one of Uids or Bitmap should be defined.
iw = sroar.FromBuffer(l.BitmapDoNotUse)
if len(l.SortedUids) > 0 {
iw.SetMany(l.SortedUids)
}
if len(l.Uids) > 0 {
iw.SetMany(l.Uids)
if len(l.Bitmap) > 0 {
// TODO: We should not use FromBufferWithCopy always.
iw = sroar.FromBufferWithCopy(l.Bitmap)
}
return iw
}
Expand Down
4 changes: 2 additions & 2 deletions edgraph/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1549,8 +1549,8 @@ func processQuery(ctx context.Context, qc *queryContext) (*api.Response, error)
// If the list of UIDs is empty but the map of values is not,
// we need to get the UIDs from the keys in the map.
var uidList []uint64
if v.OrderedUIDs != nil && len(v.OrderedUIDs.Uids) > 0 {
uidList = v.OrderedUIDs.Uids
if v.OrderedUIDs != nil && len(v.OrderedUIDs.SortedUids) > 0 {
uidList = v.OrderedUIDs.SortedUids
} else if !v.UidMap.IsEmpty() {
uidList = v.UidMap.ToArray()
} else {
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.12

// replace github.com/dgraph-io/badger/v3 => /home/mrjn/go/src/github.com/dgraph-io/badger
// replace github.com/dgraph-io/ristretto => /home/mrjn/go/src/github.com/dgraph-io/ristretto
// replace github.com/dgraph-io/roaring => /home/mrjn/go/src/github.com/dgraph-io/roaring
// replace github.com/dgraph-io/sroar => /home/ash/go/src/github.com/dgraph-io/sroar

require (
contrib.go.opencensus.io/exporter/jaeger v0.1.0
Expand All @@ -24,7 +24,7 @@ require (
github.com/dgraph-io/graphql-transport-ws v0.0.0-20210511143556-2cef522f1f15
github.com/dgraph-io/ristretto v0.0.4-0.20210504190834-0bf2acd73aa3
github.com/dgraph-io/simdjson-go v0.3.0
github.com/dgraph-io/sroar v0.0.0-20210522085927-7150620bb343
github.com/dgraph-io/sroar v0.0.0-20210604145002-865050cb7465
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ github.com/dgraph-io/ristretto v0.0.4-0.20210504190834-0bf2acd73aa3 h1:jU/wpYsEL
github.com/dgraph-io/ristretto v0.0.4-0.20210504190834-0bf2acd73aa3/go.mod h1:fux0lOrBhrVCJd3lcTHsIJhq1T2rokOu6v9Vcb3Q9ug=
github.com/dgraph-io/simdjson-go v0.3.0 h1:h71LO7vR4LHMPUhuoGN8bqGm1VNfGOlAG8BI6iDUKw0=
github.com/dgraph-io/simdjson-go v0.3.0/go.mod h1:Otpysdjaxj9OGaJusn4pgQV7OFh2bELuHANq0I78uvY=
github.com/dgraph-io/sroar v0.0.0-20210522085927-7150620bb343 h1:sm/HpWavHElAxuO/fqs+uNxqu/1arpmQodwMwc6joFQ=
github.com/dgraph-io/sroar v0.0.0-20210522085927-7150620bb343/go.mod h1:bdNPtQmcxoIQVkZEWZvX0n0/IDlHFab397xdBlP4OoE=
github.com/dgraph-io/sroar v0.0.0-20210604145002-865050cb7465 h1:wj9EEeLJyJnopcmtJlw5TzOcEgudIYLWGDE3cBpYUBQ=
github.com/dgraph-io/sroar v0.0.0-20210604145002-865050cb7465/go.mod h1:bdNPtQmcxoIQVkZEWZvX0n0/IDlHFab397xdBlP4OoE=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1 h1:CaO/zOnF8VvUfEbhRatPcwKVWamvbYd8tQGRWacE9kU=
Expand Down
33 changes: 18 additions & 15 deletions posting/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -1274,27 +1274,30 @@ func (l *List) Uids(opt ListOptions) (*pb.List, error) {
// Before this, we were only picking math.Int32 number of uids.
// Now we're picking everything.
if opt.First == 0 {
out.Uids = bm.ToArray()
out.Bitmap = codec.ToBytes(bm)
// TODO: Not yet ready to use Bitmap for data transfer. We'd have to deal with all the
// places where List.Uids is being called.
// out.Bitmap = codec.ToBytes(bm)
return out, nil
}

var itr *sroar.Iterator
if opt.First > 0 {
itr = bm.NewIterator()
} else {
itr = bm.NewReverseIterator()
}
num := abs(opt.First)
for len(out.Uids) < num && itr.HasNext() {
out.Uids = append(out.Uids, itr.Next())
num := uint64(abs(opt.First))
sz := uint64(bm.GetCardinality())
if num < sz {
if opt.First > 0 {
x, err := bm.Select(num)
if err != nil {
return nil, errors.Wrap(err, "While selecting Uids")
}
codec.RemoveRange(bm, x, math.MaxUint64)
} else {
x, err := bm.Select(sz - num)
if err != nil {
return nil, errors.Wrap(err, "While selecting Uids")
}
codec.RemoveRange(bm, 0, x)
}
}
return out, nil

// errors.Wrapf(err, "cannot retrieve UIDs from list with key %s",
// hex.EncodeToString(l.key))
return codec.ToList(bm), nil
}

// Postings calls postFn with the postings that are common with
Expand Down
39 changes: 23 additions & 16 deletions posting/list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,8 +502,8 @@ func TestAddMutation_mrjn2(t *testing.T) {
opt := ListOptions{ReadTs: uint64(i)}
list, err := ol.Uids(opt)
require.NoError(t, err)
require.EqualValues(t, 1, len(list.Uids))
require.EqualValues(t, uint64(i), list.Uids[0])
require.EqualValues(t, 1, codec.ListCardinality(list))
require.EqualValues(t, uint64(i), codec.GetUids(list)[0])
}
require.EqualValues(t, 0, ol.Length(readTs, 0))
require.NoError(t, ol.commitMutation(1, 0))
Expand Down Expand Up @@ -557,8 +557,9 @@ func TestAddMutation_mrjn2(t *testing.T) {
opts := ListOptions{ReadTs: 15}
list, err := ol.Uids(opts)
require.NoError(t, err)
require.EqualValues(t, 7, list.Uids[0])
require.EqualValues(t, 9, list.Uids[1])
uids := codec.GetUids(list)
require.EqualValues(t, 7, uids[0])
require.EqualValues(t, 9, uids[1])
}
}

Expand Down Expand Up @@ -1077,8 +1078,9 @@ func TestMultiPartListBasic(t *testing.T) {
opt := ListOptions{ReadTs: uint64(size) + 1}
l, err := ol.Uids(opt)
require.NoError(t, err)
require.Equal(t, commits, len(l.Uids), "List of Uids received: %+v", l.Uids)
for i, uid := range l.Uids {
uids := codec.GetUids(l)
require.Equal(t, commits, len(uids), "List of Uids received: %+v", uids)
for i, uid := range uids {
require.Equal(t, uint64(i+1), uid)
}
}
Expand Down Expand Up @@ -1289,10 +1291,12 @@ func TestMultiPartListWriteToDisk(t *testing.T) {
require.NoError(t, err)
newUids, err := newList.Uids(opt)
require.NoError(t, err)
require.Equal(t, commits, len(originalUids.Uids))
require.Equal(t, len(originalUids.Uids), len(newUids.Uids))
for i := range originalUids.Uids {
require.Equal(t, originalUids.Uids[i], newUids.Uids[i])
origUids := codec.GetUids(originalUids)
newIds := codec.GetUids(newUids)
require.Equal(t, commits, len(origUids))
require.Equal(t, len(origUids), len(newIds))
for i := range origUids {
require.Equal(t, origUids[i], newIds[i])
}
}

Expand Down Expand Up @@ -1355,8 +1359,9 @@ func TestMultiPartListDeleteAndAdd(t *testing.T) {
opt := ListOptions{ReadTs: math.MaxUint64}
l, err := ol.Uids(opt)
require.NoError(t, err)
require.Equal(t, size, len(l.Uids), "List of Uids received: %+v", l.Uids)
for i, uid := range l.Uids {
uids := codec.GetUids(l)
require.Equal(t, size, len(uids), "List of Uids received: %+v", uids)
for i, uid := range uids {
require.Equal(t, uint64(i+1), uid)
}

Expand Down Expand Up @@ -1391,8 +1396,9 @@ func TestMultiPartListDeleteAndAdd(t *testing.T) {
opt = ListOptions{ReadTs: math.MaxUint64}
l, err = ol.Uids(opt)
require.NoError(t, err)
require.Equal(t, size/2, len(l.Uids), "List of Uids received: %+v", l.Uids)
for i, uid := range l.Uids {
uids = codec.GetUids(l)
require.Equal(t, size/2, len(uids), "List of Uids received: %+v", uids)
for i, uid := range uids {
require.Equal(t, uint64(size/2)+uint64(i+1), uid)
}

Expand Down Expand Up @@ -1426,8 +1432,9 @@ func TestMultiPartListDeleteAndAdd(t *testing.T) {
opt = ListOptions{ReadTs: math.MaxUint64}
l, err = ol.Uids(opt)
require.NoError(t, err)
require.Equal(t, size, len(l.Uids), "List of Uids received: %+v", l.Uids)
for i, uid := range l.Uids {
uids = codec.GetUids(l)
require.Equal(t, size, len(uids), "List of Uids received: %+v", uids)
for i, uid := range uids {
require.Equal(t, uint64(i+1), uid)
}
}
Expand Down
7 changes: 4 additions & 3 deletions posting/mvcc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"math"
"testing"

"github.com/dgraph-io/dgraph/codec"
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/dgraph-io/dgraph/x"
"github.com/stretchr/testify/require"
Expand All @@ -38,7 +39,7 @@ func TestRollupTimestamp(t *testing.T) {

uidList, err := l.Uids(ListOptions{ReadTs: 7})
require.NoError(t, err)
require.Equal(t, 3, len(uidList.Uids))
require.Equal(t, uint64(3), codec.ListCardinality(uidList))

edge := &pb.DirectedEdge{
Entity: 1,
Expand All @@ -53,7 +54,7 @@ func TestRollupTimestamp(t *testing.T) {

uidList, err = nl.Uids(ListOptions{ReadTs: 11})
require.NoError(t, err)
require.Equal(t, 0, len(uidList.Uids))
require.Equal(t, uint64(0), codec.ListCardinality(uidList))

// Now check that we don't lost the highest version during a rollup operation, despite the STAR
// delete marker being the most recent update.
Expand All @@ -71,7 +72,7 @@ func TestPostingListRead(t *testing.T) {
require.NoError(t, err)
uidList, err := nl.Uids(ListOptions{ReadTs: uint64(readTs)})
require.NoError(t, err)
require.Equal(t, sz, len(uidList.Uids))
require.Equal(t, uint64(sz), codec.ListCardinality(uidList))
}

addEdgeToUID(t, attr, 1, 2, 1, 2)
Expand Down
5 changes: 3 additions & 2 deletions protos/pb.proto
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ import "github.com/gogo/protobuf/gogoproto/gogo.proto";
/* option (gogoproto.goproto_getters_all) = true; */

message List {
repeated fixed64 uids = 1;
bytes bitmap_do_not_use = 2;
/* repeated fixed64 uids = 1; *1/ */
bytes bitmap = 2;
repeated fixed64 sortedUids = 3;
}

message TaskValue {
Expand Down
Loading