Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(release/v20.07) refactor: Simplify how list splits are tracked. #6070

Merged
merged 2 commits into from
Aug 6, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 15 additions & 25 deletions posting/list.go
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,13 @@ func shouldSplit(plist *pb.PostingList) bool {
return plist.Size() >= maxListSize && len(plist.Pack.Blocks) > 1
}

func (out *rollupOutput) updateSplits() {
if out.plist == nil {
out.plist = &pb.PostingList{}
}
out.plist.Splits = out.splits()
}

func (out *rollupOutput) recursiveSplit() {
// Call splitUpList. Otherwise the map of startUids to parts won't be initialized.
out.splitUpList()
Expand All @@ -1367,7 +1374,7 @@ func (out *rollupOutput) splitUpList() {
var lists []*pb.PostingList

// If list is not split yet, insert the main list.
if len(out.plist.Splits) == 0 {
if len(out.parts) == 0 {
lists = append(lists, out.plist)
}

Expand All @@ -1377,13 +1384,10 @@ func (out *rollupOutput) splitUpList() {
lists = append(lists, part)
}

// List of startUids for each list part after the splitting process is complete.
var newSplits []uint64

for i, list := range lists {
startUid := uint64(1)
// If the list is split, select the right startUid for this list.
if len(out.plist.Splits) > 0 {
if len(out.parts) > 0 {
startUid = out.plist.Splits[i]
}

Expand All @@ -1393,23 +1397,11 @@ func (out *rollupOutput) splitUpList() {
startUids, pls := binSplit(startUid, list)
for i, startUid := range startUids {
out.parts[startUid] = pls[i]
newSplits = append(newSplits, startUid)
}
} else {
// No need to split the list. Add the startUid to the array of new splits.
newSplits = append(newSplits, startUid)
}
}

// No new lists were created so there's no need to update the list of splits.
if len(newSplits) == len(lists) {
return
}

// The splits changed so update them.
out.plist = &pb.PostingList{
Splits: newSplits,
}
out.updateSplits()
}

// binSplit takes the given plist and returns two new plists, each with
Expand Down Expand Up @@ -1447,28 +1439,26 @@ func binSplit(lowUid uint64, plist *pb.PostingList) ([]uint64, []*pb.PostingList

// removeEmptySplits updates the split list by removing empty posting lists' startUids.
func (out *rollupOutput) removeEmptySplits() {
var splits []uint64
for startUid, plist := range out.parts {
// Do not remove the first split for now, as every multi-part list should always
// have a split starting with UID 1.
if startUid == 1 {
splits = append(splits, startUid)
continue
}

if !isPlistEmpty(plist) {
splits = append(splits, startUid)
if isPlistEmpty(plist) {
delete(out.parts, startUid)
}
}
out.plist.Splits = splits
sortSplits(splits)
out.updateSplits()

if len(out.plist.Splits) == 1 {
if len(out.parts) == 1 && isPlistEmpty(out.parts[1]) {
// Only the first split remains. If it's also empty, remove it as well.
// This should mark the entire list for deletion. Please note that the
// startUid of the first part is always one because a node can never have
// its uid set to zero.
if isPlistEmpty(out.parts[1]) {
delete(out.parts, 1)
out.plist.Splits = []uint64{}
}
}
Expand Down
12 changes: 12 additions & 0 deletions posting/list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -895,6 +895,16 @@ func TestAfterUIDCountWithCommit(t *testing.T) {
require.EqualValues(t, 0, ol.Length(txn.StartTs, 300))
}

func verifySplits(t *testing.T, splits []uint64) {
require.Equal(t, uint64(1), splits[0])
for i, uid := range splits {
if i == 0 {
continue
}
require.Greater(t, uid, splits[i-1])
}
}

func createMultiPartList(t *testing.T, size int, addLabel bool) (*List, int) {
// For testing, set the max list size to a lower threshold.
maxListSize = 5000
Expand Down Expand Up @@ -936,6 +946,7 @@ func createMultiPartList(t *testing.T, size int, addLabel bool) (*List, int) {
ol, err = getNew(key, ps, math.MaxUint64)
require.NoError(t, err)
require.True(t, len(ol.plist.Splits) > 0)
verifySplits(t, ol.plist.Splits)

return ol, commits
}
Expand Down Expand Up @@ -969,6 +980,7 @@ func createAndDeleteMultiPartList(t *testing.T, size int) (*List, int) {
commits++
}
require.True(t, len(ol.plist.Splits) > 0)
verifySplits(t, ol.plist.Splits)

// Delete all the previously inserted entries from the list.
baseStartTs := uint64(size) + 1
Expand Down