Skip to content

Commit

Permalink
Address PR review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
animesh2049 committed Jun 18, 2019
1 parent b1ea9d4 commit 3100387
Show file tree
Hide file tree
Showing 6 changed files with 273 additions and 326 deletions.
29 changes: 0 additions & 29 deletions algo/algo_test.go

This file was deleted.

44 changes: 22 additions & 22 deletions codec/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,15 @@ func (e *Encoder) packBlock() {
if len(e.uids) == 0 {
return
}
block := &pb.UidBlock{Base: e.uids[0], UidNum: uint32(len(e.uids))}
block := &pb.UidBlock{Base: e.uids[0], NumUids: uint32(len(e.uids))}
last := e.uids[0]
e.uids = e.uids[1:]

var out bytes.Buffer
buf := make([]byte, 17)
tmpUids := make([]uint32, 4)
for {
i := 0
for ; i < 4; i++ {
for i := 0; i < 4; i++ {
if i >= len(e.uids) {
// Padding with '0' because Encode4 encodes only in batches of 4.
tmpUids[i] = 0
Expand Down Expand Up @@ -126,28 +125,29 @@ func (d *Decoder) unpackBlock() []uint64 {
last := block.Base
d.uids = append(d.uids, last)

var tmpUids [4]uint32
deltas := block.Deltas

// Decoding always expects the encoded byte array to be of
// length >= 4. Padding doesn't affect the decoded values.
deltas = append(deltas, 0, 0, 0)

// Read back the encoded varints.
// Due to padding of 3 '0's, it might be the case that we don't
// completely consume the byte array. We are encoding uids in groups
// of 4, which requires at least 5 bytes. So if we are left with deltas
// of length < 5, those are probably the padded 0s.
for len(deltas) >= 5 {
groupvarint.Decode4(tmpUids[:], deltas)
deltas = deltas[groupvarint.BytesUsed[deltas[0]]:]
tmpUids := make([]uint32, 4)
var sum uint64
encData := block.Deltas

for uint32(len(d.uids)) < block.NumUids {

if len(encData) < 17 {
// Decode4 decodes 4 uids from encData. It moves the slice (encData) forward while
// decoding and expects it to be of length >= 4 at all stages. Padding
// with zeros to make sure the length is always >= 4.
encData = append(encData, 0, 0, 0)
}

groupvarint.Decode4(tmpUids[:], encData)
encData = encData[groupvarint.BytesUsed[encData[0]]:]
for i := 0; i < 4; i++ {
d.uids = append(d.uids, uint64(tmpUids[i])+last)
last += uint64(tmpUids[i])
sum = last + uint64(tmpUids[i])
d.uids = append(d.uids, sum)
last = sum
}
}

d.uids = d.uids[:block.UidNum]
d.uids = d.uids[:block.NumUids]
return d.uids
}

Expand Down Expand Up @@ -297,7 +297,7 @@ func ExactLen(pack *pb.UidPack) int {
}
num := 0
for _, b := range pack.Blocks {
num += int(b.UidNum) // UidNum includes the base UID.
num += int(b.NumUids) // NumUids includes the base UID.
}
return num
}
Expand Down
46 changes: 10 additions & 36 deletions codec/codec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"bytes"
"compress/gzip"
"encoding/binary"
"fmt"
"math"
"math/rand"
"sort"
Expand Down Expand Up @@ -109,21 +108,10 @@ func TestSeek(t *testing.T) {
}

dec.blockIdx = 0
var found bool
for i := 200; i < 10000; i += 100 {
found = false
for i := 100; i < 10000; i += 100 {
uids := dec.LinearSeek(uint64(i))

for _, uid := range uids {
if uid == uint64(i) {
found = true
break
}
}

if !found {
t.Errorf("Couldn't find %d using LinearSeek\n", i)
}
require.Contains(t, uids, uint64(i))
}
}

Expand Down Expand Up @@ -267,16 +255,6 @@ func benchmarkUidPackDecode(b *testing.B, blockSize int) {
}
}

type UInts []uint64

func (u UInts) Len() int { return len(u) }

func (u UInts) Less(i, j int) bool { return u[i] < u[j] }

func (u UInts) Swap(i, j int) {
u[i], u[j] = u[j], u[i]
}

func TestEncoding(t *testing.T) {
bigInts := make([]uint64, 5)
bigInts[0] = 0xf000000000000000
Expand All @@ -286,28 +264,24 @@ func TestEncoding(t *testing.T) {
bigInts[4] = 0x0f0f0f0f00000000

rand.Seed(time.Now().UnixNano())
var testNums = []int{0, 1, 2, 3, 5, 13, 18, 100, 99, 98}
var lengths = []int{0, 1, 2, 3, 5, 13, 18, 100, 99, 98}

for tc := 0; tc < len(testNums); tc++ {
ints := make([]uint64, testNums[tc])
for i := 0; i < 50 && i < testNums[tc]; i++ {
for tc := 0; tc < len(lengths); tc++ {
ints := make([]uint64, lengths[tc])

for i := 0; i < 50 && i < lengths[tc]; i++ {
ints[i] = uint64(rand.Uint32())
}

for i := 50; i < testNums[tc]; i++ {
for i := 50; i < lengths[tc]; i++ {
ints[i] = uint64(rand.Uint32()) + bigInts[rand.Intn(5)]
}

sort.Sort(UInts(ints))
sort.Slice(ints, func(i, j int) bool { return ints[i] < ints[j] })

encodedInts := Encode(ints, 256)
decodedInts := Decode(encodedInts, 0)

for i := 0; i < testNums[tc]; i++ {
if ints[i] != decodedInts[i] {
fmt.Println(ints[i], decodedInts[i])
t.Errorf("Expected: %d Actual: %d", ints[i], decodedInts[i])
}
}
require.Equal(t, ints, decodedInts)
}
}
6 changes: 4 additions & 2 deletions codec/decoderversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

package codec

import "fmt"
import (
"github.com/golang/glog"
)

func init() {
fmt.Println("[Decoder]: Using assembly version of decoder")
glog.Infof("[Decoder]: Using assembly version of decoder")
}
4 changes: 2 additions & 2 deletions protos/pb.proto
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,11 @@ message UidBlock {
// because when the PB is brought to memory, Go would always use 8-bytes per integer. Instead,
// storing it as a byte slice is a lot cheaper in memory.
bytes deltas = 2;
// uidNum is the number of UIDs in the block. We are including this because we want to
// num_uids is the number of UIDs in the block. We are including this because we want to
// switch encoding to groupvarint encoding. The currently available open-source version implements
// encoding and decoding for uint32. We want to wrap it around our logic to use it here.
// Default Blocksize is 256 so uint32 would be sufficient.
uint32 uidNum = 3;
uint32 num_uids = 3;
}

message UidPack {
Expand Down
Loading

0 comments on commit 3100387

Please sign in to comment.