From 6a3d129cfec0bd01037b77bef584d10addc0b1a7 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Thu, 5 Sep 2019 11:15:16 +0800 Subject: [PATCH 1/7] support vectorized operation for null bitmaps --- util/chunk/column.go | 10 ++++++++++ util/chunk/column_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/util/chunk/column.go b/util/chunk/column.go index 476e4bb332593..f9089430b5ab9 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -628,3 +628,13 @@ func (c *Column) CopyReconstruct(sel []int, dst *Column) *Column { } return dst } + +// OrNulls does the OR operation with all columns in the arguments. +// The user should ensure that all these columns have the same length. +func (c *Column) OrNulls(cols ...*Column) { + for _, col := range cols { + for i := range c.nullBitmap { + c.nullBitmap[i] |= col.nullBitmap[i] + } + } +} diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 75795555a5a62..f6c30074104f3 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -885,3 +885,35 @@ func BenchmarkTimeVec(b *testing.B) { } } } + +func genNullCols(n int) []*Column { + cols := make([]*Column, n) + for i := range cols { + cols[i] = NewColumn(types.NewFieldType(mysql.TypeLonglong), 1024) + cols[i].ResizeInt64(1024, false) + for j := 0; j < 1024; j++ { + if rand.Intn(10) < 5 { + cols[i].SetNull(j, true) + } + } + } + return cols +} + +func BenchmarkOrNullsVectorized(b *testing.B) { + cols := genNullCols(3) + b.ResetTimer() + for i := 0; i < b.N; i++ { + cols[0].OrNulls(cols[1:]...) + } +} + +func BenchmarkOrNullsNonVectorized(b *testing.B) { + cols := genNullCols(3) + b.ResetTimer() + for i := 0; i < b.N; i++ { + for i := 0; i < 1024; i++ { + cols[0].SetNull(i, cols[1].IsNull(i) || cols[2].IsNull(i)) + } + } +} From 94757d8d3d5c74e2dea033b70d471429a072f84b Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Thu, 5 Sep 2019 11:18:53 +0800 Subject: [PATCH 2/7] add more comments --- util/chunk/column.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/util/chunk/column.go b/util/chunk/column.go index f9089430b5ab9..211109c325106 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -630,7 +630,8 @@ func (c *Column) CopyReconstruct(sel []int, dst *Column) *Column { } // OrNulls does the OR operation with all columns in the arguments. -// The user should ensure that all these columns have the same length. +// The user should ensure that all these columns have the same length, and +// data stored in these columns are fixed-length type. func (c *Column) OrNulls(cols ...*Column) { for _, col := range cols { for i := range c.nullBitmap { From 11f81ae1ec669447840cff83db5eea5aaf4e5e30 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Thu, 5 Sep 2019 14:09:40 +0800 Subject: [PATCH 3/7] add UT --- util/chunk/column.go | 9 ++++++--- util/chunk/column_test.go | 20 +++++++++++++++++++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/util/chunk/column.go b/util/chunk/column.go index 211109c325106..381a6e02b0ddf 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -629,13 +629,16 @@ func (c *Column) CopyReconstruct(sel []int, dst *Column) *Column { return dst } -// OrNulls does the OR operation with all columns in the arguments. +// MergeNulls merges these columns' null bitmaps. +// For a row, if any column of it is null, the result is null. +// It works like: if col1.IsNull || col2.IsNull || col3.IsNull. // The user should ensure that all these columns have the same length, and // data stored in these columns are fixed-length type. -func (c *Column) OrNulls(cols ...*Column) { +func (c *Column) MergeNulls(cols ...*Column) { for _, col := range cols { for i := range c.nullBitmap { - c.nullBitmap[i] |= col.nullBitmap[i] + // 1 is null while 0 is not null, so do AND operations here. + c.nullBitmap[i] &= col.nullBitmap[i] } } } diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index f6c30074104f3..dd433a7884ada 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -900,11 +900,29 @@ func genNullCols(n int) []*Column { return cols } +func (s *testChunkSuite) TestVectorizedNulls(c *check.C) { + for i := 0; i < 256; i++ { + cols := genNullCols(4) + lCol, rCol := cols[0], cols[1] + vecResult, rowResult := cols[2], cols[3] + vecResult.SetNulls(0, 1024, false) + rowResult.SetNulls(0, 1024, false) + vecResult.MergeNulls(lCol, rCol) + for i := 0; i < 1024; i++ { + rowResult.SetNull(i, lCol.IsNull(i) || rCol.IsNull(i)) + } + + for i := 0; i < 1024; i ++ { + c.Assert(rowResult.IsNull(i), check.Equals, vecResult.IsNull(i)) + } + } +} + func BenchmarkOrNullsVectorized(b *testing.B) { cols := genNullCols(3) b.ResetTimer() for i := 0; i < b.N; i++ { - cols[0].OrNulls(cols[1:]...) + cols[0].MergeNulls(cols[1:]...) } } From a5aead4982775b58f1de2827ea5076e3f142103e Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Thu, 5 Sep 2019 14:36:56 +0800 Subject: [PATCH 4/7] refmt --- util/chunk/column_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index dd433a7884ada..007eb511a24b7 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -912,7 +912,7 @@ func (s *testChunkSuite) TestVectorizedNulls(c *check.C) { rowResult.SetNull(i, lCol.IsNull(i) || rCol.IsNull(i)) } - for i := 0; i < 1024; i ++ { + for i := 0; i < 1024; i++ { c.Assert(rowResult.IsNull(i), check.Equals, vecResult.IsNull(i)) } } From 8b62dafed7655321d1401ae10c56c349b5aae17b Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Thu, 5 Sep 2019 15:32:07 +0800 Subject: [PATCH 5/7] address comments --- util/chunk/column.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/chunk/column.go b/util/chunk/column.go index 381a6e02b0ddf..a1bb3e7bfabd7 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -637,7 +637,7 @@ func (c *Column) CopyReconstruct(sel []int, dst *Column) *Column { func (c *Column) MergeNulls(cols ...*Column) { for _, col := range cols { for i := range c.nullBitmap { - // 1 is null while 0 is not null, so do AND operations here. + // bit 0 is null, 1 is not null, so do AND operations here. c.nullBitmap[i] &= col.nullBitmap[i] } } From 522ce7f6965aa10f6531ac3077fc3c733f141f77 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Thu, 5 Sep 2019 15:36:37 +0800 Subject: [PATCH 6/7] Update util/chunk/column_test.go Co-Authored-By: Feng Liyuan --- util/chunk/column_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 007eb511a24b7..190501ebec847 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -926,7 +926,7 @@ func BenchmarkOrNullsVectorized(b *testing.B) { } } -func BenchmarkOrNullsNonVectorized(b *testing.B) { +func BenchmarkMergeNullsNonVectorized(b *testing.B) { cols := genNullCols(3) b.ResetTimer() for i := 0; i < b.N; i++ { From 7b3604243de3cfc91e23eb2bd30c18033e8736fe Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Thu, 5 Sep 2019 15:36:46 +0800 Subject: [PATCH 7/7] Update util/chunk/column_test.go Co-Authored-By: Feng Liyuan --- util/chunk/column_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 190501ebec847..867b5fe4e26a7 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -918,7 +918,7 @@ func (s *testChunkSuite) TestVectorizedNulls(c *check.C) { } } -func BenchmarkOrNullsVectorized(b *testing.B) { +func BenchmarkMergeNullsVectorized(b *testing.B) { cols := genNullCols(3) b.ResetTimer() for i := 0; i < b.N; i++ {