From ded1c4dd2cff15aefe94113af06384c3d1eb3f3e Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 13:46:14 +0800 Subject: [PATCH 01/17] add vectorized API for column --- util/chunk/chunk.go | 12 ++-- util/chunk/column.go | 136 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 6 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 66e88d94fddea..2611927c4f384 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -66,12 +66,7 @@ func New(fields []*types.FieldType, cap, maxChunkSize int) *Chunk { } for _, f := range fields { - elemLen := getFixedLen(f) - if elemLen == varElemLen { - chk.columns = append(chk.columns, newVarLenColumn(chk.capacity, nil)) - } else { - chk.columns = append(chk.columns, newFixedLenColumn(elemLen, chk.capacity)) - } + chk.columns = append(chk.columns, NewColumn(f, chk.capacity)) } return chk @@ -555,6 +550,11 @@ func (c *Chunk) AppendDatum(colIdx int, d *types.Datum) { } } +// Column returns the specific column. +func (c *Chunk) Column(colID int) *Column { + return c.columns[colID] +} + func writeTime(buf []byte, t types.Time) { binary.BigEndian.PutUint16(buf, uint16(t.Time.Year())) buf[2] = uint8(t.Time.Month()) diff --git a/util/chunk/column.go b/util/chunk/column.go index d1ff51f803200..35fb743b6ea85 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -14,10 +14,12 @@ package chunk import ( + "reflect" "unsafe" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" + "github.com/pingcap/tidb/util/hack" ) // AppendDuration appends a duration value into this Column. @@ -57,6 +59,16 @@ type Column struct { elemBuf []byte } +// NewColumn creates a new column with the specific length and capacity. +func NewColumn(ft *types.FieldType, cap int) *Column { + typeSize := getFixedLen(ft) + if typeSize == varElemLen { + return newFixedLenColumn(typeSize, cap) + } else { + return newVarLenColumn(cap, nil) + } +} + func (c *Column) isFixed() bool { return c.elemBuf != nil } @@ -190,3 +202,127 @@ func (c *Column) AppendTime(t types.Time) { writeTime(c.elemBuf, t) c.finishAppendFixed() } + +const ( + sizeInt64 = int(unsafe.Sizeof(int64(0))) + sizeUint64 = int(unsafe.Sizeof(uint64(0))) + sizeFloat32 = int(unsafe.Sizeof(float32(0))) + sizeFloat64 = int(unsafe.Sizeof(float64(0))) + sizeTime = int(unsafe.Sizeof(types.Time{})) + sizeDuration = int(unsafe.Sizeof(types.Duration{})) + sizeMyDecimal = int(unsafe.Sizeof(types.MyDecimal{})) +) + +// Int64s returns a int64 slice stored in this Column. +func (c *Column) Int64s() []int64 { + h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) + var res []int64 + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = c.length + s.Cap = h.Cap / sizeInt64 + return res +} + +// Uint64s returns a uint64 slice stored in this Column. +func (c *Column) Uint64s() []uint64 { + h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) + var res []uint64 + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = c.length + s.Cap = h.Cap / sizeUint64 + return res +} + +// Float32s returns a float32 slice stored in this Column. +func (c *Column) Float32s() []float32 { + h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) + var res []float32 + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = c.length + s.Cap = h.Cap / sizeFloat32 + return res +} + +// Float64s returns a float64 slice stored in this Column. +func (c *Column) Float64s() []float64 { + h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) + var res []float64 + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = c.length + s.Cap = h.Cap / sizeFloat64 + return res +} + +// Times returns a Time slice stored in this Column. +func (c *Column) Times() []types.Time { + h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) + var res []types.Time + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = c.length + s.Cap = h.Cap / sizeTime + return res +} + +// Durations returns a Duration slice stored in this Column. +func (c *Column) Durations() []types.Duration { + h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) + var res []types.Duration + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = c.length + s.Cap = h.Cap / sizeDuration + return res +} + +// MyDecimals returns a MyDecimal slice stored in this Column. +func (c *Column) MyDecimals() []types.MyDecimal { + h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) + var res []types.MyDecimal + s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) + s.Data = h.Data + s.Len = c.length + s.Cap = h.Cap / sizeMyDecimal + return res +} + +// GetString returns the string in the specific row. +func (c *Column) GetString(rowID int) string { + return string(hack.String(c.data[c.offsets[rowID]:c.offsets[rowID+1]])) +} + +// GetString returns the JSON in the specific row. +func (c *Column) GetJSON(rowID int) json.BinaryJSON { + start := c.offsets[rowID] + return json.BinaryJSON{TypeCode: c.data[start], Value: c.data[start+1 : c.offsets[rowID+1]]} +} + +// GetBytes returns the byte slice in the specific row. +func (c *Column) GetBytes(rowID int) []byte { + return c.data[c.offsets[rowID]:c.offsets[rowID+1]] +} + +// GetEnum returns the Enum in the specific row. +func (c *Column) GetEnum(rowID int) types.Enum { + name, val := c.getNameValue(rowID) + return types.Enum{Name: name, Value: val} +} + +// GetSet returns the Set in the specific row. +func (c *Column) GetSet(rowID int) types.Set { + name, val := c.getNameValue(rowID) + return types.Set{Name: name, Value: val} +} + +// GetString returns the byte slice in the specific row. +func (c *Column) getNameValue(rowID int) (string, uint64) { + start, end := c.offsets[rowID], c.offsets[rowID+1] + if start == end { + return "", 0 + } + return string(hack.String(c.data[start+8 : end])), *(*uint64)(unsafe.Pointer(&c.data[start])) +} From f4933a3eff39618c09d7256be5fab66de07c2cfc Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 14:25:11 +0800 Subject: [PATCH 02/17] add UT --- util/chunk/chunk.go | 2 +- util/chunk/column.go | 40 +++++++++++++++------------------------ util/chunk/column_test.go | 34 +++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 26 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 2611927c4f384..2b575259f46cb 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -471,7 +471,7 @@ func (c *Chunk) AppendUint64(colIdx int, u uint64) { // AppendFloat32 appends a float32 value to the chunk. func (c *Chunk) AppendFloat32(colIdx int, f float32) { - c.columns[colIdx].appendFloat32(f) + c.columns[colIdx].AppendFloat32(f) } // AppendFloat64 appends a float64 value to the chunk. diff --git a/util/chunk/column.go b/util/chunk/column.go index 35fb743b6ea85..d020313099362 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -15,6 +15,7 @@ package chunk import ( "reflect" + "time" "unsafe" "github.com/pingcap/tidb/types" @@ -167,8 +168,8 @@ func (c *Column) AppendUint64(u uint64) { c.finishAppendFixed() } -// appendFloat32 appends a float32 value into this Column. -func (c *Column) appendFloat32(f float32) { +// AppendFloat32 appends a float32 value into this Column. +func (c *Column) AppendFloat32(f float32) { *(*float32)(unsafe.Pointer(&c.elemBuf[0])) = f c.finishAppendFixed() } @@ -213,7 +214,7 @@ const ( sizeMyDecimal = int(unsafe.Sizeof(types.MyDecimal{})) ) -// Int64s returns a int64 slice stored in this Column. +// Int64s returns an int64 slice stored in this Column. func (c *Column) Int64s() []int64 { h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) var res []int64 @@ -257,28 +258,6 @@ func (c *Column) Float64s() []float64 { return res } -// Times returns a Time slice stored in this Column. -func (c *Column) Times() []types.Time { - h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) - var res []types.Time - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = c.length - s.Cap = h.Cap / sizeTime - return res -} - -// Durations returns a Duration slice stored in this Column. -func (c *Column) Durations() []types.Duration { - h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) - var res []types.Duration - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = c.length - s.Cap = h.Cap / sizeDuration - return res -} - // MyDecimals returns a MyDecimal slice stored in this Column. func (c *Column) MyDecimals() []types.MyDecimal { h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) @@ -318,6 +297,17 @@ func (c *Column) GetSet(rowID int) types.Set { return types.Set{Name: name, Value: val} } +// GetTime returns the Time in the specific row. +func (c *Column) GetTime(rowID int) types.Time { + return readTime(c.data[rowID*16:]) +} + +// GetDuration returns the Duration in the specific row. +func (c *Column) GetDuration(rowID int, fillFsp int) types.Duration { + dur := *(*int64)(unsafe.Pointer(&c.data[rowID*8])) + return types.Duration{Duration: time.Duration(dur), Fsp: fillFsp} +} + // GetString returns the byte slice in the specific row. func (c *Column) getNameValue(rowID int) (string, uint64) { start, end := c.offsets[rowID], c.offsets[rowID+1] diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index ff577fea30ac4..9ccda4998ef1d 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -14,7 +14,10 @@ package chunk import ( + "fmt" "github.com/pingcap/check" + "github.com/pingcap/parser/mysql" + "github.com/pingcap/tidb/types" ) func equalColumn(c1, c2 *Column) bool { @@ -67,3 +70,34 @@ func (s *testChunkSuite) TestLargeStringColumnOffset(c *check.C) { col.offsets[0] = 6 << 30 c.Check(col.offsets[0], check.Equals, int64(6<<30)) // test no overflow. } + +func (s *testChunkSuite) TestI64Column(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeLonglong)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + col.AppendInt64(int64(i)) + } + + i64s := col.Int64s() + for i := 0; i < 1024; i++ { + c.Assert(i64s[i], check.Equals, int64(i)) + i64s[i] ++ + } + + it := NewIterator4Chunk(chk) + var i int64 + for row := it.Begin(); row != it.End(); row = it.Next() { + c.Assert(row.GetInt64(0), check.Equals, int64(i+1)) + i++ + } +} + +func (s *testChunkSuite) TestStringColumn(c *check.C) { + col := NewColumn(types.NewFieldType(mysql.TypeVarString), 1024) + for i := 0; i < 1024; i++ { + col.AppendString(fmt.Sprintf("%v", i)) + } + for i := 0; i < 1024; i++ { + c.Assert(col.GetString(i), check.Equals, fmt.Sprintf("%v", i)) + } +} From ade0be884e867d15cc783e04a0e8e4d37d8fc3d8 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 14:31:16 +0800 Subject: [PATCH 03/17] add UT --- util/chunk/column.go | 4 ++-- util/chunk/column_test.go | 42 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/util/chunk/column.go b/util/chunk/column.go index d020313099362..ef5c0810775c5 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -64,9 +64,9 @@ type Column struct { func NewColumn(ft *types.FieldType, cap int) *Column { typeSize := getFixedLen(ft) if typeSize == varElemLen { - return newFixedLenColumn(typeSize, cap) - } else { return newVarLenColumn(cap, nil) + } else { + return newFixedLenColumn(typeSize, cap) } } diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 9ccda4998ef1d..76ec9cbbbbc21 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -92,6 +92,48 @@ func (s *testChunkSuite) TestI64Column(c *check.C) { } } +func (s *testChunkSuite) TestF64Column(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeDouble)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + col.AppendFloat64(float64(i)) + } + + f64s := col.Float64s() + for i := 0; i < 1024; i++ { + c.Assert(f64s[i], check.Equals, float64(i)) + f64s[i] /= 2 + } + + it := NewIterator4Chunk(chk) + var i int64 + for row := it.Begin(); row != it.End(); row = it.Next() { + c.Assert(row.GetFloat64(0), check.Equals, float64(i)/2) + i++ + } +} + +func (s *testChunkSuite) TestF32Column(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeFloat)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + col.AppendFloat32(float32(i)) + } + + f32s := col.Float32s() + for i := 0; i < 1024; i++ { + c.Assert(f32s[i], check.Equals, float32(i)) + f32s[i] /= 2 + } + + it := NewIterator4Chunk(chk) + var i int64 + for row := it.Begin(); row != it.End(); row = it.Next() { + c.Assert(row.GetFloat32(0), check.Equals, float32(i)/2) + i++ + } +} + func (s *testChunkSuite) TestStringColumn(c *check.C) { col := NewColumn(types.NewFieldType(mysql.TypeVarString), 1024) for i := 0; i < 1024; i++ { From b6e29ad5810e40f38208b33eabb55bb3ab3c073b Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 14:48:03 +0800 Subject: [PATCH 04/17] add UT --- util/chunk/column_test.go | 49 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 76ec9cbbbbc21..b0abdf18dd330 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -134,6 +134,55 @@ func (s *testChunkSuite) TestF32Column(c *check.C) { } } +func (s *testChunkSuite) TestMyDecimal(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeNewDecimal)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + d := new(types.MyDecimal) + if err := d.FromFloat64(float64(i) * 1.1); err != nil { + c.Fatal(err) + } + col.AppendMyDecimal(d) + } + + ds := col.MyDecimals() + for i := 0; i < 1024; i++ { + d := new(types.MyDecimal) + if err := d.FromFloat64(float64(i) * 1.1); err != nil { + c.Fatal(err) + } + c.Assert(d.Compare(&ds[i]), check.Equals, 0) + + if err := types.DecimalAdd(&ds[i], d, &ds[i]); err != nil { + c.Fatal(err) + } + } + + it := NewIterator4Chunk(chk) + var i int64 + for row := it.Begin(); row != it.End(); row = it.Next() { + d := new(types.MyDecimal) + if err := d.FromFloat64(float64(i) * 1.1 * 2); err != nil { + c.Fatal(err) + } + + delta := new(types.MyDecimal) + if err := types.DecimalSub(d, row.GetMyDecimal(0), delta); err != nil { + c.Fatal(err) + } + + fDelta, err := delta.ToFloat64() + if err != nil { + c.Fatal(err) + } + if fDelta > 0.0001 || fDelta < -0.0001 { + c.Fatal() + } + + i++ + } +} + func (s *testChunkSuite) TestStringColumn(c *check.C) { col := NewColumn(types.NewFieldType(mysql.TypeVarString), 1024) for i := 0; i < 1024; i++ { From a993efd58c563c689e8b26139cbdb5fd9ad73f92 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 15:00:13 +0800 Subject: [PATCH 05/17] add UT --- util/chunk/column_test.go | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index b0abdf18dd330..df5f91fb149a5 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -184,11 +184,17 @@ func (s *testChunkSuite) TestMyDecimal(c *check.C) { } func (s *testChunkSuite) TestStringColumn(c *check.C) { - col := NewColumn(types.NewFieldType(mysql.TypeVarString), 1024) + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeVarString)}, 1024) + col := chk.Column(0) for i := 0; i < 1024; i++ { - col.AppendString(fmt.Sprintf("%v", i)) + col.AppendString(fmt.Sprintf("%v", i*i)) } - for i := 0; i < 1024; i++ { - c.Assert(col.GetString(i), check.Equals, fmt.Sprintf("%v", i)) + + it := NewIterator4Chunk(chk) + var i int + for row := it.Begin(); row != it.End(); row = it.Next() { + c.Assert(row.GetString(0), check.Equals, fmt.Sprintf("%v", i*i)) + c.Assert(col.GetString(i), check.Equals, fmt.Sprintf("%v", i*i)) + i++ } } From dbb140fa48f86a4fae2b3bd728916b037f5f0fdc Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 15:07:31 +0800 Subject: [PATCH 06/17] add UT --- util/chunk/column.go | 5 +++++ util/chunk/column_test.go | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/util/chunk/column.go b/util/chunk/column.go index ef5c0810775c5..ba362ea9a0967 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -49,6 +49,11 @@ func (c *Column) AppendJSON(j json.BinaryJSON) { c.finishAppendVar() } +// AppendJSON appends a Set value into this Column. +func (c *Column) AppendSet(set types.Set) { + c.appendNameValue(set.Name, set.Value) +} + // Column stores one column of data in Apache Arrow format. // See https://arrow.apache.org/docs/memory_layout.html type Column struct { diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index df5f91fb149a5..0ea6c20265cee 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -198,3 +198,23 @@ func (s *testChunkSuite) TestStringColumn(c *check.C) { i++ } } + +func (s *testChunkSuite) TestSetColumn(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeSet)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + col.AppendSet(types.Set{Name: fmt.Sprintf("%v", i), Value: uint64(i)}) + } + + it := NewIterator4Chunk(chk) + var i int + for row := it.Begin(); row != it.End(); row = it.Next() { + s1 := col.GetSet(i) + s2 := row.GetSet(0) + c.Assert(s1.Name, check.Equals, s2.Name) + c.Assert(s1.Value, check.Equals, s2.Value) + c.Assert(s1.Name, check.Equals, fmt.Sprintf("%v", i)) + c.Assert(s1.Value, check.Equals, uint64(i)) + i++ + } +} From 31906e7cdde92cccaea16eea81765bf3ee543bca Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 15:19:47 +0800 Subject: [PATCH 07/17] add UT for JSON --- util/chunk/column_test.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 0ea6c20265cee..ac116efed029b 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -18,6 +18,7 @@ import ( "github.com/pingcap/check" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/types/json" ) func equalColumn(c1, c2 *Column) bool { @@ -218,3 +219,24 @@ func (s *testChunkSuite) TestSetColumn(c *check.C) { i++ } } + +func (s *testChunkSuite) TestJSONColumn(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeJSON)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + j := new(json.BinaryJSON) + if err := j.UnmarshalJSON([]byte(fmt.Sprintf(`{"%v":%v}`, i, i))); err != nil { + c.Fatal(err) + } + col.AppendJSON(*j) + } + + it := NewIterator4Chunk(chk) + var i int + for row := it.Begin(); row != it.End(); row = it.Next() { + j1 := col.GetJSON(i) + j2 := row.GetJSON(0) + c.Assert(j1.String(), check.Equals, j2.String()) + i++ + } +} From 70f7523ab6f31b967861bd9aba68c85d23ad7555 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 15:25:27 +0800 Subject: [PATCH 08/17] add UT for Time --- util/chunk/column.go | 2 -- util/chunk/column_test.go | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/util/chunk/column.go b/util/chunk/column.go index ba362ea9a0967..3ff54877d7f52 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -214,8 +214,6 @@ const ( sizeUint64 = int(unsafe.Sizeof(uint64(0))) sizeFloat32 = int(unsafe.Sizeof(float32(0))) sizeFloat64 = int(unsafe.Sizeof(float64(0))) - sizeTime = int(unsafe.Sizeof(types.Time{})) - sizeDuration = int(unsafe.Sizeof(types.Duration{})) sizeMyDecimal = int(unsafe.Sizeof(types.MyDecimal{})) ) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index ac116efed029b..4960a02347dc5 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -19,6 +19,7 @@ import ( "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" + "time" ) func equalColumn(c1, c2 *Column) bool { @@ -240,3 +241,21 @@ func (s *testChunkSuite) TestJSONColumn(c *check.C) { i++ } } + +func (s *testChunkSuite) TestTimeColumn(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeDatetime)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + col.AppendTime(types.CurrentTime(mysql.TypeDatetime)) + time.Sleep(time.Millisecond / 10) + } + + it := NewIterator4Chunk(chk) + var i int + for row := it.Begin(); row != it.End(); row = it.Next() { + j1 := col.GetTime(i) + j2 := row.GetTime(0) + c.Assert(j1.Compare(j2), check.Equals, 0) + i++ + } +} From 6703a161e77f4ac351a73c77a2f0ed6ca891a117 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 15:27:17 +0800 Subject: [PATCH 09/17] add UT for Duration --- util/chunk/column_test.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 4960a02347dc5..54a5e51d52edc 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -259,3 +259,20 @@ func (s *testChunkSuite) TestTimeColumn(c *check.C) { i++ } } + +func (s *testChunkSuite) TestDurationColumn(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeDuration)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + col.AppendDuration(types.Duration{Duration: time.Second * time.Duration(i)}) + } + + it := NewIterator4Chunk(chk) + var i int + for row := it.Begin(); row != it.End(); row = it.Next() { + j1 := col.GetDuration(i, 0) + j2 := row.GetDuration(0, 0) + c.Assert(j1.Compare(j2), check.Equals, 0) + i++ + } +} From 93795b3760836c83599bf842522ee8addbe6486e Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 15:30:31 +0800 Subject: [PATCH 10/17] add UT for TestEnumColumn --- util/chunk/column.go | 5 +++++ util/chunk/column_test.go | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/util/chunk/column.go b/util/chunk/column.go index 3ff54877d7f52..a6a6f294a1f13 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -209,6 +209,11 @@ func (c *Column) AppendTime(t types.Time) { c.finishAppendFixed() } +// AppendEnum appends a Enum value into this Column. +func (c *Column) AppendEnum(enum types.Enum) { + c.appendNameValue(enum.Name, enum.Value) +} + const ( sizeInt64 = int(unsafe.Sizeof(int64(0))) sizeUint64 = int(unsafe.Sizeof(uint64(0))) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 54a5e51d52edc..289c6fff3a3c3 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -276,3 +276,23 @@ func (s *testChunkSuite) TestDurationColumn(c *check.C) { i++ } } + +func (s *testChunkSuite) TestEnumColumn(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeEnum)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + col.AppendEnum(types.Enum{Name: fmt.Sprintf("%v", i), Value: uint64(i)}) + } + + it := NewIterator4Chunk(chk) + var i int + for row := it.Begin(); row != it.End(); row = it.Next() { + s1 := col.GetEnum(i) + s2 := row.GetEnum(0) + c.Assert(s1.Name, check.Equals, s2.Name) + c.Assert(s1.Value, check.Equals, s2.Value) + c.Assert(s1.Name, check.Equals, fmt.Sprintf("%v", i)) + c.Assert(s1.Value, check.Equals, uint64(i)) + i++ + } +} From 3dc43895bfca724b866f894ceba2c7462d2482ef Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 15:33:42 +0800 Subject: [PATCH 11/17] add UT for IsNull --- util/chunk/chunk.go | 8 ++++---- util/chunk/chunk_util.go | 6 +++--- util/chunk/column.go | 3 ++- util/chunk/column_test.go | 24 ++++++++++++++++++++++++ util/chunk/mutrow.go | 4 ++-- util/chunk/row.go | 2 +- 6 files changed, 36 insertions(+), 11 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 2b575259f46cb..4217d64f1595a 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -305,7 +305,7 @@ func (c *Chunk) AppendRow(row Row) { func (c *Chunk) AppendPartialRow(colIdx int, row Row) { for i, rowCol := range row.c.columns { chkCol := c.columns[colIdx+i] - chkCol.appendNullBitmap(!rowCol.isNull(row.idx)) + chkCol.appendNullBitmap(!rowCol.IsNull(row.idx)) if rowCol.isFixed() { elemLen := len(rowCol.elemBuf) offset := row.idx * elemLen @@ -333,7 +333,7 @@ func (c *Chunk) PreAlloc(row Row) (rowIdx uint32) { rowIdx = uint32(c.NumRows()) for i, srcCol := range row.c.columns { dstCol := c.columns[i] - dstCol.appendNullBitmap(!srcCol.isNull(row.idx)) + dstCol.appendNullBitmap(!srcCol.IsNull(row.idx)) elemLen := len(srcCol.elemBuf) if !srcCol.isFixed() { elemLen = int(srcCol.offsets[row.idx+1] - srcCol.offsets[row.idx]) @@ -416,7 +416,7 @@ func (c *Chunk) Append(other *Chunk, begin, end int) { } } for i := begin; i < end; i++ { - dst.appendNullBitmap(!src.isNull(i)) + dst.appendNullBitmap(!src.IsNull(i)) dst.length++ } } @@ -434,7 +434,7 @@ func (c *Chunk) TruncateTo(numRows int) { col.offsets = col.offsets[:numRows+1] } for i := numRows; i < col.length; i++ { - if col.isNull(i) { + if col.IsNull(i) { col.nullCount-- } } diff --git a/util/chunk/chunk_util.go b/util/chunk/chunk_util.go index 9bc6dddb73fda..be15dafe44a87 100644 --- a/util/chunk/chunk_util.go +++ b/util/chunk/chunk_util.go @@ -47,7 +47,7 @@ func copySelectedInnerRows(innerColOffset, outerColOffset int, src *Chunk, selec if !selected[i] { continue } - dstCol.appendNullBitmap(!srcCol.isNull(i)) + dstCol.appendNullBitmap(!srcCol.IsNull(i)) dstCol.length++ elemLen := len(srcCol.elemBuf) @@ -59,7 +59,7 @@ func copySelectedInnerRows(innerColOffset, outerColOffset int, src *Chunk, selec if !selected[i] { continue } - dstCol.appendNullBitmap(!srcCol.isNull(i)) + dstCol.appendNullBitmap(!srcCol.IsNull(i)) dstCol.length++ start, end := srcCol.offsets[i], srcCol.offsets[i+1] @@ -86,7 +86,7 @@ func copyOuterRows(innerColOffset, outerColOffset int, src *Chunk, numRows int, } for i, srcCol := range srcCols { dstCol := dst.columns[outerColOffset+i] - dstCol.appendMultiSameNullBitmap(!srcCol.isNull(row.idx), numRows) + dstCol.appendMultiSameNullBitmap(!srcCol.IsNull(row.idx), numRows) dstCol.length += numRows if srcCol.isFixed() { elemLen := len(srcCol.elemBuf) diff --git a/util/chunk/column.go b/util/chunk/column.go index a6a6f294a1f13..9e1ec5a8eaae3 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -91,7 +91,8 @@ func (c *Column) Reset() { c.data = c.data[:0] } -func (c *Column) isNull(rowIdx int) bool { +// IsNull returns if this row is null. +func (c *Column) IsNull(rowIdx int) bool { nullByte := c.nullBitmap[rowIdx/8] return nullByte&(1<<(uint(rowIdx)&7)) == 0 } diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 289c6fff3a3c3..6f667ce03a14e 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -296,3 +296,27 @@ func (s *testChunkSuite) TestEnumColumn(c *check.C) { i++ } } + +func (s *testChunkSuite) TestNullsColumn(c *check.C) { + chk := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeLonglong)}, 1024) + col := chk.Column(0) + for i := 0; i < 1024; i++ { + if i%2 == 0 { + col.AppendNull() + continue + } + col.AppendInt64(int64(i)) + } + + it := NewIterator4Chunk(chk) + var i int + for row := it.Begin(); row != it.End(); row = it.Next() { + if i%2 == 0 { + c.Assert(row.IsNull(0), check.Equals, true) + c.Assert(col.IsNull(i), check.Equals, true) + } else { + c.Assert(row.GetInt64(0), check.Equals, int64(i)) + } + i++ + } +} diff --git a/util/chunk/mutrow.go b/util/chunk/mutrow.go index aa36326559930..b0bddc18f3029 100644 --- a/util/chunk/mutrow.go +++ b/util/chunk/mutrow.go @@ -203,7 +203,7 @@ func makeMutRowBytesColumn(bin []byte) *Column { func (mr MutRow) SetRow(row Row) { for colIdx, rCol := range row.c.columns { mrCol := mr.c.columns[colIdx] - if rCol.isNull(row.idx) { + if rCol.IsNull(row.idx) { mrCol.nullBitmap[0] = 0 continue } @@ -351,7 +351,7 @@ func setMutRowJSON(col *Column, j json.BinaryJSON) { func (mr MutRow) ShallowCopyPartialRow(colIdx int, row Row) { for i, srcCol := range row.c.columns { dstCol := mr.c.columns[colIdx+i] - if !srcCol.isNull(row.idx) { + if !srcCol.IsNull(row.idx) { // MutRow only contains one row, so we can directly set the whole byte. dstCol.nullBitmap[0] = 1 } else { diff --git a/util/chunk/row.go b/util/chunk/row.go index 0d282558fc0e1..b4c22d6b02210 100644 --- a/util/chunk/row.go +++ b/util/chunk/row.go @@ -220,5 +220,5 @@ func (r Row) GetDatum(colIdx int, tp *types.FieldType) types.Datum { // IsNull returns if the datum in the chunk.Row is null. func (r Row) IsNull(colIdx int) bool { - return r.c.columns[colIdx].isNull(r.idx) + return r.c.columns[colIdx].IsNull(r.idx) } From e59ac63ec7b6232360d5ed726aba07ef2650cc19 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 15:57:43 +0800 Subject: [PATCH 12/17] refmt --- util/chunk/column_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 6f667ce03a14e..59b709a92ac95 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -15,11 +15,12 @@ package chunk import ( "fmt" + "time" + "github.com/pingcap/check" "github.com/pingcap/parser/mysql" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" - "time" ) func equalColumn(c1, c2 *Column) bool { From 37d81d769ecaaaeaeeccfdc2d58af4df1cadd577 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 16:00:05 +0800 Subject: [PATCH 13/17] refmt --- util/chunk/column_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 59b709a92ac95..0b0687f3bbb80 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -84,7 +84,7 @@ func (s *testChunkSuite) TestI64Column(c *check.C) { i64s := col.Int64s() for i := 0; i < 1024; i++ { c.Assert(i64s[i], check.Equals, int64(i)) - i64s[i] ++ + i64s[i]++ } it := NewIterator4Chunk(chk) From 669f40106843948fae95ca2645b7965ee5cbbb24 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 16:03:11 +0800 Subject: [PATCH 14/17] make lint happy --- util/chunk/column.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/util/chunk/column.go b/util/chunk/column.go index 9e1ec5a8eaae3..c3c417412cba6 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -49,7 +49,7 @@ func (c *Column) AppendJSON(j json.BinaryJSON) { c.finishAppendVar() } -// AppendJSON appends a Set value into this Column. +// AppendSet appends a Set value into this Column. func (c *Column) AppendSet(set types.Set) { c.appendNameValue(set.Name, set.Value) } @@ -70,9 +70,8 @@ func NewColumn(ft *types.FieldType, cap int) *Column { typeSize := getFixedLen(ft) if typeSize == varElemLen { return newVarLenColumn(cap, nil) - } else { - return newFixedLenColumn(typeSize, cap) } + return newFixedLenColumn(typeSize, cap) } func (c *Column) isFixed() bool { @@ -283,7 +282,7 @@ func (c *Column) GetString(rowID int) string { return string(hack.String(c.data[c.offsets[rowID]:c.offsets[rowID+1]])) } -// GetString returns the JSON in the specific row. +// GetJSON returns the JSON in the specific row. func (c *Column) GetJSON(rowID int) json.BinaryJSON { start := c.offsets[rowID] return json.BinaryJSON{TypeCode: c.data[start], Value: c.data[start+1 : c.offsets[rowID+1]]} @@ -317,7 +316,6 @@ func (c *Column) GetDuration(rowID int, fillFsp int) types.Duration { return types.Duration{Duration: time.Duration(dur), Fsp: fillFsp} } -// GetString returns the byte slice in the specific row. func (c *Column) getNameValue(rowID int) (string, uint64) { start, end := c.offsets[rowID], c.offsets[rowID+1] if start == end { From c8b731bb4c99d67e1d4adca0158a4b973037f88b Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 16:19:05 +0800 Subject: [PATCH 15/17] refmt --- util/chunk/column.go | 37 ++++++++++++------------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/util/chunk/column.go b/util/chunk/column.go index c3c417412cba6..33824096c2141 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -222,58 +222,45 @@ const ( sizeMyDecimal = int(unsafe.Sizeof(types.MyDecimal{})) ) +func (c *Column) castSliceHeader(header *reflect.SliceHeader, typeSize int) { + h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) + header.Data = h.Data + header.Len = c.length + header.Cap = h.Cap / typeSize +} + // Int64s returns an int64 slice stored in this Column. func (c *Column) Int64s() []int64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) var res []int64 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = c.length - s.Cap = h.Cap / sizeInt64 + c.castSliceHeader((*reflect.SliceHeader)(unsafe.Pointer(&res)), sizeInt64) return res } // Uint64s returns a uint64 slice stored in this Column. func (c *Column) Uint64s() []uint64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) var res []uint64 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = c.length - s.Cap = h.Cap / sizeUint64 + c.castSliceHeader((*reflect.SliceHeader)(unsafe.Pointer(&res)), sizeUint64) return res } // Float32s returns a float32 slice stored in this Column. func (c *Column) Float32s() []float32 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) var res []float32 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = c.length - s.Cap = h.Cap / sizeFloat32 + c.castSliceHeader((*reflect.SliceHeader)(unsafe.Pointer(&res)), sizeFloat32) return res } // Float64s returns a float64 slice stored in this Column. func (c *Column) Float64s() []float64 { - h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) var res []float64 - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = c.length - s.Cap = h.Cap / sizeFloat64 + c.castSliceHeader((*reflect.SliceHeader)(unsafe.Pointer(&res)), sizeFloat64) return res } // MyDecimals returns a MyDecimal slice stored in this Column. func (c *Column) MyDecimals() []types.MyDecimal { - h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) var res []types.MyDecimal - s := (*reflect.SliceHeader)(unsafe.Pointer(&res)) - s.Data = h.Data - s.Len = c.length - s.Cap = h.Cap / sizeMyDecimal + c.castSliceHeader((*reflect.SliceHeader)(unsafe.Pointer(&res)), sizeMyDecimal) return res } From 359047d4d8310d4fa3b729d437de0d1d2f7f383d Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 16:31:50 +0800 Subject: [PATCH 16/17] address comments --- util/chunk/column.go | 9 ++++----- util/chunk/column_test.go | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/util/chunk/column.go b/util/chunk/column.go index 33824096c2141..d06b1f2e92669 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -223,10 +223,9 @@ const ( ) func (c *Column) castSliceHeader(header *reflect.SliceHeader, typeSize int) { - h := (*reflect.SliceHeader)(unsafe.Pointer(&c.data)) - header.Data = h.Data + header.Data = uintptr(unsafe.Pointer(&c.data[0])) header.Len = c.length - header.Cap = h.Cap / typeSize + header.Cap = cap(c.data) / typeSize } // Int64s returns an int64 slice stored in this Column. @@ -257,8 +256,8 @@ func (c *Column) Float64s() []float64 { return res } -// MyDecimals returns a MyDecimal slice stored in this Column. -func (c *Column) MyDecimals() []types.MyDecimal { +// Decimals returns a MyDecimal slice stored in this Column. +func (c *Column) Decimals() []types.MyDecimal { var res []types.MyDecimal c.castSliceHeader((*reflect.SliceHeader)(unsafe.Pointer(&res)), sizeMyDecimal) return res diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 0b0687f3bbb80..210e270155b84 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -148,7 +148,7 @@ func (s *testChunkSuite) TestMyDecimal(c *check.C) { col.AppendMyDecimal(d) } - ds := col.MyDecimals() + ds := col.Decimals() for i := 0; i < 1024; i++ { d := new(types.MyDecimal) if err := d.FromFloat64(float64(i) * 1.1); err != nil { From 21db4a1f4ec1082e1aabe2ed6859321d10a8f6e5 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Mon, 22 Jul 2019 18:40:44 +0800 Subject: [PATCH 17/17] address comments --- util/chunk/chunk.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/chunk/chunk.go b/util/chunk/chunk.go index 4217d64f1595a..60bc217ac74e5 100644 --- a/util/chunk/chunk.go +++ b/util/chunk/chunk.go @@ -551,8 +551,8 @@ func (c *Chunk) AppendDatum(colIdx int, d *types.Datum) { } // Column returns the specific column. -func (c *Chunk) Column(colID int) *Column { - return c.columns[colID] +func (c *Chunk) Column(colIdx int) *Column { + return c.columns[colIdx] } func writeTime(buf []byte, t types.Time) {