From 27913b8be3604e02274b222b8c3374adcfcd9614 Mon Sep 17 00:00:00 2001 From: Yuanjia Zhang Date: Fri, 9 Aug 2019 15:04:03 +0800 Subject: [PATCH] add resize and reserve for Column update comment update comment update update comments add UT address comments address comment add comment --- expression/vectorized.go | 21 ++++--- util/chunk/column.go | 121 ++++++++++++++++++++++++++------------ util/chunk/column_test.go | 48 +++++++++++---- 3 files changed, 134 insertions(+), 56 deletions(-) diff --git a/expression/vectorized.go b/expression/vectorized.go index ae9c5a23777ab..a83fea092dd45 100644 --- a/expression/vectorized.go +++ b/expression/vectorized.go @@ -25,12 +25,13 @@ func genVecFromConstExpr(ctx sessionctx.Context, expr Expression, input *chunk.C tp := expr.GetType() switch tp.EvalType() { case types.ETInt: - result.PreAllocInt64(n) + result.ResizeInt64(n) v, isNull, err := expr.EvalInt(ctx, chunk.Row{}) if err != nil { return err } - if isNull { // all slots are set to null by PreAlloc() + if isNull { + result.SetNulls(0, n, true) return nil } i64s := result.Int64s() @@ -39,12 +40,13 @@ func genVecFromConstExpr(ctx sessionctx.Context, expr Expression, input *chunk.C } result.SetNulls(0, n, false) case types.ETReal: - result.PreAllocFloat64(n) + result.ResizeFloat64(n) v, isNull, err := expr.EvalReal(ctx, chunk.Row{}) if err != nil { return err } - if isNull { // all slots are set to null by PreAlloc() + if isNull { + result.SetNulls(0, n, true) return nil } f64s := result.Float64s() @@ -53,12 +55,13 @@ func genVecFromConstExpr(ctx sessionctx.Context, expr Expression, input *chunk.C } result.SetNulls(0, n, false) case types.ETDecimal: - result.PreAllocDecimal(n) + result.ResizeDecimal(n) v, isNull, err := expr.EvalDecimal(ctx, chunk.Row{}) if err != nil { return err } - if isNull { // all slots are set to null by PreAlloc() + if isNull { + result.SetNulls(0, n, true) return nil } ds := result.Decimals() @@ -82,7 +85,7 @@ func genVecFromConstExpr(ctx sessionctx.Context, expr Expression, input *chunk.C } } case types.ETDuration: - result.Reset() + result.ResizeDuration(n) v, isNull, err := expr.EvalDuration(ctx, chunk.Row{}) if err != nil { return err @@ -97,7 +100,7 @@ func genVecFromConstExpr(ctx sessionctx.Context, expr Expression, input *chunk.C } } case types.ETJson: - result.Reset() + result.ReserveJSON(n) v, isNull, err := expr.EvalJSON(ctx, chunk.Row{}) if err != nil { return err @@ -112,7 +115,7 @@ func genVecFromConstExpr(ctx sessionctx.Context, expr Expression, input *chunk.C } } case types.ETString: - result.Reset() + result.ReserveString(n) v, isNull, err := expr.EvalString(ctx, chunk.Row{}) if err != nil { return err diff --git a/util/chunk/column.go b/util/chunk/column.go index 09a2fb7c9058d..9609e2e6eede1 100644 --- a/util/chunk/column.go +++ b/util/chunk/column.go @@ -242,33 +242,57 @@ const ( sizeGoDuration = int(unsafe.Sizeof(time.Duration(0))) ) -// preAlloc allocates space for a fixed-length-type slice and resets all slots to null. -func (c *Column) preAlloc(length, typeSize int) { - nData := length * typeSize - if len(c.data) >= nData { - c.data = c.data[:nData] +// resize resizes the column so that it contains n elements, only valid for fixed-length types. +func (c *Column) resize(n, typeSize int) { + sizeData := n * typeSize + if cap(c.data) >= sizeData { + (*reflect.SliceHeader)(unsafe.Pointer(&c.data)).Len = sizeData } else { - c.data = make([]byte, nData) + c.data = make([]byte, sizeData) } - nBitmap := (length + 7) >> 3 - if len(c.nullBitmap) >= nBitmap { - c.nullBitmap = c.nullBitmap[:nBitmap] - for i := range c.nullBitmap { - // resets all slots to null. - c.nullBitmap[i] = 0 - } + sizeNulls := (n + 7) >> 3 + if cap(c.nullBitmap) >= sizeNulls { + (*reflect.SliceHeader)(unsafe.Pointer(&c.nullBitmap)).Len = sizeNulls } else { - c.nullBitmap = make([]byte, nBitmap) + c.nullBitmap = make([]byte, sizeNulls) } - if c.elemBuf != nil && len(c.elemBuf) >= typeSize { - c.elemBuf = c.elemBuf[:typeSize] + if cap(c.elemBuf) >= typeSize { + (*reflect.SliceHeader)(unsafe.Pointer(&c.elemBuf)).Len = typeSize } else { c.elemBuf = make([]byte, typeSize) } - c.length = length + c.length = n +} + +// reserve makes the column capacity be at least enough to contain n elements. +// this method is only valid for var-length types and estElemSize is the estimated size of this type. +func (c *Column) reserve(n, estElemSize int) { + sizeData := n * estElemSize + if cap(c.data) >= sizeData { + c.data = c.data[:0] + } else { + c.data = make([]byte, 0, sizeData) + } + + sizeNulls := (n + 7) >> 3 + if cap(c.nullBitmap) >= sizeNulls { + c.nullBitmap = c.nullBitmap[:0] + } else { + c.nullBitmap = make([]byte, 0, sizeNulls) + } + + sizeOffs := n + 1 + if cap(c.offsets) >= sizeOffs { + c.offsets = c.offsets[:1] + } else { + c.offsets = make([]int64, 1, sizeOffs) + } + + c.elemBuf = nil + c.length = 0 } // SetNull sets the rowIdx to null. @@ -313,38 +337,63 @@ func (c *Column) nullCount() int { return cnt } -// PreAllocInt64 allocates space for an int64 slice and resets all slots to null. -func (c *Column) PreAllocInt64(length int) { - c.preAlloc(length, sizeInt64) +// ResizeInt64 resizes the column so that it contains n int64 elements. +func (c *Column) ResizeInt64(n int) { + c.resize(n, sizeInt64) +} + +// ResizeUint64 resizes the column so that it contains n uint64 elements. +func (c *Column) ResizeUint64(n int) { + c.resize(n, sizeUint64) +} + +// ResizeFloat32 resizes the column so that it contains n float32 elements. +func (c *Column) ResizeFloat32(n int) { + c.resize(n, sizeFloat32) +} + +// ResizeFloat64 resizes the column so that it contains n float64 elements. +func (c *Column) ResizeFloat64(n int) { + c.resize(n, sizeFloat64) +} + +// ResizeDecimal resizes the column so that it contains n decimal elements. +func (c *Column) ResizeDecimal(n int) { + c.resize(n, sizeMyDecimal) +} + +// ResizeDuration resizes the column so that it contains n duration elements. +func (c *Column) ResizeDuration(n int) { + c.resize(n, sizeGoDuration) } -// PreAllocUint64 allocates space for a uint64 slice and resets all slots to null. -func (c *Column) PreAllocUint64(length int) { - c.preAlloc(length, sizeUint64) +// ReserveString changes the column capacity to store n string elements and set the length to zero. +func (c *Column) ReserveString(n int) { + c.reserve(n, 8) } -// PreAllocFloat32 allocates space for a float32 slice and resets all slots to null. -func (c *Column) PreAllocFloat32(length int) { - c.preAlloc(length, sizeFloat32) +// ReserveBytes changes the column capacity to store n bytes elements and set the length to zero. +func (c *Column) ReserveBytes(n int) { + c.reserve(n, 8) } -// PreAllocFloat64 allocates space for a float64 slice and resets all slots to null. -func (c *Column) PreAllocFloat64(length int) { - c.preAlloc(length, sizeFloat64) +// ReserveJSON changes the column capacity to store n JSON elements and set the length to zero. +func (c *Column) ReserveJSON(n int) { + c.reserve(n, 8) } -// PreAllocDecimal allocates space for a decimal slice and resets all slots to null. -func (c *Column) PreAllocDecimal(length int) { - c.preAlloc(length, sizeMyDecimal) +// ReserveSet changes the column capacity to store n set elements and set the length to zero. +func (c *Column) ReserveSet(n int) { + c.reserve(n, 8) } -// PreAllocDuration allocates space for a duration slice and resets all slots to null. -func (c *Column) PreAllocDuration(length int) { - c.preAlloc(length, sizeGoDuration) +// ReserveEnum changes the column capacity to store n enum elements and set the length to zero. +func (c *Column) ReserveEnum(n int) { + c.reserve(n, 8) } func (c *Column) castSliceHeader(header *reflect.SliceHeader, typeSize int) { - header.Data = uintptr(unsafe.Pointer(&c.data[0])) + header.Data = (*reflect.SliceHeader)(unsafe.Pointer(&c.data)).Data header.Len = c.length header.Cap = cap(c.data) / typeSize } diff --git a/util/chunk/column_test.go b/util/chunk/column_test.go index 5cc7a0dc0766b..2397bfda72e9c 100644 --- a/util/chunk/column_test.go +++ b/util/chunk/column_test.go @@ -581,7 +581,7 @@ func (s *testChunkSuite) TestReconstructVarLen(c *check.C) { func (s *testChunkSuite) TestPreAllocInt64(c *check.C) { col := NewColumn(types.NewFieldType(mysql.TypeLonglong), 128) - col.PreAllocInt64(256) + col.ResizeInt64(256) i64s := col.Int64s() c.Assert(len(i64s), check.Equals, 256) for i := 0; i < 256; i++ { @@ -597,7 +597,7 @@ func (s *testChunkSuite) TestPreAllocUint64(c *check.C) { tll := types.NewFieldType(mysql.TypeLonglong) tll.Flag |= mysql.UnsignedFlag col := NewColumn(tll, 128) - col.PreAllocUint64(256) + col.ResizeUint64(256) u64s := col.Uint64s() c.Assert(len(u64s), check.Equals, 256) for i := 0; i < 256; i++ { @@ -611,7 +611,7 @@ func (s *testChunkSuite) TestPreAllocUint64(c *check.C) { func (s *testChunkSuite) TestPreAllocFloat32(c *check.C) { col := newFixedLenColumn(sizeFloat32, 128) - col.PreAllocFloat32(256) + col.ResizeFloat32(256) f32s := col.Float32s() c.Assert(len(f32s), check.Equals, 256) for i := 0; i < 256; i++ { @@ -625,7 +625,7 @@ func (s *testChunkSuite) TestPreAllocFloat32(c *check.C) { func (s *testChunkSuite) TestPreAllocFloat64(c *check.C) { col := newFixedLenColumn(sizeFloat64, 128) - col.PreAllocFloat64(256) + col.ResizeFloat64(256) f64s := col.Float64s() c.Assert(len(f64s), check.Equals, 256) for i := 0; i < 256; i++ { @@ -639,7 +639,7 @@ func (s *testChunkSuite) TestPreAllocFloat64(c *check.C) { func (s *testChunkSuite) TestPreAllocDecimal(c *check.C) { col := newFixedLenColumn(sizeMyDecimal, 128) - col.PreAllocDecimal(256) + col.ResizeDecimal(256) ds := col.Decimals() c.Assert(len(ds), check.Equals, 256) for i := 0; i < 256; i++ { @@ -652,7 +652,7 @@ func (s *testChunkSuite) TestPreAllocDecimal(c *check.C) { func (s *testChunkSuite) TestNull(c *check.C) { col := newFixedLenColumn(sizeFloat64, 32) - col.PreAllocFloat64(1024) + col.ResizeFloat64(1024) c.Assert(col.nullCount(), check.Equals, 1024) notNulls := make(map[int]struct{}) @@ -667,21 +667,24 @@ func (s *testChunkSuite) TestNull(c *check.C) { c.Assert(col.IsNull(idx), check.Equals, false) } - col.PreAllocFloat64(8) + col.ResizeFloat64(8) + col.SetNulls(0, 8, true) col.SetNull(7, false) c.Assert(col.nullCount(), check.Equals, 7) - col.PreAllocFloat64(8) + col.ResizeFloat64(8) + col.SetNulls(0, 8, true) c.Assert(col.nullCount(), check.Equals, 8) - col.PreAllocFloat64(9) + col.ResizeFloat64(9) + col.SetNulls(0, 9, true) col.SetNull(8, false) c.Assert(col.nullCount(), check.Equals, 8) } func (s *testChunkSuite) TestSetNulls(c *check.C) { col := newFixedLenColumn(sizeFloat64, 32) - col.PreAllocFloat64(1024) + col.ResizeFloat64(1024) c.Assert(col.nullCount(), check.Equals, 1024) col.SetNulls(0, 1024, false) @@ -707,6 +710,29 @@ func (s *testChunkSuite) TestSetNulls(c *check.C) { } } +func (s *testChunkSuite) TestResizeReserve(c *check.C) { + cI64s := newFixedLenColumn(sizeInt64, 0) + c.Assert(cI64s.length, check.Equals, 0) + for i := 0; i < 100; i++ { + t := rand.Intn(1024) + cI64s.ResizeInt64(t) + c.Assert(cI64s.length, check.Equals, t) + c.Assert(len(cI64s.Int64s()), check.Equals, t) + } + cI64s.ResizeInt64(0) + c.Assert(cI64s.length, check.Equals, 0) + c.Assert(len(cI64s.Int64s()), check.Equals, 0) + + cStrs := newVarLenColumn(0, nil) + for i := 0; i < 100; i++ { + t := rand.Intn(1024) + cStrs.ReserveString(t) + c.Assert(cStrs.length, check.Equals, 0) + } + cStrs.ReserveString(0) + c.Assert(cStrs.length, check.Equals, 0) +} + func BenchmarkDurationRow(b *testing.B) { chk1 := NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeDuration)}, 1024) col1 := chk1.Column(0) @@ -748,7 +774,7 @@ func BenchmarkDurationVec(b *testing.B) { b.ResetTimer() for k := 0; k < b.N; k++ { - result.PreAllocDuration(1024) + result.ResizeDuration(1024) for i := 0; i < 1024; i++ { d1 := types.Duration{Duration: ds1[i]} d2 := types.Duration{Duration: ds2[i]}