Skip to content
22 changes: 11 additions & 11 deletions util/chunk/chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,7 @@ func New(fields []*types.FieldType, cap, maxChunkSize int) *Chunk {
}

for _, f := range fields {
elemLen := getFixedLen(f)
if elemLen == varElemLen {
chk.columns = append(chk.columns, newVarLenColumn(chk.capacity, nil))
} else {
chk.columns = append(chk.columns, newFixedLenColumn(elemLen, chk.capacity))
}
chk.columns = append(chk.columns, NewColumn(f, chk.capacity))
}

return chk
Expand Down Expand Up @@ -310,7 +305,7 @@ func (c *Chunk) AppendRow(row Row) {
func (c *Chunk) AppendPartialRow(colIdx int, row Row) {
for i, rowCol := range row.c.columns {
chkCol := c.columns[colIdx+i]
chkCol.appendNullBitmap(!rowCol.isNull(row.idx))
chkCol.appendNullBitmap(!rowCol.IsNull(row.idx))
if rowCol.isFixed() {
elemLen := len(rowCol.elemBuf)
offset := row.idx * elemLen
Expand Down Expand Up @@ -338,7 +333,7 @@ func (c *Chunk) PreAlloc(row Row) (rowIdx uint32) {
rowIdx = uint32(c.NumRows())
for i, srcCol := range row.c.columns {
dstCol := c.columns[i]
dstCol.appendNullBitmap(!srcCol.isNull(row.idx))
dstCol.appendNullBitmap(!srcCol.IsNull(row.idx))
elemLen := len(srcCol.elemBuf)
if !srcCol.isFixed() {
elemLen = int(srcCol.offsets[row.idx+1] - srcCol.offsets[row.idx])
Expand Down Expand Up @@ -421,7 +416,7 @@ func (c *Chunk) Append(other *Chunk, begin, end int) {
}
}
for i := begin; i < end; i++ {
dst.appendNullBitmap(!src.isNull(i))
dst.appendNullBitmap(!src.IsNull(i))
dst.length++
}
}
Expand All @@ -439,7 +434,7 @@ func (c *Chunk) TruncateTo(numRows int) {
col.offsets = col.offsets[:numRows+1]
}
for i := numRows; i < col.length; i++ {
if col.isNull(i) {
if col.IsNull(i) {
col.nullCount--
}
}
Expand Down Expand Up @@ -476,7 +471,7 @@ func (c *Chunk) AppendUint64(colIdx int, u uint64) {

// AppendFloat32 appends the float32 value f to the column at index colIdx
// of the chunk.
// NOTE(review): the two calls below look like the removed (appendFloat32)
// and added (AppendFloat32) sides of one diff hunk rather than two appends
// — confirm against the real file.
func (c *Chunk) AppendFloat32(colIdx int, f float32) {
c.columns[colIdx].appendFloat32(f)
c.columns[colIdx].AppendFloat32(f)
}

// AppendFloat64 appends a float64 value to the chunk.
Expand Down Expand Up @@ -555,6 +550,11 @@ func (c *Chunk) AppendDatum(colIdx int, d *types.Datum) {
}
}

// Column returns the column stored at position colIdx of the chunk.
// The index is used directly on c.columns, so an out-of-range colIdx panics.
func (c *Chunk) Column(colIdx int) *Column {
	col := c.columns[colIdx]
	return col
}

func writeTime(buf []byte, t types.Time) {
binary.BigEndian.PutUint16(buf, uint16(t.Time.Year()))
buf[2] = uint8(t.Time.Month())
Expand Down
6 changes: 3 additions & 3 deletions util/chunk/chunk_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func copySelectedInnerRows(innerColOffset, outerColOffset int, src *Chunk, selec
if !selected[i] {
continue
}
dstCol.appendNullBitmap(!srcCol.isNull(i))
dstCol.appendNullBitmap(!srcCol.IsNull(i))
dstCol.length++

elemLen := len(srcCol.elemBuf)
Expand All @@ -59,7 +59,7 @@ func copySelectedInnerRows(innerColOffset, outerColOffset int, src *Chunk, selec
if !selected[i] {
continue
}
dstCol.appendNullBitmap(!srcCol.isNull(i))
dstCol.appendNullBitmap(!srcCol.IsNull(i))
dstCol.length++

start, end := srcCol.offsets[i], srcCol.offsets[i+1]
Expand All @@ -86,7 +86,7 @@ func copyOuterRows(innerColOffset, outerColOffset int, src *Chunk, numRows int,
}
for i, srcCol := range srcCols {
dstCol := dst.columns[outerColOffset+i]
dstCol.appendMultiSameNullBitmap(!srcCol.isNull(row.idx), numRows)
dstCol.appendMultiSameNullBitmap(!srcCol.IsNull(row.idx), numRows)
dstCol.length += numRows
if srcCol.isFixed() {
elemLen := len(srcCol.elemBuf)
Expand Down
125 changes: 122 additions & 3 deletions util/chunk/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,13 @@
package chunk

import (
"reflect"
"time"
"unsafe"

"github.com/pingcap/tidb/types"
"github.com/pingcap/tidb/types/json"
"github.com/pingcap/tidb/util/hack"
)

// AppendDuration appends a duration value into this Column.
Expand Down Expand Up @@ -46,6 +49,11 @@ func (c *Column) AppendJSON(j json.BinaryJSON) {
c.finishAppendVar()
}

// AppendSet appends the Set value set to this Column by passing its Name and
// Value to appendNameValue.
func (c *Column) AppendSet(set types.Set) {
	name, value := set.Name, set.Value
	c.appendNameValue(name, value)
}

// Column stores one column of data in Apache Arrow format.
// See https://arrow.apache.org/docs/memory_layout.html
type Column struct {
Expand All @@ -57,6 +65,15 @@ type Column struct {
elemBuf []byte
}

// NewColumn creates a new Column for the field type ft with capacity cap.
// When getFixedLen reports varElemLen for ft, a variable-length column is
// built with newVarLenColumn; otherwise a fixed-length column whose element
// size is the reported length is built with newFixedLenColumn.
func NewColumn(ft *types.FieldType, cap int) *Column {
	switch elemLen := getFixedLen(ft); elemLen {
	case varElemLen:
		return newVarLenColumn(cap, nil)
	default:
		return newFixedLenColumn(elemLen, cap)
	}
}

// isFixed reports whether this Column has a non-nil elemBuf.
// NOTE(review): presumably only fixed-length columns allocate elemBuf —
// confirm against newFixedLenColumn / newVarLenColumn.
func (c *Column) isFixed() bool {
	hasElemBuf := c.elemBuf != nil
	return hasElemBuf
}
Expand All @@ -73,7 +90,8 @@ func (c *Column) Reset() {
c.data = c.data[:0]
}

func (c *Column) isNull(rowIdx int) bool {
// IsNull reports whether the row at rowIdx is null.
// Each row owns one bit of nullBitmap (byte rowIdx/8, bit rowIdx&7); a
// cleared bit means the row is null.
func (c *Column) IsNull(rowIdx int) bool {
	byteIdx := rowIdx / 8
	bitIdx := uint(rowIdx) & 7
	return c.nullBitmap[byteIdx]&(1<<bitIdx) == 0
}
Expand Down Expand Up @@ -155,8 +173,8 @@ func (c *Column) AppendUint64(u uint64) {
c.finishAppendFixed()
}

// appendFloat32 appends a float32 value into this Column.
func (c *Column) appendFloat32(f float32) {
// AppendFloat32 appends the float32 value f into this Column.
// The value is written in place at the start of elemBuf and then
// finishAppendFixed is called to complete the append.
func (c *Column) AppendFloat32(f float32) {
	slot := (*float32)(unsafe.Pointer(&c.elemBuf[0]))
	*slot = f
	c.finishAppendFixed()
}
Expand Down Expand Up @@ -190,3 +208,104 @@ func (c *Column) AppendTime(t types.Time) {
writeTime(c.elemBuf, t)
c.finishAppendFixed()
}

// AppendEnum appends the Enum value enum to this Column by passing its Name
// and Value to appendNameValue.
func (c *Column) AppendEnum(enum types.Enum) {
	name, value := enum.Name, enum.Value
	c.appendNameValue(name, value)
}

// Sizes, in bytes, of the element types exposed by the typed slice
// accessors. Each one is passed as typeSize to castSliceHeader, which uses
// it to turn the byte capacity of a column's data into an element capacity.
const (
// sizeInt64 is the size of an int64 in bytes.
sizeInt64 = int(unsafe.Sizeof(int64(0)))
// sizeUint64 is the size of a uint64 in bytes.
sizeUint64 = int(unsafe.Sizeof(uint64(0)))
// sizeFloat32 is the size of a float32 in bytes.
sizeFloat32 = int(unsafe.Sizeof(float32(0)))
// sizeFloat64 is the size of a float64 in bytes.
sizeFloat64 = int(unsafe.Sizeof(float64(0)))
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we consider unsigned float32 and float64?

// sizeMyDecimal is the size of a types.MyDecimal value in bytes.
sizeMyDecimal = int(unsafe.Sizeof(types.MyDecimal{}))
)

// castSliceHeader fills header so that it describes c.data viewed as a slice
// of typeSize-byte elements: Data is the start of c.data, Len is c.length and
// Cap is the number of whole elements that fit in cap(c.data).
//
// The data pointer is copied from c.data's own slice header instead of being
// taken with &c.data[0], so a column whose data is empty or nil produces an
// empty slice rather than panicking with an index-out-of-range error.
func (c *Column) castSliceHeader(header *reflect.SliceHeader, typeSize int) {
	header.Data = (*reflect.SliceHeader)(unsafe.Pointer(&c.data)).Data
	header.Len = c.length
	header.Cap = cap(c.data) / typeSize
}

// Int64s returns the contents of this Column as an []int64.
// The slice header is filled in by castSliceHeader with sizeInt64 as the
// element size.
func (c *Column) Int64s() []int64 {
	var vals []int64
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&vals))
	c.castSliceHeader(hdr, sizeInt64)
	return vals
}

// Uint64s returns the contents of this Column as a []uint64.
// The slice header is filled in by castSliceHeader with sizeUint64 as the
// element size.
func (c *Column) Uint64s() []uint64 {
	var vals []uint64
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&vals))
	c.castSliceHeader(hdr, sizeUint64)
	return vals
}

// Float32s returns the contents of this Column as a []float32.
// The slice header is filled in by castSliceHeader with sizeFloat32 as the
// element size.
func (c *Column) Float32s() []float32 {
	var vals []float32
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&vals))
	c.castSliceHeader(hdr, sizeFloat32)
	return vals
}

// Float64s returns the contents of this Column as a []float64.
// The slice header is filled in by castSliceHeader with sizeFloat64 as the
// element size.
func (c *Column) Float64s() []float64 {
	var vals []float64
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&vals))
	c.castSliceHeader(hdr, sizeFloat64)
	return vals
}

// Decimals returns the contents of this Column as a []types.MyDecimal.
// The slice header is filled in by castSliceHeader with sizeMyDecimal as the
// element size.
func (c *Column) Decimals() []types.MyDecimal {
	var vals []types.MyDecimal
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&vals))
	c.castSliceHeader(hdr, sizeMyDecimal)
	return vals
}

// GetString returns the string stored in row rowID.
// The value is the byte range data[offsets[rowID]:offsets[rowID+1]].
// NOTE(review): presumably hack.String converts without copying, in which
// case the result aliases the column's data buffer — confirm against
// util/hack before retaining the string past the column's lifetime.
func (c *Column) GetString(rowID int) string {
return string(hack.String(c.data[c.offsets[rowID]:c.offsets[rowID+1]]))
Comment thread
qw4990 marked this conversation as resolved.
Outdated
}

// GetJSON returns the JSON value stored in row rowID.
// The first byte of the row's data range is the type code; the remaining
// bytes up to offsets[rowID+1] are the value, which is a sub-slice of the
// column's data rather than a copy.
func (c *Column) GetJSON(rowID int) json.BinaryJSON {
	begin := c.offsets[rowID]
	typeCode := c.data[begin]
	payload := c.data[begin+1 : c.offsets[rowID+1]]
	return json.BinaryJSON{TypeCode: typeCode, Value: payload}
}

// GetBytes returns the bytes stored in row rowID.
// The result is the sub-slice data[offsets[rowID]:offsets[rowID+1]] of the
// column's data, not a copy.
func (c *Column) GetBytes(rowID int) []byte {
	begin, end := c.offsets[rowID], c.offsets[rowID+1]
	return c.data[begin:end]
}

// GetEnum returns the Enum stored in row rowID, built from the name and
// value that getNameValue decodes for that row.
func (c *Column) GetEnum(rowID int) types.Enum {
	var enum types.Enum
	enum.Name, enum.Value = c.getNameValue(rowID)
	return enum
}

// GetSet returns the Set stored in row rowID, built from the name and value
// that getNameValue decodes for that row.
func (c *Column) GetSet(rowID int) types.Set {
	var set types.Set
	set.Name, set.Value = c.getNameValue(rowID)
	return set
}

// GetTime returns the Time stored in row rowID.
// The value is decoded by readTime from the column's data starting at byte
// offset rowID*16.
func (c *Column) GetTime(rowID int) types.Time {
	offset := rowID * 16
	return readTime(c.data[offset:])
}

// GetDuration returns the Duration stored in row rowID.
// The int64 found at byte offset rowID*8 of the column's data becomes the
// Duration field, and fillFsp is used as the Fsp of the result.
func (c *Column) GetDuration(rowID int, fillFsp int) types.Duration {
	raw := (*int64)(unsafe.Pointer(&c.data[rowID*8]))
	return types.Duration{Duration: time.Duration(*raw), Fsp: fillFsp}
}

// getNameValue decodes the (name, value) pair stored in row rowID.
// A row whose data range is empty yields ("", 0). Otherwise the first 8
// bytes of the range are read as the uint64 value and the remaining bytes
// are the name.
func (c *Column) getNameValue(rowID int) (string, uint64) {
	begin, end := c.offsets[rowID], c.offsets[rowID+1]
	if begin == end {
		return "", 0
	}
	nameBytes := c.data[begin+8 : end]
	value := *(*uint64)(unsafe.Pointer(&c.data[begin]))
	return string(hack.String(nameBytes)), value
}
Loading