diff --git a/cpp/.clang-format b/cpp/.clang-format index 26651a2cd..da6eee613 100644 --- a/cpp/.clang-format +++ b/cpp/.clang-format @@ -65,7 +65,6 @@ ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DeriveLineEnding: true -DerivePointerAlignment: true DisableFormat: false EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: LogicalBlock @@ -207,7 +206,7 @@ SpacesInParentheses: false SpacesInSquareBrackets: false SpaceBeforeSquareBrackets: false BitFieldColonSpacing: Both -Standard: Auto +Standard: Cpp11 StatementAttributeLikeMacros: - Q_EMIT StatementMacros: diff --git a/cpp/src/common/device_id.h b/cpp/src/common/device_id.h index b3a173db1..613ebc567 100644 --- a/cpp/src/common/device_id.h +++ b/cpp/src/common/device_id.h @@ -48,6 +48,9 @@ class IDeviceID { virtual bool operator<(const IDeviceID& other) { return false; } virtual bool operator==(const IDeviceID& other) { return false; } virtual bool operator!=(const IDeviceID& other) { return false; } + virtual std::string* get_split_segname_at(int pos) { return nullptr; } + virtual int get_split_seg_num() { return 0; } + virtual void split_table_name() {} protected: IDeviceID() : empty_segments_() {} @@ -90,6 +93,9 @@ class StringArrayDeviceID : public IDeviceID { for (const auto& segment : segments_) { delete segment; } + for (const auto& prefix_segments : prefix_segments_) { + delete prefix_segments; + } } std::string get_device_name() const override { @@ -192,9 +198,36 @@ class StringArrayDeviceID : public IDeviceID { return !(*this == other); } + void split_table_name() override { init_prefix_segments(); } + + std::string* get_split_segname_at(int pos) override { + if (prefix_segments_.size() == 0 || prefix_segments_.size() == 1) { + return segments_[pos]; + } else { + if (pos < prefix_segments_.size()) { + return prefix_segments_[pos]; + } else { + return segments_[pos - prefix_segments_.size() + 1]; + } + } + } + + int get_split_seg_num() override { + return prefix_segments_.size() == 0 + ? segments_.size() + : segments_.size() + prefix_segments_.size() - 1; + } + private: std::vector segments_; + std::vector prefix_segments_; + void init_prefix_segments() { + auto splits = storage::PathNodesGenerator::invokeParser(*segments_[0]); + for (int i = 0; i < splits.size(); ++i) { + prefix_segments_.push_back(new std::string(splits[i])); + } + } static std::vector formalize( const std::vector& segments) { auto it = diff --git a/cpp/src/common/schema.h b/cpp/src/common/schema.h index 06e7e7e42..499dd5bc7 100644 --- a/cpp/src/common/schema.h +++ b/cpp/src/common/schema.h @@ -46,8 +46,8 @@ struct MeasurementSchema { common::TSDataType data_type_; common::TSEncoding encoding_; common::CompressionType compression_type_; - storage::ChunkWriter *chunk_writer_; - ValueChunkWriter *value_chunk_writer_; + storage::ChunkWriter* chunk_writer_; + ValueChunkWriter* value_chunk_writer_; std::map props_; MeasurementSchema() @@ -58,7 +58,7 @@ struct MeasurementSchema { chunk_writer_(nullptr), value_chunk_writer_(nullptr) {} - MeasurementSchema(const std::string &measurement_name, + MeasurementSchema(const std::string& measurement_name, common::TSDataType data_type) : measurement_name_(measurement_name), data_type_(data_type), @@ -67,7 +67,7 @@ struct MeasurementSchema { chunk_writer_(nullptr), value_chunk_writer_(nullptr) {} - MeasurementSchema(const std::string &measurement_name, + MeasurementSchema(const std::string& measurement_name, common::TSDataType data_type, common::TSEncoding encoding, common::CompressionType compression_type) : measurement_name_(measurement_name), @@ -88,7 +88,7 @@ struct MeasurementSchema { } } - int serialize_to(common::ByteStream &out) { + int serialize_to(common::ByteStream& out) { int ret = common::E_OK; if (RET_FAIL( common::SerializationUtil::write_str(measurement_name_, out))) { @@ -102,7 +102,7 @@ struct MeasurementSchema { if (ret == common::E_OK) { if (RET_FAIL(common::SerializationUtil::write_ui32(props_.size(), out))) { - for (const auto &prop : props_) { + for (const auto& prop : props_) { if (RET_FAIL(common::SerializationUtil::write_str( prop.first, out))) { } else if (RET_FAIL(common::SerializationUtil::write_str( @@ -115,7 +115,7 @@ struct MeasurementSchema { return ret; } - int deserialize_from(common::ByteStream &in) { + int deserialize_from(common::ByteStream& in) { int ret = common::E_OK; uint8_t data_type = common::TSDataType::INVALID_DATATYPE, encoding = common::TSEncoding::INVALID_ENCODING, @@ -153,8 +153,8 @@ struct MeasurementSchema { } }; -typedef std::map MeasurementSchemaMap; -typedef std::map::iterator +typedef std::map MeasurementSchemaMap; +typedef std::map::iterator MeasurementSchemaMapIter; typedef std::pair MeasurementSchemaMapInsertResult; @@ -164,7 +164,7 @@ struct MeasurementSchemaGroup { // measurement_name -> MeasurementSchema MeasurementSchemaMap measurement_schema_map_; bool is_aligned_ = false; - TimeChunkWriter *time_chunk_writer_ = nullptr; + TimeChunkWriter* time_chunk_writer_ = nullptr; ~MeasurementSchemaGroup() { if (time_chunk_writer_ != nullptr) { @@ -195,11 +195,11 @@ class TableSchema { * Each ColumnSchema defines the schema for one column * in the table. */ - TableSchema(const std::string &table_name, - const std::vector &column_schemas) + TableSchema(const std::string& table_name, + const std::vector& column_schemas) : table_name_(table_name) { to_lowercase_inplace(table_name_); - for (const common::ColumnSchema &column_schema : column_schemas) { + for (const common::ColumnSchema& column_schema : column_schemas) { column_schemas_.emplace_back(std::make_shared( column_schema.get_column_name(), column_schema.get_data_type())); @@ -207,16 +207,16 @@ class TableSchema { column_schema.get_column_category()); } int idx = 0; - for (const auto &measurement_schema : column_schemas_) { + for (const auto& measurement_schema : column_schemas_) { to_lowercase_inplace(measurement_schema->measurement_name_); column_pos_index_.insert( std::make_pair(measurement_schema->measurement_name_, idx++)); } } - TableSchema(const std::string &table_name, - const std::vector &column_schemas, - const std::vector &column_categories) + TableSchema(const std::string& table_name, + const std::vector& column_schemas, + const std::vector& column_categories) : table_name_(table_name), column_categories_(column_categories) { to_lowercase_inplace(table_name_); for (const auto column_schema : column_schemas) { @@ -226,34 +226,42 @@ class TableSchema { } } int idx = 0; - for (const auto &measurement_schema : column_schemas_) { + for (const auto& measurement_schema : column_schemas_) { to_lowercase_inplace(measurement_schema->measurement_name_); column_pos_index_.insert( std::make_pair(measurement_schema->measurement_name_, idx++)); } } - TableSchema(TableSchema &&other) noexcept + TableSchema(TableSchema&& other) noexcept : table_name_(std::move(other.table_name_)), column_schemas_(std::move(other.column_schemas_)), column_categories_(std::move(other.column_categories_)) {} - TableSchema(const TableSchema &other) noexcept + TableSchema(const TableSchema& other) noexcept : table_name_(other.table_name_), column_categories_(other.column_categories_) { - for (const auto &column_schema : other.column_schemas_) { + for (const auto& column_schema : other.column_schemas_) { // Just call default construction column_schemas_.emplace_back( std::make_shared(*column_schema)); } int idx = 0; - for (const auto &measurement_schema : column_schemas_) { + for (const auto& measurement_schema : column_schemas_) { column_pos_index_.insert( std::make_pair(measurement_schema->measurement_name_, idx++)); } } - int serialize_to(common::ByteStream &out) { + // In cases where data is retrieved from a tree to form the table, + // there is no table name in the tree path, so adjustments are needed for + // this scenario. This flag is used specifically for such cases. + // TODO(Colin): remove this. + void set_virtual_table() { is_virtual_table_ = true; } + + bool is_virtual_table() { return is_virtual_table_; } + + int serialize_to(common::ByteStream& out) { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_var_uint( column_schemas_.size(), out))) { @@ -271,7 +279,7 @@ class TableSchema { return ret; } - int deserialize(common::ByteStream &in) { + int deserialize(common::ByteStream& in) { int ret = common::E_OK; uint32_t num_columns; if (RET_FAIL( @@ -294,9 +302,9 @@ class TableSchema { ~TableSchema() { column_schemas_.clear(); } - const std::string &get_table_name() { return table_name_; } + const std::string& get_table_name() { return table_name_; } - void set_table_name(const std::string &table_name) { + void set_table_name(const std::string& table_name) { table_name_ = table_name; } @@ -310,7 +318,7 @@ class TableSchema { int32_t get_columns_num() const { return column_schemas_.size(); } - int find_column_index(const std::string &column_name) { + int find_column_index(const std::string& column_name) { std::string lower_case_column_name = to_lower(column_name); auto it = column_pos_index_.find(lower_case_column_name); if (it != column_pos_index_.end()) { @@ -333,10 +341,10 @@ class TableSchema { size_t get_column_pos_index_num() const { return column_pos_index_.size(); } - void update(ChunkGroupMeta *chunk_group_meta) { + void update(ChunkGroupMeta* chunk_group_meta) { for (auto iter = chunk_group_meta->chunk_meta_list_.begin(); iter != chunk_group_meta->chunk_meta_list_.end(); iter++) { - auto &chunk_meta = iter.get(); + auto& chunk_meta = iter.get(); if (chunk_meta->data_type_ == common::VECTOR) { continue; } @@ -365,7 +373,7 @@ class TableSchema { std::vector get_data_types() const { std::vector ret; - for (const auto &measurement_schema : column_schemas_) { + for (const auto& measurement_schema : column_schemas_) { ret.emplace_back(measurement_schema->data_type_); } return ret; @@ -375,12 +383,12 @@ class TableSchema { return column_categories_; } - std::vector > get_measurement_schemas() + std::vector> get_measurement_schemas() const { return column_schemas_; } - common::ColumnSchema get_column_schema(const std::string &column_name) { + common::ColumnSchema get_column_schema(const std::string& column_name) { int column_idx = find_column_index(column_name); if (column_idx == -1) { return common::ColumnSchema(); @@ -394,7 +402,7 @@ class TableSchema { } } - int32_t find_id_column_order(const std::string &column_name) { + int32_t find_id_column_order(const std::string& column_name) { std::string lower_case_column_name = to_lower(column_name); int column_order = 0; @@ -412,17 +420,18 @@ class TableSchema { private: std::string table_name_; - std::vector > column_schemas_; + std::vector> column_schemas_; std::vector column_categories_; std::map column_pos_index_; + bool is_virtual_table_ = false; }; struct Schema { - typedef std::unordered_map > + typedef std::unordered_map> TableSchemasMap; TableSchemasMap table_schema_map_; - void update_table_schema(ChunkGroupMeta *chunk_group_meta) { + void update_table_schema(ChunkGroupMeta* chunk_group_meta) { std::shared_ptr device_id = chunk_group_meta->device_id_; auto table_name = device_id->get_table_name(); if (table_schema_map_.find(table_name) == table_schema_map_.end()) { @@ -431,7 +440,7 @@ struct Schema { table_schema_map_[table_name]->update(chunk_group_meta); } void register_table_schema( - const std::shared_ptr &table_schema) { + const std::shared_ptr& table_schema) { table_schema_map_[table_schema->get_table_name()] = table_schema; } }; diff --git a/cpp/src/common/tsblock/tsblock.h b/cpp/src/common/tsblock/tsblock.h index a0e94391b..dce94f8ad 100644 --- a/cpp/src/common/tsblock/tsblock.h +++ b/cpp/src/common/tsblock/tsblock.h @@ -44,7 +44,7 @@ class TsBlock { * information, such as insert scenarios, etc. Then we will use the given * number of rows */ - explicit TsBlock(TupleDesc *tupledesc, uint32_t max_row_count = 0) + explicit TsBlock(TupleDesc* tupledesc, uint32_t max_row_count = 0) : capacity_(g_config_value_.tsblock_max_memory_), row_count_(0), max_row_count_(max_row_count), @@ -60,9 +60,9 @@ class TsBlock { FORCE_INLINE uint32_t get_row_count() const { return row_count_; } - FORCE_INLINE TupleDesc *get_tuple_desc() const { return tuple_desc_; } + FORCE_INLINE TupleDesc* get_tuple_desc() const { return tuple_desc_; } - FORCE_INLINE Vector *get_vector(uint32_t index) { return vectors_[index]; } + FORCE_INLINE Vector* get_vector(uint32_t index) { return vectors_[index]; } FORCE_INLINE uint32_t get_column_count() const { return tuple_desc_->get_column_count(); @@ -104,8 +104,8 @@ class TsBlock { row_count_ = 0; } - FORCE_INLINE static int create_tsblock(TupleDesc *tupledesc, - TsBlock *&ret_tsblock, + FORCE_INLINE static int create_tsblock(TupleDesc* tupledesc, + TsBlock*& ret_tsblock, uint32_t max_row_count = 0) { int ret = common::E_OK; if (ret_tsblock == nullptr) { @@ -119,13 +119,13 @@ class TsBlock { } int init(); - void tsblock_to_json(ByteStream *byte_stream); + void tsblock_to_json(ByteStream* byte_stream); std::string debug_string(); private: int build_vector(common::TSDataType type, uint32_t row_count); - void write_data(ByteStream *__restrict byte_stream, char *__restrict val, + void write_data(ByteStream* __restrict byte_stream, char* __restrict val, uint32_t len, bool has_null, TSDataType type); private: @@ -134,13 +134,13 @@ class TsBlock { uint32_t max_row_count_; common::BitMap select_list_; - TupleDesc *tuple_desc_; - std::vector vectors_; + TupleDesc* tuple_desc_; + std::vector vectors_; }; class RowAppender { public: - explicit RowAppender(TsBlock *tsblock) : tsblock_(tsblock) {} + explicit RowAppender(TsBlock* tsblock) : tsblock_(tsblock) {} ~RowAppender() {} // todo:(yanghao) maybe need to consider select-list @@ -157,25 +157,37 @@ class RowAppender { tsblock_->row_count_--; } - FORCE_INLINE void append(uint32_t slot_index, const char *value, + FORCE_INLINE void append(uint32_t slot_index, const char* value, uint32_t len) { ASSERT(slot_index < tsblock_->tuple_desc_->get_column_count()); - Vector *vec = tsblock_->vectors_[slot_index]; - vec->append(value, len); + Vector* vec = tsblock_->vectors_[slot_index]; + // TODO(Colin): Refine this. + TSDataType datatype = vec->get_vector_type(); + if (len == 4 && datatype == INT64) { + int32_t int32_val = *reinterpret_cast(value); + int64_t int64_val = static_cast(int32_val); + vec->append(reinterpret_cast(&int64_val), 8); + } else if (len == 4 && datatype == DOUBLE) { + float float_val = *reinterpret_cast(value); + double double_val = static_cast(float_val); + vec->append(reinterpret_cast(&double_val), 8); + } else { + vec->append(value, len); + } } FORCE_INLINE void append_null(uint32_t slot_index) { - Vector *vec = tsblock_->vectors_[slot_index]; + Vector* vec = tsblock_->vectors_[slot_index]; vec->set_null(tsblock_->row_count_ - 1); } private: - TsBlock *tsblock_; + TsBlock* tsblock_; }; class ColAppender { public: - ColAppender(uint32_t column_index, TsBlock *tsblock) + ColAppender(uint32_t column_index, TsBlock* tsblock) : column_index_(column_index), column_row_count_(0), tsblock_(tsblock) { ASSERT(column_index < tsblock_->tuple_desc_->get_column_count()); vec_ = tsblock_->vectors_[column_index]; @@ -194,7 +206,7 @@ class ColAppender { } } - FORCE_INLINE void append(const char *value, uint32_t len) { + FORCE_INLINE void append(const char* value, uint32_t len) { vec_->append(value, len); } @@ -211,7 +223,7 @@ class ColAppender { } return E_OK; } - FORCE_INLINE int fill(const char *value, uint32_t len, uint32_t end_index) { + FORCE_INLINE int fill(const char* value, uint32_t len, uint32_t end_index) { while (column_row_count_ < end_index) { if (!add_row()) { return E_INVALID_ARG; @@ -225,14 +237,14 @@ class ColAppender { private: uint32_t column_index_; uint32_t column_row_count_; - TsBlock *tsblock_; - Vector *vec_; + TsBlock* tsblock_; + Vector* vec_; }; // todo:(yanghao) need to deal with select-list class RowIterator { public: - explicit RowIterator(TsBlock *tsblock) : tsblock_(tsblock), row_id_(0) { + explicit RowIterator(TsBlock* tsblock) : tsblock_(tsblock), row_id_(0) { column_count_ = tsblock_->tuple_desc_->get_column_count(); } @@ -264,17 +276,17 @@ class RowIterator { FORCE_INLINE void update_row_id() { row_id_++; } - FORCE_INLINE char *read(uint32_t column_index, uint32_t *__restrict len, - bool *__restrict null) { + FORCE_INLINE char* read(uint32_t column_index, uint32_t* __restrict len, + bool* __restrict null) { ASSERT(column_index < column_count_); - Vector *vec = tsblock_->vectors_[column_index]; + Vector* vec = tsblock_->vectors_[column_index]; return vec->read(len, null, row_id_); } std::string debug_string(); // for debug private: - TsBlock *tsblock_; + TsBlock* tsblock_; uint32_t row_id_; // The line number currently being reader uint32_t column_count_; }; @@ -282,7 +294,7 @@ class RowIterator { // todo:(yanghao) need to deal with select-list class ColIterator { public: - ColIterator(uint32_t column_index, const TsBlock *tsblock) + ColIterator(uint32_t column_index, const TsBlock* tsblock) : column_index_(column_index), row_id_(0), tsblock_(tsblock) { ASSERT(column_index < tsblock_->tuple_desc_->get_column_count()); vec_ = tsblock_->vectors_[column_index]; @@ -303,22 +315,22 @@ class ColIterator { FORCE_INLINE TSDataType get_data_type() { return vec_->get_vector_type(); } - FORCE_INLINE char *read(uint32_t *__restrict len, bool *__restrict null) { + FORCE_INLINE char* read(uint32_t* __restrict len, bool* __restrict null) { return vec_->read(len, null, row_id_); } - FORCE_INLINE char *read(uint32_t *len) { return vec_->read(len); } + FORCE_INLINE char* read(uint32_t* len) { return vec_->read(len); } FORCE_INLINE uint32_t get_column_index() { return column_index_; } private: uint32_t column_index_; uint32_t row_id_; - const TsBlock *tsblock_; - Vector *vec_; + const TsBlock* tsblock_; + Vector* vec_; }; -int merge_tsblock_by_row(TsBlock *sea, TsBlock *river); +int merge_tsblock_by_row(TsBlock* sea, TsBlock* river); } // end namespace common #endif // COMMON_TSBLOCK_TSBLOCK_H diff --git a/cpp/src/common/tsfile_common.h b/cpp/src/common/tsfile_common.h index dd22ca401..39cd027ef 100644 --- a/cpp/src/common/tsfile_common.h +++ b/cpp/src/common/tsfile_common.h @@ -39,7 +39,7 @@ namespace storage { -extern const char *MAGIC_STRING_TSFILE; +extern const char* MAGIC_STRING_TSFILE; constexpr int MAGIC_STRING_TSFILE_LEN = 6; extern const char VERSION_NUM_BYTE; extern const char CHUNK_GROUP_HEADER_MARKER; @@ -60,7 +60,7 @@ typedef int64_t TsFileID; struct PageHeader { uint32_t uncompressed_size_; uint32_t compressed_size_; - Statistic *statistic_; + Statistic* statistic_; PageHeader() : uncompressed_size_(0), compressed_size_(0), statistic_(nullptr) {} @@ -73,7 +73,7 @@ struct PageHeader { uncompressed_size_ = 0; compressed_size_ = 0; } - int deserialize_from(common::ByteStream &in, bool deserialize_stat, + int deserialize_from(common::ByteStream& in, bool deserialize_stat, common::TSDataType data_type) { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::read_var_uint( @@ -99,7 +99,7 @@ struct PageHeader { } #ifndef NDEBUG - friend std::ostream &operator<<(std::ostream &os, const PageHeader &h) { + friend std::ostream& operator<<(std::ostream& os, const PageHeader& h) { os << "{uncompressed_size_=" << h.uncompressed_size_ << ", compressed_size_=" << h.uncompressed_size_; if (h.statistic_ == nullptr) { @@ -132,7 +132,7 @@ struct ChunkHeader { ~ChunkHeader() = default; - int serialize_to(common::ByteStream &out) { + int serialize_to(common::ByteStream& out) { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_char(chunk_type_, out))) { } else if (RET_FAIL(common::SerializationUtil::write_var_str( @@ -148,7 +148,7 @@ struct ChunkHeader { } return ret; } - int deserialize_from(common::ByteStream &in) { + int deserialize_from(common::ByteStream& in) { int ret = common::E_OK; in.mark_read_pos(); if (RET_FAIL(common::SerializationUtil::read_char(chunk_type_, in))) { @@ -157,18 +157,18 @@ struct ChunkHeader { } else if (RET_FAIL(common::SerializationUtil::read_var_uint(data_size_, in))) { } else if (RET_FAIL(common::SerializationUtil::read_char( - (char &)data_type_, in))) { + (char&)data_type_, in))) { } else if (RET_FAIL(common::SerializationUtil::read_char( - (char &)compression_type_, in))) { + (char&)compression_type_, in))) { } else if (RET_FAIL(common::SerializationUtil::read_char( - (char &)encoding_type_, in))) { + (char&)encoding_type_, in))) { } else { serialized_size_ = in.get_mark_len(); } return ret; } #ifndef NDEBUG - friend std::ostream &operator<<(std::ostream &os, const ChunkHeader &h) { + friend std::ostream& operator<<(std::ostream& os, const ChunkHeader& h) { os << "{measurement_name=" << h.measurement_name_ << ", data_size=" << h.data_size_ << ", data_type=" << h.data_type_ << ", compression_type=" << h.compression_type_ @@ -197,7 +197,7 @@ struct ChunkMeta { common::String measurement_name_; common::TSDataType data_type_; int64_t offset_of_chunk_header_; - Statistic *statistic_; + Statistic* statistic_; char mask_; common::TSEncoding encoding_; common::CompressionType compression_type_; @@ -209,10 +209,10 @@ struct ChunkMeta { statistic_(nullptr), mask_(0) {} - int init(const common::String &measurement_name, + int init(const common::String& measurement_name, common::TSDataType data_type, int64_t offset_of_chunk_header, - Statistic *stat, char mask, common::TSEncoding encoding, - common::CompressionType compression_type, common::PageArena &pa) { + Statistic* stat, char mask, common::TSEncoding encoding, + common::CompressionType compression_type, common::PageArena& pa) { // TODO check parameter valid measurement_name_.dup_from(measurement_name, pa); data_type_ = data_type; @@ -223,10 +223,10 @@ struct ChunkMeta { compression_type_ = compression_type; return common::E_OK; } - FORCE_INLINE void clone_statistic_from(Statistic *stat) { + FORCE_INLINE void clone_statistic_from(Statistic* stat) { clone_statistic(stat, statistic_, data_type_); } - FORCE_INLINE int clone_from(ChunkMeta &that, common::PageArena *pa) { + FORCE_INLINE int clone_from(ChunkMeta& that, common::PageArena* pa) { int ret = common::E_OK; if (RET_FAIL(measurement_name_.dup_from(that.measurement_name_, *pa))) { return ret; @@ -244,7 +244,7 @@ struct ChunkMeta { mask_ = that.mask_; return ret; } - int serialize_to(common::ByteStream &out, bool serialize_statistic) { + int serialize_to(common::ByteStream& out, bool serialize_statistic) { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_i64( offset_of_chunk_header_, out))) { @@ -253,8 +253,8 @@ struct ChunkMeta { } return ret; } - int deserialize_from(common::ByteStream &in, bool deserialize_stat, - common::PageArena *pa) { + int deserialize_from(common::ByteStream& in, bool deserialize_stat, + common::PageArena* pa) { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::read_i64( offset_of_chunk_header_, in))) { @@ -270,7 +270,7 @@ struct ChunkMeta { return ret; } #ifndef NDEBUG - friend std::ostream &operator<<(std::ostream &os, const ChunkMeta &cm) { + friend std::ostream& operator<<(std::ostream& os, const ChunkMeta& cm) { os << "{measurement_name=" << cm.measurement_name_ << ", data_type=" << cm.data_type_ << ", offset_of_chunk_header=" << cm.offset_of_chunk_header_ @@ -287,16 +287,16 @@ struct ChunkMeta { struct ChunkGroupMeta { std::shared_ptr device_id_; - common::SimpleList chunk_meta_list_; + common::SimpleList chunk_meta_list_; - explicit ChunkGroupMeta(common::PageArena *pa_ptr) + explicit ChunkGroupMeta(common::PageArena* pa_ptr) : chunk_meta_list_(pa_ptr) {} FORCE_INLINE int init(std::shared_ptr device_id) { device_id_ = device_id; return 0; } - FORCE_INLINE int push(ChunkMeta *cm) { + FORCE_INLINE int push(ChunkMeta* cm) { return chunk_meta_list_.push_back(cm); } }; @@ -305,13 +305,13 @@ class ITimeseriesIndex { public: ITimeseriesIndex() {} ~ITimeseriesIndex() {} - virtual common::SimpleList *get_chunk_meta_list() const { + virtual common::SimpleList* get_chunk_meta_list() const { return nullptr; } - virtual common::SimpleList *get_time_chunk_meta_list() const { + virtual common::SimpleList* get_time_chunk_meta_list() const { return nullptr; } - virtual common::SimpleList *get_value_chunk_meta_list() const { + virtual common::SimpleList* get_value_chunk_meta_list() const { return nullptr; } @@ -321,7 +321,7 @@ class ITimeseriesIndex { virtual common::TSDataType get_data_type() const { return common::INVALID_DATATYPE; } - virtual Statistic *get_statistic() const { return nullptr; } + virtual Statistic* get_statistic() const { return nullptr; } }; /* @@ -368,19 +368,18 @@ class TimeseriesIndex : public ITimeseriesIndex { } } - int add_chunk_meta(ChunkMeta *chunk_meta, bool serialize_statistic); - FORCE_INLINE int set_measurement_name(common::String &measurement_name, - common::PageArena &pa) { + int add_chunk_meta(ChunkMeta* chunk_meta, bool serialize_statistic); + FORCE_INLINE int set_measurement_name(common::String& measurement_name, + common::PageArena& pa) { return measurement_name_.dup_from(measurement_name, pa); } - FORCE_INLINE void set_measurement_name(common::String &measurement_name) { + FORCE_INLINE void set_measurement_name(common::String& measurement_name) { measurement_name_.shallow_copy_from(measurement_name); } FORCE_INLINE virtual common::String get_measurement_name() const { return measurement_name_; } - virtual inline common::SimpleList *get_chunk_meta_list() - const { + virtual inline common::SimpleList* get_chunk_meta_list() const { return chunk_meta_list_; } FORCE_INLINE void set_ts_meta_type(char ts_meta_type) { @@ -405,7 +404,7 @@ class TimeseriesIndex : public ITimeseriesIndex { statistic_->reset(); return common::E_OK; } - virtual Statistic *get_statistic() const { return statistic_; } + virtual Statistic* get_statistic() const { return statistic_; } common::TsID get_ts_id() const { return ts_id_; } FORCE_INLINE void finish() { @@ -413,7 +412,7 @@ class TimeseriesIndex : public ITimeseriesIndex { chunk_meta_list_serialized_buf_.total_size(); } - int serialize_to(common::ByteStream &out) { + int serialize_to(common::ByteStream& out) { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_char( timeseries_meta_type_, out))) { @@ -430,14 +429,14 @@ class TimeseriesIndex : public ITimeseriesIndex { return ret; } - int deserialize_from(common::ByteStream &in, common::PageArena *pa) { + int deserialize_from(common::ByteStream& in, common::PageArena* pa) { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::read_char(timeseries_meta_type_, in))) { } else if (RET_FAIL(common::SerializationUtil::read_mystring( measurement_name_, pa, in))) { } else if (RET_FAIL(common::SerializationUtil::read_char( - (char &)data_type_, in))) { + (char&)data_type_, in))) { } else if (RET_FAIL(common::SerializationUtil::read_var_uint( chunk_meta_list_data_size_, in))) { } else if (nullptr == @@ -447,22 +446,22 @@ class TimeseriesIndex : public ITimeseriesIndex { } else if (RET_FAIL(statistic_->deserialize_from(in))) { } else { statistic_from_pa_ = true; - void *chunk_meta_list_buf = pa->alloc(sizeof(*chunk_meta_list_)); + void* chunk_meta_list_buf = pa->alloc(sizeof(*chunk_meta_list_)); if (IS_NULL(chunk_meta_list_buf)) { return common::E_OOM; } const bool deserialize_chunk_meta_statistic = (timeseries_meta_type_ & 0x3F); // TODO chunk_meta_list_ = - new (chunk_meta_list_buf) common::SimpleList(pa); + new (chunk_meta_list_buf) common::SimpleList(pa); uint32_t start_pos = in.read_pos(); while (IS_SUCC(ret) && in.read_pos() < start_pos + chunk_meta_list_data_size_) { - void *cm_buf = pa->alloc(sizeof(ChunkMeta)); + void* cm_buf = pa->alloc(sizeof(ChunkMeta)); if (IS_NULL(cm_buf)) { ret = common::E_OOM; } else { - ChunkMeta *cm = new (cm_buf) ChunkMeta; + ChunkMeta* cm = new (cm_buf) ChunkMeta; cm->measurement_name_.shallow_copy_from( this->measurement_name_); cm->data_type_ = this->data_type_; @@ -477,7 +476,7 @@ class TimeseriesIndex : public ITimeseriesIndex { return ret; } - int clone_from(const TimeseriesIndex &that, common::PageArena *pa) { + int clone_from(const TimeseriesIndex& that, common::PageArena* pa) { int ret = common::E_OK; timeseries_meta_type_ = that.timeseries_meta_type_; chunk_meta_list_data_size_ = that.chunk_meta_list_data_size_; @@ -496,20 +495,20 @@ class TimeseriesIndex : public ITimeseriesIndex { } if (that.chunk_meta_list_ != nullptr) { - void *buf = pa->alloc(sizeof(*chunk_meta_list_)); + void* buf = pa->alloc(sizeof(*chunk_meta_list_)); if (IS_NULL(buf)) { return common::E_OOM; } - chunk_meta_list_ = new (buf) common::SimpleList(pa); - common::SimpleList::Iterator it; + chunk_meta_list_ = new (buf) common::SimpleList(pa); + common::SimpleList::Iterator it; for (it = that.chunk_meta_list_->begin(); IS_SUCC(ret) && it != that.chunk_meta_list_->end(); it++) { - ChunkMeta *cm = it.get(); - void *cm_buf = pa->alloc(sizeof(ChunkMeta)); + ChunkMeta* cm = it.get(); + void* cm_buf = pa->alloc(sizeof(ChunkMeta)); if (IS_NULL(cm_buf)) { return common::E_OOM; } else { - ChunkMeta *my_cm = new (cm_buf) ChunkMeta; + ChunkMeta* my_cm = new (cm_buf) ChunkMeta; if (RET_FAIL(my_cm->clone_from(*cm, pa))) { } else if (RET_FAIL(chunk_meta_list_->push_back(my_cm))) { } @@ -519,8 +518,8 @@ class TimeseriesIndex : public ITimeseriesIndex { return ret; } #ifndef NDEBUG - friend std::ostream &operator<<(std::ostream &os, - const TimeseriesIndex &tsi) { + friend std::ostream& operator<<(std::ostream& os, + const TimeseriesIndex& tsi) { os << "{meta_type=" << (int)tsi.timeseries_meta_type_ << ", chunk_meta_list_data_size=" << tsi.chunk_meta_list_data_size_ << ", measurement_name=" << tsi.measurement_name_ @@ -531,7 +530,7 @@ class TimeseriesIndex : public ITimeseriesIndex { if (tsi.chunk_meta_list_) { os << ", chunk_meta_list={"; int count = 0; - common::SimpleList::Iterator it = + common::SimpleList::Iterator it = tsi.chunk_meta_list_->begin(); for (; it != tsi.chunk_meta_list_->end(); it++, count++) { if (count != 0) { @@ -565,24 +564,24 @@ class TimeseriesIndex : public ITimeseriesIndex { * TimeseriesIndex.statistic_ is duplicated with ChunkMeta.statistic_. In * this case, we do not serialize ChunkMeta.statistic_. */ - Statistic *statistic_; + Statistic* statistic_; bool statistic_from_pa_; common::ByteStream chunk_meta_list_serialized_buf_; // common::PageArena page_arena_; - common::SimpleList *chunk_meta_list_; // for deserialize_from + common::SimpleList* chunk_meta_list_; // for deserialize_from }; class AlignedTimeseriesIndex : public ITimeseriesIndex { public: - TimeseriesIndex *time_ts_idx_; - TimeseriesIndex *value_ts_idx_; + TimeseriesIndex* time_ts_idx_; + TimeseriesIndex* value_ts_idx_; AlignedTimeseriesIndex() {} ~AlignedTimeseriesIndex() {} - virtual common::SimpleList *get_time_chunk_meta_list() const { + virtual common::SimpleList* get_time_chunk_meta_list() const { return time_ts_idx_->get_chunk_meta_list(); } - virtual common::SimpleList *get_value_chunk_meta_list() const { + virtual common::SimpleList* get_value_chunk_meta_list() const { return value_ts_idx_->get_chunk_meta_list(); } @@ -592,13 +591,13 @@ class AlignedTimeseriesIndex : public ITimeseriesIndex { virtual common::TSDataType get_data_type() const { return time_ts_idx_->get_data_type(); } - virtual Statistic *get_statistic() const { + virtual Statistic* get_statistic() const { return value_ts_idx_->get_statistic(); } #ifndef NDEBUG - friend std::ostream &operator<<(std::ostream &os, - const AlignedTimeseriesIndex &tsi) { + friend std::ostream& operator<<(std::ostream& os, + const AlignedTimeseriesIndex& tsi) { os << "time_ts_idx=" << *tsi.time_ts_idx_; os << ", value_ts_idx=" << *tsi.value_ts_idx_; return os; @@ -609,7 +608,7 @@ class AlignedTimeseriesIndex : public ITimeseriesIndex { class TSMIterator { public: explicit TSMIterator( - common::SimpleList &chunk_group_meta_list) + common::SimpleList& chunk_group_meta_list) : chunk_group_meta_list_(chunk_group_meta_list), chunk_group_meta_iter_(), chunk_meta_iter_() {} @@ -617,38 +616,38 @@ class TSMIterator { // sort => iterate int init(); bool has_next() const; - int get_next(std::shared_ptr &ret_device_id, - common::String &ret_measurement_name, - TimeseriesIndex &ret_ts_index); + int get_next(std::shared_ptr& ret_device_id, + common::String& ret_measurement_name, + TimeseriesIndex& ret_ts_index); private: - common::SimpleList &chunk_group_meta_list_; - common::SimpleList::Iterator chunk_group_meta_iter_; - common::SimpleList::Iterator chunk_meta_iter_; + common::SimpleList& chunk_group_meta_list_; + common::SimpleList::Iterator chunk_group_meta_iter_; + common::SimpleList::Iterator chunk_meta_iter_; // timeseries measurenemnt chunk meta info // map >> std::map, - std::map>> + std::map>> tsm_chunk_meta_info_; // device iterator std::map, - std::map>>::iterator + std::map>>::iterator tsm_device_iter_; // measurement iterator - std::map>::iterator + std::map>::iterator tsm_measurement_iter_; }; /* =============== TsFile Index ================ */ struct IComparable { virtual ~IComparable() = default; - virtual bool operator<(const IComparable &other) const = 0; - virtual bool operator>(const IComparable &other) const = 0; - virtual bool operator==(const IComparable &other) const = 0; - virtual int compare(const IComparable &other) { + virtual bool operator<(const IComparable& other) const = 0; + virtual bool operator>(const IComparable& other) const = 0; + virtual bool operator==(const IComparable& other) const = 0; + virtual int compare(const IComparable& other) { if (this->operator<(other)) { return -1; } else if (this->operator==(other)) { @@ -663,27 +662,27 @@ struct IComparable { struct DeviceIDComparable : IComparable { std::shared_ptr device_id_; - explicit DeviceIDComparable(const std::shared_ptr &device_id) + explicit DeviceIDComparable(const std::shared_ptr& device_id) : device_id_(device_id) {} - bool operator<(const IComparable &other) const override { - const auto *other_device = - dynamic_cast(&other); + bool operator<(const IComparable& other) const override { + const auto* other_device = + dynamic_cast(&other); if (!other_device) throw std::runtime_error("Incompatible comparison"); return *device_id_ < *other_device->device_id_; } - bool operator>(const IComparable &other) const override { - const auto *other_device = - dynamic_cast(&other); + bool operator>(const IComparable& other) const override { + const auto* other_device = + dynamic_cast(&other); if (!other_device) throw std::runtime_error("Incompatible comparison"); return *device_id_ != *other_device->device_id_ && !(*device_id_ < *other_device->device_id_); } - bool operator==(const IComparable &other) const override { - const auto *other_device = - dynamic_cast(&other); + bool operator==(const IComparable& other) const override { + const auto* other_device = + dynamic_cast(&other); if (!other_device) throw std::runtime_error("Incompatible comparison"); return *device_id_ == *other_device->device_id_; } @@ -696,25 +695,25 @@ struct DeviceIDComparable : IComparable { struct StringComparable : IComparable { std::string value_; - explicit StringComparable(const std::string &value) : value_(value) {} + explicit StringComparable(const std::string& value) : value_(value) {} - bool operator<(const IComparable &other) const override { - const auto *other_string = - dynamic_cast(&other); + bool operator<(const IComparable& other) const override { + const auto* other_string = + dynamic_cast(&other); if (!other_string) throw std::runtime_error("Incompatible comparison"); return value_ < other_string->value_; } - bool operator>(const IComparable &other) const override { - const auto *other_string = - dynamic_cast(&other); + bool operator>(const IComparable& other) const override { + const auto* other_string = + dynamic_cast(&other); if (!other_string) throw std::runtime_error("Incompatible comparison"); return value_ > other_string->value_; } - bool operator==(const IComparable &other) const override { - const auto *other_string = - dynamic_cast(&other); + bool operator==(const IComparable& other) const override { + const auto* other_string = + dynamic_cast(&other); if (!other_string) throw std::runtime_error("Incompatible comparison"); return value_ == other_string->value_; } @@ -723,7 +722,7 @@ struct StringComparable : IComparable { }; struct IMetaIndexEntry { - static void self_destructor(IMetaIndexEntry *ptr) { + static void self_destructor(IMetaIndexEntry* ptr) { if (ptr) { ptr->~IMetaIndexEntry(); } @@ -731,9 +730,9 @@ struct IMetaIndexEntry { IMetaIndexEntry() = default; virtual ~IMetaIndexEntry() = default; - virtual int serialize_to(common::ByteStream &out) { return common::E_OK; } - virtual int deserialize_from(common::ByteStream &out, - common::PageArena *pa) { + virtual int serialize_to(common::ByteStream& out) { return common::E_OK; } + virtual int deserialize_from(common::ByteStream& out, + common::PageArena* pa) { return common::E_NOT_SUPPORT; } virtual int64_t get_offset() const = 0; @@ -743,11 +742,11 @@ struct IMetaIndexEntry { } virtual common::String get_name() const { return {}; } virtual std::shared_ptr get_device_id() const { return nullptr; } - virtual std::shared_ptr clone(common::PageArena *pa) = 0; + virtual std::shared_ptr clone(common::PageArena* pa) = 0; #ifndef NDEBUG - virtual void print(std::ostream &os) const {} - friend std::ostream &operator<<(std::ostream &os, - const IMetaIndexEntry &entry) { + virtual void print(std::ostream& os) const {} + friend std::ostream& operator<<(std::ostream& os, + const IMetaIndexEntry& entry) { entry.print(os); return os; } @@ -760,19 +759,19 @@ struct DeviceMetaIndexEntry : IMetaIndexEntry { DeviceMetaIndexEntry() = default; - DeviceMetaIndexEntry(const std::shared_ptr &device_id, + DeviceMetaIndexEntry(const std::shared_ptr& device_id, const int64_t offset) : device_id_(device_id), offset_(offset) {} ~DeviceMetaIndexEntry() override = default; - static void self_deleter(DeviceMetaIndexEntry *ptr) { + static void self_deleter(DeviceMetaIndexEntry* ptr) { if (ptr) { ptr->~DeviceMetaIndexEntry(); } } - int serialize_to(common::ByteStream &out) override { + int serialize_to(common::ByteStream& out) override { int ret = common::E_OK; if (RET_FAIL(device_id_->serialize(out))) { } else if (RET_FAIL( @@ -781,10 +780,10 @@ struct DeviceMetaIndexEntry : IMetaIndexEntry { return ret; } - std::shared_ptr &get_device_id() { return device_id_; } + std::shared_ptr& get_device_id() { return device_id_; } - int deserialize_from(common::ByteStream &in, - common::PageArena *pa) override { + int deserialize_from(common::ByteStream& in, + common::PageArena* pa) override { int ret = common::E_OK; device_id_ = std::make_shared("init"); if (RET_FAIL(device_id_->deserialize(in))) { @@ -804,11 +803,11 @@ struct DeviceMetaIndexEntry : IMetaIndexEntry { std::shared_ptr get_device_id() const override { return device_id_; } - std::shared_ptr clone(common::PageArena *pa) override { + std::shared_ptr clone(common::PageArena* pa) override { return std::make_shared(device_id_, offset_); } #ifndef NDEBUG - void print(std::ostream &os) const override { + void print(std::ostream& os) const override { os << "name=" << device_id_ << ", offset=" << offset_; } #endif @@ -821,19 +820,19 @@ struct MeasurementMetaIndexEntry : IMetaIndexEntry { ~MeasurementMetaIndexEntry() override = default; MeasurementMetaIndexEntry() = default; - MeasurementMetaIndexEntry(const common::String &name, const int64_t offset, - common::PageArena &pa) { + MeasurementMetaIndexEntry(const common::String& name, const int64_t offset, + common::PageArena& pa) { offset_ = offset; name_.dup_from(name, pa); } - FORCE_INLINE int init(const std::string &str, const int64_t offset, - common::PageArena &pa) { + FORCE_INLINE int init(const std::string& str, const int64_t offset, + common::PageArena& pa) { offset_ = offset; return name_.dup_from(str, pa); } - int serialize_to(common::ByteStream &out) override { + int serialize_to(common::ByteStream& out) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::write_mystring(name_, out))) { } else if (RET_FAIL( @@ -842,8 +841,8 @@ struct MeasurementMetaIndexEntry : IMetaIndexEntry { return ret; } - int deserialize_from(common::ByteStream &in, - common::PageArena *pa) override { + int deserialize_from(common::ByteStream& in, + common::PageArena* pa) override { int ret = common::E_OK; if (RET_FAIL(common::SerializationUtil::read_mystring(name_, pa, in))) { } else if (RET_FAIL(common::SerializationUtil::read_i64(offset_, in))) { @@ -863,11 +862,11 @@ struct MeasurementMetaIndexEntry : IMetaIndexEntry { std::shared_ptr get_device_id() const override { return nullptr; } - std::shared_ptr clone(common::PageArena *pa) override { + std::shared_ptr clone(common::PageArena* pa) override { return std::make_shared(name_, offset_, *pa); } #ifndef NDEBUG - void print(std::ostream &os) const override { + void print(std::ostream& os) const override { os << "name=" << name_ << ", offset=" << offset_; } #endif @@ -881,7 +880,7 @@ enum MetaIndexNodeType { INVALID_META_NODE_TYPE = 4, }; #ifndef NDEBUG -static const char *meta_index_node_type_names[5] = { +static const char* meta_index_node_type_names[5] = { "INTERNAL_DEVICE", "LEAF_DEVICE", "INTERNAL_MEASUREMENT", "LEAF_MEASUREMENT", "INVALID_META_NODE_TYPE"}; #endif @@ -892,9 +891,9 @@ struct MetaIndexNode { std::vector> children_; int64_t end_offset_; MetaIndexNodeType node_type_; - common::PageArena *pa_; + common::PageArena* pa_; - explicit MetaIndexNode(common::PageArena *pa) + explicit MetaIndexNode(common::PageArena* pa) : children_(), end_offset_(0), node_type_(), pa_(pa) {} std::shared_ptr peek() { @@ -906,7 +905,7 @@ struct MetaIndexNode { ~MetaIndexNode() {} - static void self_deleter(MetaIndexNode *ptr) { + static void self_deleter(MetaIndexNode* ptr) { if (ptr) { ptr->~MetaIndexNode(); } @@ -914,10 +913,10 @@ struct MetaIndexNode { int binary_search_children( std::shared_ptr key, bool exact_search, - std::shared_ptr &ret_index_entry, - int64_t &ret_end_offset); + std::shared_ptr& ret_index_entry, + int64_t& ret_end_offset); - int serialize_to(common::ByteStream &out) { + int serialize_to(common::ByteStream& out) { int ret = common::E_OK; #if DEBUG_SE int64_t start_pos = out.total_size(); @@ -946,12 +945,12 @@ struct MetaIndexNode { return ret; } - int deserialize_from(const char *buf, int len) { + int deserialize_from(const char* buf, int len) { common::ByteStream bs; bs.wrap_from(buf, len); return deserialize_from(bs); } - int deserialize_from(common::ByteStream &in) { + int deserialize_from(common::ByteStream& in) { int ret = common::E_OK; uint32_t children_size = 0; if (RET_FAIL( @@ -959,7 +958,7 @@ struct MetaIndexNode { return ret; } for (uint32_t i = 0; i < children_size && IS_SUCC(ret); i++) { - void *entry_buf = pa_->alloc(sizeof(MeasurementMetaIndexEntry)); + void* entry_buf = pa_->alloc(sizeof(MeasurementMetaIndexEntry)); if (IS_NULL(entry_buf)) { return common::E_OOM; } @@ -987,12 +986,12 @@ struct MetaIndexNode { #endif return ret; } - int device_deserialize_from(const char *buf, int len) { + int device_deserialize_from(const char* buf, int len) { common::ByteStream bs; bs.wrap_from(buf, len); return device_deserialize_from(bs); } - int device_deserialize_from(common::ByteStream &in) { + int device_deserialize_from(common::ByteStream& in) { int ret = common::E_OK; uint32_t children_size = 0; if (RET_FAIL( @@ -1000,11 +999,11 @@ struct MetaIndexNode { return ret; } for (uint32_t i = 0; i < children_size && IS_SUCC(ret); i++) { - void *entry_buf = pa_->alloc(sizeof(DeviceMetaIndexEntry)); + void* entry_buf = pa_->alloc(sizeof(DeviceMetaIndexEntry)); if (IS_NULL(entry_buf)) { return common::E_OOM; } - auto *entry_ptr = new (entry_buf) DeviceMetaIndexEntry(); + auto* entry_ptr = new (entry_buf) DeviceMetaIndexEntry(); auto entry = std::shared_ptr( entry_ptr, DeviceMetaIndexEntry::self_deleter); if (RET_FAIL(entry->deserialize_from(in, pa_))) { @@ -1030,8 +1029,8 @@ struct MetaIndexNode { } #ifndef NDEBUG - friend std::ostream &operator<<(std::ostream &os, - const MetaIndexNode &node) { + friend std::ostream& operator<<(std::ostream& os, + const MetaIndexNode& node) { os << "end_offset=" << node.end_offset_ << ", node_type=" << meta_index_node_type_names[node.node_type_]; @@ -1073,16 +1072,16 @@ struct TsFileMeta { DeviceNodeMap; std::map> table_metadata_index_node_map_; - std::unordered_map tsfile_properties_; + std::unordered_map tsfile_properties_; typedef std::unordered_map> TableSchemasMap; TableSchemasMap table_schemas_; int64_t meta_offset_; - BloomFilter *bloom_filter_; - common::PageArena *page_arena_; + BloomFilter* bloom_filter_; + common::PageArena* page_arena_; - int get_table_metaindex_node(const std::string &table_name, - MetaIndexNode *&ret_node) { + int get_table_metaindex_node(const std::string& table_name, + MetaIndexNode*& ret_node) { std::map>::iterator it = table_metadata_index_node_map_.find(table_name); if (it == table_metadata_index_node_map_.end()) { @@ -1092,8 +1091,8 @@ struct TsFileMeta { return common::E_OK; } - int get_table_schema(const std::string &table_name, - std::shared_ptr &ret_schema) { + int get_table_schema(const std::string& table_name, + std::shared_ptr& ret_schema) { TableSchemasMap::iterator it = table_schemas_.find(table_name); if (it == table_schemas_.end()) { return common::E_TABLE_NOT_EXIST; @@ -1105,7 +1104,7 @@ struct TsFileMeta { TsFileMeta() : meta_offset_(0), bloom_filter_(nullptr), page_arena_(nullptr) {} - explicit TsFileMeta(common::PageArena *pa) + explicit TsFileMeta(common::PageArena* pa) : meta_offset_(0), bloom_filter_(nullptr), page_arena_(pa) {} ~TsFileMeta() { if (bloom_filter_ != nullptr) { @@ -1114,19 +1113,21 @@ struct TsFileMeta { for (auto properties : tsfile_properties_) { if (properties.second != nullptr) { delete properties.second; + properties.second = nullptr; } } + tsfile_properties_.clear(); table_metadata_index_node_map_.clear(); table_schemas_.clear(); } - int serialize_to(common::ByteStream &out); + int serialize_to(common::ByteStream& out); - int deserialize_from(common::ByteStream &in); + int deserialize_from(common::ByteStream& in); #ifndef NDEBUG - friend std::ostream &operator<<(std::ostream &os, - const TsFileMeta &tsfile_meta) { + friend std::ostream& operator<<(std::ostream& os, + const TsFileMeta& tsfile_meta) { os << "meta_offset=" << tsfile_meta.meta_offset_; return os; } diff --git a/cpp/src/cwrapper/tsfile_cwrapper.cc b/cpp/src/cwrapper/tsfile_cwrapper.cc index 1b09db49c..8a8f462b8 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.cc +++ b/cpp/src/cwrapper/tsfile_cwrapper.cc @@ -72,7 +72,7 @@ int set_global_compression(uint8_t compression) { return common::set_global_compression(compression); } -WriteFile write_file_new(const char *pathname, ERRNO *err_code) { +WriteFile write_file_new(const char* pathname, ERRNO* err_code) { int ret; init_tsfile_config(); @@ -86,14 +86,14 @@ WriteFile write_file_new(const char *pathname, ERRNO *err_code) { flags |= O_BINARY; #endif mode_t mode = 0666; - storage::WriteFile *file = new storage::WriteFile; + storage::WriteFile* file = new storage::WriteFile; ret = file->create(pathname, flags, mode); *err_code = ret; return file; } -TsFileWriter tsfile_writer_new(WriteFile file, TableSchema *schema, - ERRNO *err_code) { +TsFileWriter tsfile_writer_new(WriteFile file, TableSchema* schema, + ERRNO* err_code) { if (schema->column_num == 0) { *err_code = common::E_INVALID_SCHEMA; return nullptr; @@ -121,19 +121,19 @@ TsFileWriter tsfile_writer_new(WriteFile file, TableSchema *schema, static_cast(cur_schema.column_category)); } - storage::TableSchema *table_schema = + storage::TableSchema* table_schema = new storage::TableSchema(schema->table_name, column_schemas); auto table_writer = new storage::TsFileTableWriter( - static_cast(file), table_schema); + static_cast(file), table_schema); delete table_schema; *err_code = common::E_OK; return table_writer; } TsFileWriter tsfile_writer_new_with_memory_threshold(WriteFile file, - TableSchema *schema, + TableSchema* schema, uint64_t memory_threshold, - ERRNO *err_code) { + ERRNO* err_code) { if (schema->column_num == 0) { *err_code = common::E_INVALID_SCHEMA; return nullptr; @@ -154,17 +154,17 @@ TsFileWriter tsfile_writer_new_with_memory_threshold(WriteFile file, static_cast(cur_schema.column_category)); } - storage::TableSchema *table_schema = + storage::TableSchema* table_schema = new storage::TableSchema(schema->table_name, column_schemas); - auto table_writer = - new storage::TsFileTableWriter(static_cast(file), - table_schema, memory_threshold); + auto table_writer = new storage::TsFileTableWriter( + static_cast(file), table_schema, memory_threshold); *err_code = common::E_OK; delete table_schema; return table_writer; } -TsFileReader tsfile_reader_new(const char *pathname, ERRNO *err_code) { + +TsFileReader tsfile_reader_new(const char* pathname, ERRNO* err_code) { init_tsfile_config(); auto reader = new storage::TsFileReader(); int ret = reader->open(pathname); @@ -180,7 +180,7 @@ ERRNO tsfile_writer_close(TsFileWriter writer) { if (writer == nullptr) { return common::E_OK; } - auto *w = static_cast(writer); + auto* w = static_cast(writer); int ret = w->flush(); if (ret != common::E_OK) { return ret; @@ -194,12 +194,12 @@ ERRNO tsfile_writer_close(TsFileWriter writer) { } ERRNO tsfile_reader_close(TsFileReader reader) { - auto *ts_reader = static_cast(reader); + auto* ts_reader = static_cast(reader); delete ts_reader; return common::E_OK; } -Tablet tablet_new(char **column_name_list, TSDataType *data_types, +Tablet tablet_new(char** column_name_list, TSDataType* data_types, uint32_t column_num, uint32_t max_rows) { std::vector measurement_list; std::vector data_type_list; @@ -212,20 +212,20 @@ Tablet tablet_new(char **column_name_list, TSDataType *data_types, } uint32_t tablet_get_cur_row_size(Tablet tablet) { - return static_cast(tablet)->get_cur_row_size(); + return static_cast(tablet)->get_cur_row_size(); } ERRNO tablet_add_timestamp(Tablet tablet, uint32_t row_index, Timestamp timestamp) { - return static_cast(tablet)->add_timestamp(row_index, - timestamp); + return static_cast(tablet)->add_timestamp(row_index, + timestamp); } #define TABLET_ADD_VALUE_BY_NAME_DEF(type) \ ERRNO tablet_add_value_by_name_##type(Tablet tablet, uint32_t row_index, \ - const char *column_name, \ + const char* column_name, \ const type value) { \ - return static_cast(tablet)->add_value( \ + return static_cast(tablet)->add_value( \ row_index, storage::to_lower(column_name), value); \ } TABLET_ADD_VALUE_BY_NAME_DEF(int32_t); @@ -235,9 +235,9 @@ TABLET_ADD_VALUE_BY_NAME_DEF(double); TABLET_ADD_VALUE_BY_NAME_DEF(bool); ERRNO tablet_add_value_by_name_string(Tablet tablet, uint32_t row_index, - const char *column_name, - const char *value) { - return static_cast(tablet)->add_value( + const char* column_name, + const char* value) { + return static_cast(tablet)->add_value( row_index, storage::to_lower(column_name), common::String(value)); } @@ -245,14 +245,14 @@ ERRNO tablet_add_value_by_name_string(Tablet tablet, uint32_t row_index, ERRNO tablet_add_value_by_index_##type(Tablet tablet, uint32_t row_index, \ uint32_t column_index, \ const type value) { \ - return static_cast(tablet)->add_value( \ + return static_cast(tablet)->add_value( \ row_index, column_index, value); \ } ERRNO tablet_add_value_by_index_string(Tablet tablet, uint32_t row_index, uint32_t column_index, - const char *value) { - return static_cast(tablet)->add_value( + const char* value) { + return static_cast(tablet)->add_value( row_index, column_index, common::String(value)); } @@ -263,16 +263,16 @@ TABLE_ADD_VALUE_BY_INDEX_DEF(double); TABLE_ADD_VALUE_BY_INDEX_DEF(bool); // TsRecord API -TsRecord _ts_record_new(const char *device_id, Timestamp timestamp, +TsRecord _ts_record_new(const char* device_id, Timestamp timestamp, int timeseries_num) { - auto *record = new storage::TsRecord(timestamp, device_id, timeseries_num); + auto* record = new storage::TsRecord(timestamp, device_id, timeseries_num); return record; } #define INSERT_DATA_INTO_TS_RECORD_BY_NAME_DEF(type) \ ERRNO _insert_data_into_ts_record_by_name_##type( \ - TsRecord data, const char *measurement_name, type value) { \ - auto *record = (storage::TsRecord *)data; \ + TsRecord data, const char* measurement_name, type value) { \ + auto* record = (storage::TsRecord*)data; \ storage::DataPoint point(measurement_name, value); \ if (record->points_.size() + 1 > record->points_.capacity()) \ return common::E_BUF_NOT_ENOUGH; \ @@ -302,8 +302,8 @@ return writer; */ ERRNO tsfile_writer_write(TsFileWriter writer, Tablet tablet) { - auto *w = static_cast(writer); - auto *tbl = static_cast(tablet); + auto* w = static_cast(writer); + auto* tbl = static_cast(tablet); return w->write_table(*tbl); } @@ -314,12 +314,12 @@ ERRNO tsfile_writer_write(TsFileWriter writer, Tablet tablet) { // Query -ResultSet tsfile_query_table(TsFileReader reader, const char *table_name, - char **columns, uint32_t column_num, +ResultSet tsfile_query_table(TsFileReader reader, const char* table_name, + char** columns, uint32_t column_num, Timestamp start_time, Timestamp end_time, - ERRNO *err_code) { - auto *r = static_cast(reader); - storage::ResultSet *table_result_set = nullptr; + ERRNO* err_code) { + auto* r = static_cast(reader); + storage::ResultSet* table_result_set = nullptr; std::vector column_names; for (uint32_t i = 0; i < column_num; i++) { column_names.emplace_back(columns[i]); @@ -329,8 +329,22 @@ ResultSet tsfile_query_table(TsFileReader reader, const char *table_name, return table_result_set; } -bool tsfile_result_set_next(ResultSet result_set, ERRNO *err_code) { - auto *r = static_cast(result_set); +ResultSet tsfile_query_table_on_tree(TsFileReader reader, char** columns, + uint32_t column_num, Timestamp start_time, + Timestamp end_time, ERRNO* err_code) { + auto* r = static_cast(reader); + storage::ResultSet* table_result_set = nullptr; + std::vector column_names; + for (uint32_t i = 0; i < column_num; i++) { + column_names.emplace_back(columns[i]); + } + *err_code = r->query_table_on_tree(column_names, start_time, end_time, + table_result_set); + return table_result_set; +} + +bool tsfile_result_set_next(ResultSet result_set, ERRNO* err_code) { + auto* r = static_cast(result_set); bool has_next = true; int ret = common::E_OK; ret = r->next(has_next); @@ -343,8 +357,8 @@ bool tsfile_result_set_next(ResultSet result_set, ERRNO *err_code) { #define TSFILE_RESULT_SET_GET_VALUE_BY_NAME_DEF(type) \ type tsfile_result_set_get_value_by_name_##type(ResultSet result_set, \ - const char *column_name) { \ - auto *r = static_cast(result_set); \ + const char* column_name) { \ + auto* r = static_cast(result_set); \ std::string column_name_(column_name); \ return r->get_value(column_name_); \ } @@ -354,13 +368,13 @@ TSFILE_RESULT_SET_GET_VALUE_BY_NAME_DEF(int32_t); TSFILE_RESULT_SET_GET_VALUE_BY_NAME_DEF(int64_t); TSFILE_RESULT_SET_GET_VALUE_BY_NAME_DEF(float); TSFILE_RESULT_SET_GET_VALUE_BY_NAME_DEF(double); -char *tsfile_result_set_get_value_by_name_string(ResultSet result_set, - const char *column_name) { - auto *r = static_cast(result_set); +char* tsfile_result_set_get_value_by_name_string(ResultSet result_set, + const char* column_name) { + auto* r = static_cast(result_set); std::string column_name_(column_name); - common::String *ret = r->get_value(column_name_); + common::String* ret = r->get_value(column_name_); // Caller should free return's char* 's space. - char *dup = (char *)malloc(ret->len_ + 1); + char* dup = (char*)malloc(ret->len_ + 1); if (dup) { memcpy(dup, ret->buf_, ret->len_); dup[ret->len_] = '\0'; @@ -371,7 +385,7 @@ char *tsfile_result_set_get_value_by_name_string(ResultSet result_set, #define TSFILE_RESULT_SET_GET_VALUE_BY_INDEX_DEF(type) \ type tsfile_result_set_get_value_by_index_##type(ResultSet result_set, \ uint32_t column_index) { \ - auto *r = static_cast(result_set); \ + auto* r = static_cast(result_set); \ return r->get_value(column_index); \ } @@ -381,12 +395,12 @@ TSFILE_RESULT_SET_GET_VALUE_BY_INDEX_DEF(float); TSFILE_RESULT_SET_GET_VALUE_BY_INDEX_DEF(double); TSFILE_RESULT_SET_GET_VALUE_BY_INDEX_DEF(bool); -char *tsfile_result_set_get_value_by_index_string(ResultSet result_set, +char* tsfile_result_set_get_value_by_index_string(ResultSet result_set, uint32_t column_index) { - auto *r = static_cast(result_set); - common::String *ret = r->get_value(column_index); + auto* r = static_cast(result_set); + common::String* ret = r->get_value(column_index); // Caller should free return's char* 's space. - char *dup = (char *)malloc(ret->len_ + 1); + char* dup = (char*)malloc(ret->len_ + 1); if (dup) { memcpy(dup, ret->buf_, ret->len_); dup[ret->len_] = '\0'; @@ -395,19 +409,19 @@ char *tsfile_result_set_get_value_by_index_string(ResultSet result_set, } bool tsfile_result_set_is_null_by_name(ResultSet result_set, - const char *column_name) { - auto *r = static_cast(result_set); + const char* column_name) { + auto* r = static_cast(result_set); return r->is_null(column_name); } bool tsfile_result_set_is_null_by_index(const ResultSet result_set, const uint32_t column_index) { - auto *r = static_cast(result_set); + auto* r = static_cast(result_set); return r->is_null(column_index); } ResultSetMetaData tsfile_result_set_get_metadata(ResultSet result_set) { - auto *r = static_cast(result_set); + auto* r = static_cast(result_set); if (result_set == NULL) { return ResultSetMetaData(); } @@ -417,8 +431,8 @@ ResultSetMetaData tsfile_result_set_get_metadata(ResultSet result_set) { r->get_metadata(); meta_data.column_num = result_set_metadata->get_column_count(); meta_data.column_names = - static_cast(malloc(meta_data.column_num * sizeof(char *))); - meta_data.data_types = static_cast( + static_cast(malloc(meta_data.column_num * sizeof(char*))); + meta_data.data_types = static_cast( malloc(meta_data.column_num * sizeof(TSDataType))); for (int i = 0; i < meta_data.column_num; i++) { meta_data.column_names[i] = @@ -429,7 +443,7 @@ ResultSetMetaData tsfile_result_set_get_metadata(ResultSet result_set) { return meta_data; } -char *tsfile_result_set_metadata_get_column_name(ResultSetMetaData result_set, +char* tsfile_result_set_metadata_get_column_name(ResultSetMetaData result_set, uint32_t column_index) { if (column_index > (uint32_t)result_set.column_num) { return nullptr; @@ -482,15 +496,15 @@ int tsfile_result_set_metadata_get_column_num(ResultSetMetaData result_set) { // } TableSchema tsfile_reader_get_table_schema(TsFileReader reader, - const char *table_name) { - auto *r = static_cast(reader); + const char* table_name) { + auto* r = static_cast(reader); auto table_shcema = r->get_table_schema(table_name); TableSchema ret_schema; ret_schema.table_name = strdup(table_shcema->get_table_name().c_str()); int column_num = table_shcema->get_columns_num(); ret_schema.column_num = column_num; ret_schema.column_schemas = - static_cast(malloc(sizeof(ColumnSchema) * column_num)); + static_cast(malloc(sizeof(ColumnSchema) * column_num)); for (int i = 0; i < column_num; i++) { auto column_schema = table_shcema->get_measurement_schemas()[i]; ret_schema.column_schemas[i].column_name = @@ -504,18 +518,18 @@ TableSchema tsfile_reader_get_table_schema(TsFileReader reader, return ret_schema; } -TableSchema *tsfile_reader_get_all_table_schemas(TsFileReader reader, - uint32_t *size) { - auto *r = static_cast(reader); +TableSchema* tsfile_reader_get_all_table_schemas(TsFileReader reader, + uint32_t* size) { + auto* r = static_cast(reader); auto table_schemas = r->get_all_table_schemas(); size_t table_num = table_schemas.size(); - TableSchema *ret = - static_cast(malloc(sizeof(TableSchema) * table_num)); + TableSchema* ret = + static_cast(malloc(sizeof(TableSchema) * table_num)); for (size_t i = 0; i < table_schemas.size(); i++) { ret[i].table_name = strdup(table_schemas[i]->get_table_name().c_str()); int column_num = table_schemas[i]->get_columns_num(); ret[i].column_num = column_num; - ret[i].column_schemas = static_cast( + ret[i].column_schemas = static_cast( malloc(column_num * sizeof(ColumnSchema))); auto column_schemas = table_schemas[i]->get_measurement_schemas(); for (int j = 0; j < column_num; j++) { @@ -532,24 +546,77 @@ TableSchema *tsfile_reader_get_all_table_schemas(TsFileReader reader, return ret; } +DeviceSchema* tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, + uint32_t* size) { + auto* r = static_cast(reader); + auto device_ids = r->get_all_device_ids(); + if (size == nullptr) { + return nullptr; + } + *size = static_cast(device_ids.size()); + if (device_ids.empty()) { + return nullptr; + } + + DeviceSchema* device_schema = static_cast( + malloc(sizeof(DeviceSchema) * device_ids.size())); + if (device_schema == nullptr) { + *size = 0; + return nullptr; + } + + size_t device_index = 0; + for (const auto& device_id : device_ids) { + DeviceSchema& cur_schema = device_schema[device_index++]; + std::string device_name = + device_id == nullptr ? "" : device_id->get_device_name(); + cur_schema.device_name = strdup(device_name.c_str()); + cur_schema.timeseries_num = 0; + cur_schema.timeseries_schema = nullptr; + + std::vector schemas; + int ret = r->get_timeseries_schema(device_id, schemas); + if (ret != common::E_OK || schemas.empty()) { + continue; + } + + cur_schema.timeseries_num = static_cast(schemas.size()); + cur_schema.timeseries_schema = static_cast( + malloc(sizeof(TimeseriesSchema) * schemas.size())); + for (size_t i = 0; i < schemas.size(); ++i) { + const auto& measurement_schema = schemas[i]; + cur_schema.timeseries_schema[i].timeseries_name = + strdup(measurement_schema.measurement_name_.c_str()); + cur_schema.timeseries_schema[i].data_type = + static_cast(measurement_schema.data_type_); + cur_schema.timeseries_schema[i].encoding = + static_cast(measurement_schema.encoding_); + cur_schema.timeseries_schema[i].compression = + static_cast( + measurement_schema.compression_type_); + } + } + return device_schema; +} + // delete pointer -void _free_tsfile_ts_record(TsRecord *record) { +void _free_tsfile_ts_record(TsRecord* record) { if (*record != nullptr) { - delete static_cast(*record); + delete static_cast(*record); } *record = nullptr; } -void free_tablet(Tablet *tablet) { +void free_tablet(Tablet* tablet) { if (*tablet != nullptr) { - delete static_cast(*tablet); + delete static_cast(*tablet); } *tablet = nullptr; } -void free_tsfile_result_set(ResultSet *result_set) { +void free_tsfile_result_set(ResultSet* result_set) { if (*result_set != nullptr) { - delete static_cast(*result_set); + delete static_cast(*result_set); } *result_set = nullptr; } @@ -583,15 +650,15 @@ void free_table_schema(TableSchema schema) { } void free_column_schema(ColumnSchema schema) { free(schema.column_name); } -void free_write_file(WriteFile *write_file) { - auto f = static_cast(*write_file); +void free_write_file(WriteFile* write_file) { + auto f = static_cast(*write_file); delete f; *write_file = nullptr; } // For Python API -TsFileWriter _tsfile_writer_new(const char *pathname, uint64_t memory_threshold, - ERRNO *err_code) { +TsFileWriter _tsfile_writer_new(const char* pathname, uint64_t memory_threshold, + ERRNO* err_code) { init_tsfile_config(); auto writer = new storage::TsFileWriter(); int flags = O_WRONLY | O_CREAT | O_TRUNC; @@ -608,9 +675,9 @@ TsFileWriter _tsfile_writer_new(const char *pathname, uint64_t memory_threshold, return writer; } -Tablet _tablet_new_with_target_name(const char *device_id, - char **column_name_list, - TSDataType *data_types, int column_num, +Tablet _tablet_new_with_target_name(const char* device_id, + char** column_name_list, + TSDataType* data_types, int column_num, int max_rows) { std::vector measurement_list; std::vector data_type_list; @@ -627,27 +694,27 @@ Tablet _tablet_new_with_target_name(const char *device_id, } } -ERRNO _tsfile_writer_register_table(TsFileWriter writer, TableSchema *schema) { - std::vector measurement_schemas; +ERRNO _tsfile_writer_register_table(TsFileWriter writer, TableSchema* schema) { + std::vector measurement_schemas; std::vector column_categories; measurement_schemas.resize(schema->column_num); for (int i = 0; i < schema->column_num; i++) { - ColumnSchema *cur_schema = schema->column_schemas + i; + ColumnSchema* cur_schema = schema->column_schemas + i; measurement_schemas[i] = new storage::MeasurementSchema( cur_schema->column_name, static_cast(cur_schema->data_type)); column_categories.push_back( static_cast(cur_schema->column_category)); } - auto tsfile_writer = static_cast(writer); + auto tsfile_writer = static_cast(writer); return tsfile_writer->register_table(std::make_shared( schema->table_name, measurement_schemas, column_categories)); } ERRNO _tsfile_writer_register_timeseries(TsFileWriter writer, - const char *device_id, - const TimeseriesSchema *schema) { - auto *w = static_cast(writer); + const char* device_id, + const TimeseriesSchema* schema) { + auto* w = static_cast(writer); int ret = w->register_timeseries( device_id, @@ -660,8 +727,8 @@ ERRNO _tsfile_writer_register_timeseries(TsFileWriter writer, } ERRNO _tsfile_writer_register_device(TsFileWriter writer, - const device_schema *device_schema) { - auto *w = static_cast(writer); + const device_schema* device_schema) { + auto* w = static_cast(writer); for (int column_id = 0; column_id < device_schema->timeseries_num; column_id++) { TimeseriesSchema schema = device_schema->timeseries_schema[column_id]; @@ -680,26 +747,26 @@ ERRNO _tsfile_writer_register_device(TsFileWriter writer, } ERRNO _tsfile_writer_write_tablet(TsFileWriter writer, Tablet tablet) { - auto *w = static_cast(writer); - const auto *tbl = static_cast(tablet); + auto* w = static_cast(writer); + const auto* tbl = static_cast(tablet); return w->write_tablet(*tbl); } ERRNO _tsfile_writer_write_table(TsFileWriter writer, Tablet tablet) { - auto *w = static_cast(writer); - auto *tbl = static_cast(tablet); + auto* w = static_cast(writer); + auto* tbl = static_cast(tablet); return w->write_table(*tbl); } ERRNO _tsfile_writer_write_ts_record(TsFileWriter writer, TsRecord data) { - auto *w = static_cast(writer); - const storage::TsRecord *record = static_cast(data); + auto* w = static_cast(writer); + const storage::TsRecord* record = static_cast(data); const int ret = w->write_record(*record); return ret; } ERRNO _tsfile_writer_close(TsFileWriter writer) { - auto *w = static_cast(writer); + auto* w = static_cast(writer); int ret = w->flush(); if (ret != common::E_OK) { return ret; @@ -713,23 +780,23 @@ ERRNO _tsfile_writer_close(TsFileWriter writer) { } ERRNO _tsfile_writer_flush(TsFileWriter writer) { - auto *w = static_cast(writer); + auto* w = static_cast(writer); return w->flush(); } ResultSet _tsfile_reader_query_device(TsFileReader reader, - const char *device_name, - char **sensor_name, uint32_t sensor_num, + const char* device_name, + char** sensor_name, uint32_t sensor_num, Timestamp start_time, Timestamp end_time, - ERRNO *err_code) { - auto *r = static_cast(reader); + ERRNO* err_code) { + auto* r = static_cast(reader); std::vector selected_paths; selected_paths.reserve(sensor_num); for (uint32_t i = 0; i < sensor_num; i++) { selected_paths.push_back(std::string(device_name) + "." + std::string(sensor_name[i])); } - storage::ResultSet *qds = nullptr; + storage::ResultSet* qds = nullptr; *err_code = r->query(selected_paths, start_time, end_time, qds); return qds; } diff --git a/cpp/src/cwrapper/tsfile_cwrapper.h b/cpp/src/cwrapper/tsfile_cwrapper.h index 75dc03643..32f85aa4f 100644 --- a/cpp/src/cwrapper/tsfile_cwrapper.h +++ b/cpp/src/cwrapper/tsfile_cwrapper.h @@ -428,6 +428,10 @@ ResultSet tsfile_query_table(TsFileReader reader, const char* table_name, char** columns, uint32_t column_num, Timestamp start_time, Timestamp end_time, ERRNO* err_code); + +ResultSet tsfile_query_table_on_tree(TsFileReader reader, char** columns, + uint32_t column_num, Timestamp start_time, + Timestamp end_time, ERRNO* err_code); // ResultSet tsfile_reader_query_device(TsFileReader reader, // const char* device_name, // char** sensor_name, uint32_t sensor_num, @@ -572,6 +576,15 @@ TableSchema tsfile_reader_get_table_schema(TsFileReader reader, TableSchema* tsfile_reader_get_all_table_schemas(TsFileReader reader, uint32_t* size); +/** + * @brief Gets all timeseries schema in the tsfile. + * + * @return DeviceSchema list, contains timeseries info. + * @note Caller should call free_device_schema and free to free the ptr. + */ +DeviceSchema* tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, + uint32_t* size); + // Close and free resource. void free_tablet(Tablet* tablet); void free_tsfile_result_set(ResultSet* result_set); diff --git a/cpp/src/file/tsfile_io_reader.cc b/cpp/src/file/tsfile_io_reader.cc index 273f09a44..e16b6b4a2 100644 --- a/cpp/src/file/tsfile_io_reader.cc +++ b/cpp/src/file/tsfile_io_reader.cc @@ -24,7 +24,7 @@ using namespace common; namespace storage { -int TsFileIOReader::init(const std::string &file_path) { +int TsFileIOReader::init(const std::string& file_path) { int ret = E_OK; read_file_ = new ReadFile; read_file_created_ = true; @@ -33,7 +33,7 @@ int TsFileIOReader::init(const std::string &file_path) { return ret; } -int TsFileIOReader::init(ReadFile *read_file) { +int TsFileIOReader::init(ReadFile* read_file) { if (IS_NULL(read_file)) { ASSERT(false); return E_INVALID_ARG; @@ -56,9 +56,9 @@ void TsFileIOReader::reset() { } int TsFileIOReader::alloc_ssi(std::shared_ptr device_id, - const std::string &measurement_name, - TsFileSeriesScanIterator *&ssi, - common::PageArena &pa, Filter *time_filter) { + const std::string& measurement_name, + TsFileSeriesScanIterator*& ssi, + common::PageArena& pa, Filter* time_filter) { int ret = E_OK; if (RET_FAIL(load_tsfile_meta_if_necessary())) { } else { @@ -80,7 +80,7 @@ int TsFileIOReader::alloc_ssi(std::shared_ptr device_id, return ret; } -void TsFileIOReader::revert_ssi(TsFileSeriesScanIterator *ssi) { +void TsFileIOReader::revert_ssi(TsFileSeriesScanIterator* ssi) { if (ssi != nullptr) { ssi->destroy(); delete ssi; @@ -89,12 +89,12 @@ void TsFileIOReader::revert_ssi(TsFileSeriesScanIterator *ssi) { int TsFileIOReader::get_device_timeseries_meta_without_chunk_meta( std::shared_ptr device_id, - std::vector ×eries_indexs, PageArena &pa) { + std::vector& timeseries_indexs, PageArena& pa) { int ret = E_OK; load_tsfile_meta_if_necessary(); std::shared_ptr meta_index_entry; int64_t end_offset; - std::vector, int64_t> > + std::vector, int64_t>> meta_index_entry_list; if (RET_FAIL(load_device_index_entry( std::make_shared(device_id), meta_index_entry, @@ -108,8 +108,8 @@ int TsFileIOReader::get_device_timeseries_meta_without_chunk_meta( return ret; } -bool TsFileIOReader::filter_stasify(ITimeseriesIndex *ts_index, - Filter *time_filter) { +bool TsFileIOReader::filter_stasify(ITimeseriesIndex* ts_index, + Filter* time_filter) { ASSERT(ts_index->get_statistic() != nullptr); return time_filter->satisfy(ts_index->get_statistic()); } @@ -141,7 +141,7 @@ int TsFileIOReader::load_tsfile_meta() { // Step 1: reader the tsfile_meta_size // 1.1 prepare reader buffer int32_t alloc_size = UTIL_MIN(TSFILE_READ_IO_SIZE, file_size()); - char *read_buf = (char *)mem_alloc(alloc_size, MOD_TSFILE_READER); + char* read_buf = (char*)mem_alloc(alloc_size, MOD_TSFILE_READER); if (IS_NULL(read_buf)) { return E_OOM; } @@ -159,7 +159,7 @@ int TsFileIOReader::load_tsfile_meta() { // 1.3 deserialize tsfile_meta_size if (IS_SUCC(ret)) { // deserialize tsfile_meta_size - char *size_buf = read_buf + alloc_size - TAIL_MAGIC_AND_META_SIZE_SIZE; + char* size_buf = read_buf + alloc_size - TAIL_MAGIC_AND_META_SIZE_SIZE; tsfile_meta_size = SerializationUtil::read_ui32(size_buf); ASSERT(tsfile_meta_size > 0 && tsfile_meta_size <= (1ll << 20)); } @@ -167,12 +167,12 @@ int TsFileIOReader::load_tsfile_meta() { // Step 2: reader TsFileMeta if (IS_SUCC(ret)) { // 2.1 prepare enough buffer (use the previous buffer if can). - char *tsfile_meta_buf = nullptr; + char* tsfile_meta_buf = nullptr; if (tsfile_meta_size + TAIL_MAGIC_AND_META_SIZE_SIZE > (uint32_t)alloc_size) { // prepare buffer to re-reader from start of tsfile_meta - char *old_read_buf = read_buf; - read_buf = (char *)mem_realloc(read_buf, tsfile_meta_size); + char* old_read_buf = read_buf; + read_buf = (char*)mem_realloc(read_buf, tsfile_meta_size); if (IS_NULL(read_buf)) { read_buf = old_read_buf; ret = E_OOM; @@ -211,8 +211,8 @@ int TsFileIOReader::load_tsfile_meta() { } int TsFileIOReader::load_timeseries_index_for_ssi( - std::shared_ptr device_id, const std::string &measurement_name, - TsFileSeriesScanIterator *&ssi) { + std::shared_ptr device_id, const std::string& measurement_name, + TsFileSeriesScanIterator*& ssi) { int ret = E_OK; std::shared_ptr device_index_entry; int64_t device_ie_end_offset = 0; @@ -224,19 +224,19 @@ int TsFileIOReader::load_timeseries_index_for_ssi( device_ie_end_offset))) { return ret; } - auto &pa = ssi->timeseries_index_pa_; + auto& pa = ssi->timeseries_index_pa_; int start_offset = device_index_entry->get_offset(), end_offset = device_ie_end_offset; ASSERT(start_offset < end_offset); const int32_t read_size = end_offset - start_offset; int32_t ret_read_len = 0; - char *data_buf = (char *)pa.alloc(read_size); - void *m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); + char* data_buf = (char*)pa.alloc(read_size); + void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) { return E_OOM; } - auto *top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); + auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); auto top_node = std::shared_ptr(top_node_ptr, MetaIndexNode::self_deleter); @@ -248,7 +248,7 @@ int TsFileIOReader::load_timeseries_index_for_ssi( } bool is_aligned = is_aligned_device(top_node); - TimeseriesIndex *timeseries_index = nullptr; + TimeseriesIndex* timeseries_index = nullptr; if (is_aligned) { if (RET_FAIL( get_time_column_metadata(top_node, timeseries_index, pa))) { @@ -267,8 +267,8 @@ int TsFileIOReader::load_timeseries_index_for_ssi( return ret; } if (is_aligned) { - auto *aligned_timeseries_index = - dynamic_cast(ssi->itimeseries_index_); + auto* aligned_timeseries_index = + dynamic_cast(ssi->itimeseries_index_); if (aligned_timeseries_index) { aligned_timeseries_index->time_ts_idx_ = timeseries_index; } @@ -277,10 +277,10 @@ int TsFileIOReader::load_timeseries_index_for_ssi( #if DEBUG_SE if (measurement_index_entry.name_.len_) { std::cout << "load timeseries index: " - << *((TimeseriesIndex *)ssi->itimeseries_index_) << std::endl; + << *((TimeseriesIndex*)ssi->itimeseries_index_) << std::endl; } else { std::cout << "load aligned timeseries index: " - << *((AlignedTimeseriesIndex *)ssi->itimeseries_index_) + << *((AlignedTimeseriesIndex*)ssi->itimeseries_index_) << std::endl; } #endif @@ -289,7 +289,7 @@ int TsFileIOReader::load_timeseries_index_for_ssi( int TsFileIOReader::load_device_index_entry( std::shared_ptr device_name, - std::shared_ptr &device_index_entry, int64_t &end_offset) { + std::shared_ptr& device_index_entry, int64_t& end_offset) { int ret = E_OK; std::shared_ptr device_id_comparable = std::dynamic_pointer_cast(device_name); @@ -322,10 +322,10 @@ int TsFileIOReader::load_device_index_entry( } int TsFileIOReader::load_measurement_index_entry( - const std::string &measurement_name_str, + const std::string& measurement_name_str, std::shared_ptr top_node, - std::shared_ptr &ret_measurement_index_entry, - int64_t &ret_end_offset) { + std::shared_ptr& ret_measurement_index_entry, + int64_t& ret_end_offset) { int ret = E_OK; // search from top_node in top-down way auto measurement_name = @@ -346,9 +346,9 @@ int TsFileIOReader::load_measurement_index_entry( } int TsFileIOReader::load_all_measurement_index_entry( - int64_t start_offset, int64_t end_offset, common::PageArena &pa, - std::vector, int64_t> > - &ret_measurement_index_entry) { + int64_t start_offset, int64_t end_offset, common::PageArena& pa, + std::vector, int64_t>>& + ret_measurement_index_entry) { #if DEBUG_SE std::cout << "load_measurement_index_entry: measurement_name_str= " << ", start_offset=" << start_offset @@ -359,12 +359,12 @@ int TsFileIOReader::load_all_measurement_index_entry( // 1. load top measuremnt_index_node const int32_t read_size = (int32_t)(end_offset - start_offset); int32_t ret_read_len = 0; - char *data_buf = (char *)pa.alloc(read_size); - void *m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); + char* data_buf = (char*)pa.alloc(read_size); + void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) { return E_OOM; } - auto *top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); + auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); auto top_node = std::shared_ptr(top_node_ptr, MetaIndexNode::self_deleter); if (RET_FAIL(read_file_->read(start_offset, data_buf, read_size, @@ -389,15 +389,15 @@ int TsFileIOReader::load_all_measurement_index_entry( int TsFileIOReader::read_device_meta_index(int32_t start_offset, int32_t end_offset, - common::PageArena &pa, - MetaIndexNode *&device_meta_index, + common::PageArena& pa, + MetaIndexNode*& device_meta_index, bool leaf) { int ret = E_OK; ASSERT(start_offset < end_offset); const int32_t read_size = (int32_t)(end_offset - start_offset); int32_t ret_read_len = 0; - char *data_buf = (char *)pa.alloc(read_size); - void *m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); + char* data_buf = (char*)pa.alloc(read_size); + void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) { return E_OOM; } @@ -415,8 +415,8 @@ int TsFileIOReader::read_device_meta_index(int32_t start_offset, int TsFileIOReader::get_timeseries_indexes( std::shared_ptr device_id, - const std::unordered_set &measurement_names, - std::vector ×eries_indexs, common::PageArena &pa) { + const std::unordered_set& measurement_names, + std::vector& timeseries_indexs, common::PageArena& pa) { int ret = E_OK; std::shared_ptr device_index_entry; int64_t device_ie_end_offset = 0; @@ -433,12 +433,12 @@ int TsFileIOReader::get_timeseries_indexes( ASSERT(start_offset < end_offset); const int32_t read_size = end_offset - start_offset; int32_t ret_read_len = 0; - char *data_buf = (char *)pa.alloc(read_size); - void *m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); + char* data_buf = (char*)pa.alloc(read_size); + void* m_idx_node_buf = pa.alloc(sizeof(MetaIndexNode)); if (IS_NULL(data_buf) || IS_NULL(m_idx_node_buf)) { return E_OOM; } - auto *top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); + auto* top_node_ptr = new (m_idx_node_buf) MetaIndexNode(&pa); auto top_node = std::shared_ptr(top_node_ptr, MetaIndexNode::self_deleter); @@ -450,24 +450,26 @@ int TsFileIOReader::get_timeseries_indexes( } bool is_aligned = is_aligned_device(top_node); - TimeseriesIndex *timeseries_index = nullptr; + TimeseriesIndex* timeseries_index = nullptr; if (is_aligned) { get_time_column_metadata(top_node, timeseries_index, pa); } int64_t idx = 0; - for (const auto &measurement_name : measurement_names) { + for (const auto& measurement_name : measurement_names) { if (RET_FAIL(load_measurement_index_entry(measurement_name, top_node, measurement_index_entry, measurement_ie_end_offset))) { - } else if (RET_FAIL(do_load_timeseries_index( + } else if (do_load_timeseries_index( measurement_name, measurement_index_entry->get_offset(), measurement_ie_end_offset, pa, timeseries_indexs[idx], - is_aligned))) { + is_aligned) == E_NOT_EXIST) { + idx++; + continue; } if (is_aligned) { - AlignedTimeseriesIndex *aligned_timeseries_index = - dynamic_cast(timeseries_indexs[idx]); + AlignedTimeseriesIndex* aligned_timeseries_index = + dynamic_cast(timeseries_indexs[idx]); if (aligned_timeseries_index) { aligned_timeseries_index->time_ts_idx_ = timeseries_index; } @@ -485,8 +487,8 @@ int TsFileIOReader::get_timeseries_indexes( int TsFileIOReader::search_from_leaf_node( std::shared_ptr target_name, std::shared_ptr index_node, - std::shared_ptr &ret_index_entry, - int64_t &ret_end_offset) { + std::shared_ptr& ret_index_entry, + int64_t& ret_end_offset) { int ret = E_OK; ret = index_node->binary_search_children(target_name, true, ret_index_entry, ret_end_offset); @@ -496,8 +498,8 @@ int TsFileIOReader::search_from_leaf_node( int TsFileIOReader::search_from_internal_node( std::shared_ptr target_name, bool is_device, std::shared_ptr index_node, - std::shared_ptr &ret_index_entry, - int64_t &ret_end_offset) { + std::shared_ptr& ret_index_entry, + int64_t& ret_end_offset) { int ret = E_OK; std::shared_ptr index_entry; int64_t end_offset = 0; @@ -519,12 +521,12 @@ int TsFileIOReader::search_from_internal_node( #endif ASSERT(read_size > 0 && read_size < (1 << 30)); PageArena cur_level_index_node_pa; - void *buf = cur_level_index_node_pa.alloc(sizeof(MetaIndexNode)); - char *data_buf = (char *)cur_level_index_node_pa.alloc(read_size); + void* buf = cur_level_index_node_pa.alloc(sizeof(MetaIndexNode)); + char* data_buf = (char*)cur_level_index_node_pa.alloc(read_size); if (IS_NULL(buf) || IS_NULL(data_buf)) { return E_OOM; } - MetaIndexNode *cur_level_index_node = + MetaIndexNode* cur_level_index_node = new (buf) MetaIndexNode(&cur_level_index_node_pa); int32_t ret_read_len = 0; if (RET_FAIL(read_file_->read(index_entry->get_offset(), data_buf, @@ -569,12 +571,12 @@ bool TsFileIOReader::is_aligned_device( int TsFileIOReader::get_time_column_metadata( std::shared_ptr measurement_node, - TimeseriesIndex *&ret_timeseries_index, PageArena &pa) { + TimeseriesIndex*& ret_timeseries_index, PageArena& pa) { int ret = E_OK; if (!is_aligned_device(measurement_node)) { return ret; } - char *ti_buf = nullptr; + char* ti_buf = nullptr; int start_idx = 0, end_idx = 0; int ret_read_len = 0; if (measurement_node->node_type_ == LEAF_MEASUREMENT) { @@ -597,7 +599,7 @@ int TsFileIOReader::get_time_column_metadata( } } buffer.wrap_from(ti_buf, end_idx - start_idx); - void *buf = pa.alloc(sizeof(TimeseriesIndex)); + void* buf = pa.alloc(sizeof(TimeseriesIndex)); if (IS_NULL(buf)) { return E_OOM; } @@ -621,14 +623,14 @@ int TsFileIOReader::get_time_column_metadata( } int TsFileIOReader::do_load_timeseries_index( - const std::string &measurement_name_str, int64_t start_offset, - int64_t end_offset, PageArena &in_timeseries_index_pa, - ITimeseriesIndex *&ret_timeseries_index, bool is_aligned) { + const std::string& measurement_name_str, int64_t start_offset, + int64_t end_offset, PageArena& in_timeseries_index_pa, + ITimeseriesIndex*& ret_timeseries_index, bool is_aligned) { ASSERT(end_offset > start_offset); int ret = E_OK; int32_t read_size = (int32_t)(end_offset - start_offset); int32_t ret_read_len = 0; - char *ti_buf = (char *)mem_alloc(read_size, MOD_TSFILE_READER); + char* ti_buf = (char*)mem_alloc(read_size, MOD_TSFILE_READER); if (IS_NULL(ti_buf)) { return E_OOM; } @@ -638,7 +640,7 @@ int TsFileIOReader::do_load_timeseries_index( ByteStream bs; bs.wrap_from(ti_buf, read_size); const String target_measurement_name( - (char *)measurement_name_str.c_str(), + (char*)measurement_name_str.c_str(), strlen(measurement_name_str.c_str())); bool found = false; #if DEBUG_SE @@ -654,12 +656,12 @@ int TsFileIOReader::do_load_timeseries_index( } else if (is_aligned && cur_timeseries_index.get_measurement_name().equal_to( target_measurement_name)) { - void *buf = in_timeseries_index_pa.alloc( + void* buf = in_timeseries_index_pa.alloc( sizeof(AlignedTimeseriesIndex)); if (IS_NULL(buf)) { return E_OOM; } - AlignedTimeseriesIndex *aligned_ts_idx = + AlignedTimeseriesIndex* aligned_ts_idx = new (buf) AlignedTimeseriesIndex; buf = in_timeseries_index_pa.alloc(sizeof(TimeseriesIndex)); if (IS_NULL(buf)) { @@ -674,7 +676,7 @@ int TsFileIOReader::do_load_timeseries_index( } else if (!is_aligned && cur_timeseries_index.get_measurement_name().equal_to( target_measurement_name)) { - void *buf = + void* buf = in_timeseries_index_pa.alloc(sizeof(TimeseriesIndex)); auto ts_idx = new (buf) TimeseriesIndex; ts_idx->clone_from(cur_timeseries_index, @@ -693,17 +695,17 @@ int TsFileIOReader::do_load_timeseries_index( } int TsFileIOReader::do_load_all_timeseries_index( - std::vector, int64_t> > - &index_node_entry_list, - common::PageArena &in_timeseries_index_pa, - std::vector &ts_indexs) { + std::vector, int64_t>>& + index_node_entry_list, + common::PageArena& in_timeseries_index_pa, + std::vector& ts_indexs) { int ret = E_OK; - for (const auto &index_node_entry : index_node_entry_list) { + for (const auto& index_node_entry : index_node_entry_list) { int64_t start_offset = index_node_entry.first->get_offset(), end_offset = index_node_entry.second; int32_t read_size = (int32_t)(end_offset - start_offset); int32_t ret_read_len = 0; - char *ti_buf = in_timeseries_index_pa.alloc(read_size); + char* ti_buf = in_timeseries_index_pa.alloc(read_size); if (IS_NULL(ti_buf)) { return E_OOM; } @@ -714,7 +716,7 @@ int TsFileIOReader::do_load_all_timeseries_index( ByteStream bs; bs.wrap_from(ti_buf, read_size); while (bs.has_remaining()) { - void *buf = in_timeseries_index_pa.alloc(sizeof(TimeseriesIndex)); + void* buf = in_timeseries_index_pa.alloc(sizeof(TimeseriesIndex)); auto ts_idx = new (buf) TimeseriesIndex; if (RET_FAIL( ts_idx->deserialize_from(bs, &in_timeseries_index_pa))) { @@ -729,8 +731,8 @@ int TsFileIOReader::do_load_all_timeseries_index( int TsFileIOReader::get_all_leaf( std::shared_ptr index_node, - std::vector, int64_t> > - &index_node_entry_list) { + std::vector, int64_t>>& + index_node_entry_list) { int ret = E_OK; if (index_node->node_type_ == LEAF_MEASUREMENT || index_node->node_type_ == LEAF_DEVICE) { @@ -760,12 +762,12 @@ int TsFileIOReader::get_all_leaf( #endif ASSERT(read_size > 0 && read_size < (1 << 30)); PageArena cur_level_index_node_pa; - void *buf = cur_level_index_node_pa.alloc(sizeof(MetaIndexNode)); - char *data_buf = (char *)cur_level_index_node_pa.alloc(read_size); + void* buf = cur_level_index_node_pa.alloc(sizeof(MetaIndexNode)); + char* data_buf = (char*)cur_level_index_node_pa.alloc(read_size); if (IS_NULL(buf) || IS_NULL(data_buf)) { return E_OOM; } - auto *cur_level_index_node_ptr = + auto* cur_level_index_node_ptr = new (buf) MetaIndexNode(&cur_level_index_node_pa); auto cur_level_index_node = std::shared_ptr( cur_level_index_node_ptr, MetaIndexNode::self_deleter); diff --git a/cpp/src/reader/block/single_device_tsblock_reader.cc b/cpp/src/reader/block/single_device_tsblock_reader.cc index 1df563cd8..0e2b350c7 100644 --- a/cpp/src/reader/block/single_device_tsblock_reader.cc +++ b/cpp/src/reader/block/single_device_tsblock_reader.cc @@ -82,8 +82,8 @@ int SingleDeviceTsBlockReader::init(DeviceQueryTask* device_query_task, device_query_task->get_column_mapping()->get_id_columns()) { const auto& column_pos_in_result = device_query_task->get_column_mapping()->get_column_pos(id_column); - int column_pos_in_id = - table_schema->find_id_column_order(id_column) + 1; + int column_pos_in_id = table_schema->find_id_column_order(id_column) + + (!table_schema->is_virtual_table()); id_column_contexts_.insert(std::make_pair( id_column, IdColumnContext(column_pos_in_result, column_pos_in_id))); @@ -214,8 +214,13 @@ int SingleDeviceTsBlockReader::fill_ids() { const auto& id_column_context = entry.second; for (int32_t pos : id_column_context.pos_in_result_) { std::string* device_tag = nullptr; - device_tag = device_query_task_->get_device_id()->get_segments().at( - id_column_context.pos_in_device_id_); + auto device_id = device_query_task_->get_device_id(); + int32_t pos_in_device_id = id_column_context.pos_in_device_id_; + if (pos_in_device_id >= 0 && static_cast(pos_in_device_id) < + device_id->get_split_seg_num()) { + device_tag = device_id->get_split_segname_at(pos_in_device_id); + } + if (device_tag == nullptr) { ret = col_appenders_[pos + 1]->fill_null( current_block_->get_row_count()); diff --git a/cpp/src/reader/device_meta_iterator.cc b/cpp/src/reader/device_meta_iterator.cc index a59f511db..be7408528 100644 --- a/cpp/src/reader/device_meta_iterator.cc +++ b/cpp/src/reader/device_meta_iterator.cc @@ -45,7 +45,7 @@ int DeviceMetaIterator::next( } int DeviceMetaIterator::load_results() { - bool is_root_idx_node = true; + int root_num = meta_index_nodes_.size(); while (!meta_index_nodes_.empty()) { // To avoid ASan overflow. // using `const auto&` creates a reference @@ -60,12 +60,9 @@ int DeviceMetaIterator::load_results() { } else { return common::E_INVALID_NODE_TYPE; } - // The first MetaIndexNode is the root and is not loaded here, so no - // need to destruct it here. - if (!is_root_idx_node) { + if (root_num-- <= 0) { meta_data_index_node->~MetaIndexNode(); } - is_root_idx_node = false; } return common::E_OK; @@ -91,8 +88,11 @@ int DeviceMetaIterator::load_leaf_device(MetaIndexNode* meta_index_node) { start_offset, end_offset, pa_, child_node, true))) { return ret; } else { - result_cache_.push( - std::make_pair(child->get_device_id(), child_node)); + auto device_id = child->get_device_id(); + if (should_split_device_name) { + device_id->split_table_name(); + } + result_cache_.push(std::make_pair(device_id, child_node)); } } return ret; diff --git a/cpp/src/reader/device_meta_iterator.h b/cpp/src/reader/device_meta_iterator.h index 55f209134..e79c35a05 100644 --- a/cpp/src/reader/device_meta_iterator.h +++ b/cpp/src/reader/device_meta_iterator.h @@ -29,30 +29,44 @@ namespace storage { class DeviceMetaIterator { public: - explicit DeviceMetaIterator(TsFileIOReader *io_reader, - MetaIndexNode *meat_index_node, - const Filter *id_filter) - : io_reader_(io_reader), id_filter_(id_filter) { + explicit DeviceMetaIterator(TsFileIOReader* io_reader, + MetaIndexNode* meat_index_node, + const Filter* id_filter) + : io_reader_(io_reader), + id_filter_(id_filter), + should_split_device_name(false) { meta_index_nodes_.push(meat_index_node); pa_.init(512, common::MOD_DEVICE_META_ITER); } + DeviceMetaIterator(TsFileIOReader* io_reader, + const std::vector& meta_index_node_list, + const Filter* id_filter) + : io_reader_(io_reader), id_filter_(id_filter) { + for (auto meta_index_node : meta_index_node_list) { + meta_index_nodes_.push(meta_index_node); + } + should_split_device_name = true; + pa_.init(512, common::MOD_DEVICE_META_ITER); + } + ~DeviceMetaIterator() { pa_.destroy(); } bool has_next(); - int next(std::pair, MetaIndexNode *> &ret_meta); + int next(std::pair, MetaIndexNode*>& ret_meta); private: int load_results(); - int load_leaf_device(MetaIndexNode *meta_index_node); - int load_internal_node(MetaIndexNode *meta_index_node); - TsFileIOReader *io_reader_; - std::queue meta_index_nodes_; - std::queue, MetaIndexNode *>> + int load_leaf_device(MetaIndexNode* meta_index_node); + int load_internal_node(MetaIndexNode* meta_index_node); + TsFileIOReader* io_reader_; + std::queue meta_index_nodes_; + std::queue, MetaIndexNode*>> result_cache_; - const Filter *id_filter_; + const Filter* id_filter_; common::PageArena pa_; + bool should_split_device_name; }; } // end namespace storage diff --git a/cpp/src/reader/imeta_data_querier.h b/cpp/src/reader/imeta_data_querier.h index 73a005e84..c034f9151 100644 --- a/cpp/src/reader/imeta_data_querier.h +++ b/cpp/src/reader/imeta_data_querier.h @@ -57,7 +57,10 @@ class IMetadataQuerier { virtual std::unique_ptr device_iterator( MetaIndexNode* root, const Filter* id_filter) = 0; -}; + // FIXME(Colin): refine this. + virtual std::unique_ptr device_iterator( + std::vector root, const Filter* id_filter) = 0; +}; } // end namespace storage #endif // READER_IMETA_DATA_QUERIER_H diff --git a/cpp/src/reader/meta_data_querier.cc b/cpp/src/reader/meta_data_querier.cc index 5a32b9221..0accbdde9 100644 --- a/cpp/src/reader/meta_data_querier.cc +++ b/cpp/src/reader/meta_data_querier.cc @@ -98,6 +98,12 @@ std::unique_ptr MetadataQuerier::device_iterator( new DeviceMetaIterator(io_reader_, root, id_filter)); } +std::unique_ptr MetadataQuerier::device_iterator( + std::vector root, const Filter* id_filter) { + return std::unique_ptr( + new DeviceMetaIterator(io_reader_, root, id_filter)); +} + int MetadataQuerier::load_chunk_meta( const std::pair& key, std::vector& chunk_meta_list) { diff --git a/cpp/src/reader/meta_data_querier.h b/cpp/src/reader/meta_data_querier.h index b4eed3501..525ecf86e 100644 --- a/cpp/src/reader/meta_data_querier.h +++ b/cpp/src/reader/meta_data_querier.h @@ -61,6 +61,9 @@ class MetadataQuerier : public IMetadataQuerier { std::unique_ptr device_iterator( MetaIndexNode* root, const Filter* id_filter) override; + std::unique_ptr device_iterator( + std::vector root, const Filter* id_filter) override; + void clear() override; private: diff --git a/cpp/src/reader/table_query_executor.cc b/cpp/src/reader/table_query_executor.cc index d09a5c904..79b636b52 100644 --- a/cpp/src/reader/table_query_executor.cc +++ b/cpp/src/reader/table_query_executor.cc @@ -19,17 +19,19 @@ #include "reader/table_query_executor.h" +#include "utils/db_utils.h" + namespace storage { -int TableQueryExecutor::query(const std::string &table_name, - const std::vector &columns, - Filter *time_filter, Filter *id_filter, - Filter *field_filter, ResultSet *&ret_qds) { +int TableQueryExecutor::query(const std::string& table_name, + const std::vector& columns, + Filter* time_filter, Filter* id_filter, + Filter* field_filter, ResultSet*& ret_qds) { int ret = common::E_OK; - TsFileMeta *file_metadata = nullptr; + TsFileMeta* file_metadata = nullptr; file_metadata = tsfile_io_reader_->get_tsfile_meta(); common::PageArena pa; pa.init(512, common::MOD_TSFILE_READER); - MetaIndexNode *table_root = nullptr; + MetaIndexNode* table_root = nullptr; std::shared_ptr table_schema; if (RET_FAIL( file_metadata->get_table_metaindex_node(table_name, table_root))) { @@ -42,7 +44,7 @@ int TableQueryExecutor::query(const std::string &table_name, return ret; } std::vector lower_case_column_names(columns); - for (auto &column : lower_case_column_names) { + for (auto& column : lower_case_column_names) { to_lowercase_inplace(column); } std::shared_ptr column_mapping = @@ -85,6 +87,108 @@ int TableQueryExecutor::query(const std::string &table_name, return ret; } -void TableQueryExecutor::destroy_query_data_set(ResultSet *qds) { delete qds; } +int TableQueryExecutor::query_on_tree( + const std::vector>& devices, + const std::vector& tag_columns, + const std::vector& field_columns, Filter* time_filter, + ResultSet*& ret_qds) { + common::PageArena pa; + pa.init(512, common::MOD_TSFILE_READER); + int ret = common::E_OK; + TsFileMeta* file_meta = tsfile_io_reader_->get_tsfile_meta(); + std::unordered_set table_inodes; + for (auto const& device : devices) { + MetaIndexNode* table_inode; + if (RET_FAIL(file_meta->get_table_metaindex_node( + device->get_table_name(), table_inode))) { + }; + table_inodes.insert(table_inode); + } + + std::vector col_schema; + for (auto const& tag : tag_columns) { + col_schema.emplace_back(tag, common::TSDataType::STRING, + common::ColumnCategory::TAG); + } + + std::unordered_map column_types_map; + + for (auto const& device : devices) { + bool all_collected = true; + for (const auto& field_col : field_columns) { + if (column_types_map.find(field_col) == column_types_map.end()) { + all_collected = false; + break; + } + } + if (all_collected) { + break; + } + + std::unordered_set measurements(field_columns.begin(), + field_columns.end()); + std::vector index(measurements.size()); + if (RET_FAIL(tsfile_io_reader_->get_timeseries_indexes( + device, measurements, index, pa))) { + return ret; + } + + for (auto* ts_index : index) { + if (ts_index != nullptr) { + std::string measurement_name = + ts_index->get_measurement_name().to_std_string(); + if (column_types_map.find(measurement_name) == + column_types_map.end()) { + common::TSDataType type = ts_index->get_data_type(); + column_types_map[measurement_name] = type; + } + } + } + } + + for (const auto& field_col : field_columns) { + if (column_types_map.find(field_col) != column_types_map.end()) { + col_schema.emplace_back(field_col, column_types_map[field_col], + common::ColumnCategory::FIELD); + } else { + col_schema.emplace_back(field_col, + common::TSDataType::INVALID_DATATYPE, + common::ColumnCategory::FIELD); + } + } + + auto schema = std::make_shared("default", col_schema); + schema->set_virtual_table(); + std::shared_ptr column_mapping = + std::make_shared(); + for (size_t i = 0; i < col_schema.size(); ++i) { + column_mapping->add(col_schema[i].column_name_, i, *schema); + } + std::vector datatypes = schema->get_data_types(); + std::vector index_nodes(table_inodes.begin(), + table_inodes.end()); + auto device_task_iterator = + std::unique_ptr(new DeviceTaskIterator( + schema->get_measurement_names(), index_nodes, column_mapping, + meta_data_querier_, nullptr, schema)); + std::unique_ptr tsblock_reader; + switch (table_query_ordering_) { + case TableQueryOrdering::DEVICE: + tsblock_reader = std::unique_ptr( + new DeviceOrderedTsBlockReader( + std::move(device_task_iterator), meta_data_querier_, + block_size_, tsfile_io_reader_, time_filter, nullptr)); + break; + case TableQueryOrdering::TIME: + default: + ret = common::E_UNSUPPORTED_ORDER; + } + assert(tsblock_reader != nullptr); + ret_qds = new TableResultSet(std::move(tsblock_reader), + schema->get_measurement_names(), + schema->get_data_types()); + return ret; +} +void TableQueryExecutor::destroy_query_data_set(ResultSet* qds) { delete qds; } } // end namespace storage diff --git a/cpp/src/reader/table_query_executor.h b/cpp/src/reader/table_query_executor.h index 83a82fe56..974e6b45b 100644 --- a/cpp/src/reader/table_query_executor.h +++ b/cpp/src/reader/table_query_executor.h @@ -37,15 +37,15 @@ class TableQueryExecutor { public: enum class TableQueryOrdering { TIME, DEVICE }; - TableQueryExecutor(IMetadataQuerier *meta_data_querier, - TsFileIOReader *tsfile_io_reader, + TableQueryExecutor(IMetadataQuerier* meta_data_querier, + TsFileIOReader* tsfile_io_reader, TableQueryOrdering table_query_ordering, int block_size = 1024) : meta_data_querier_(meta_data_querier), tsfile_io_reader_(tsfile_io_reader), table_query_ordering_(table_query_ordering), block_size_(block_size) {} - TableQueryExecutor(ReadFile *read_file) { + TableQueryExecutor(ReadFile* read_file) { tsfile_io_reader_ = new TsFileIOReader(); tsfile_io_reader_->init(read_file); meta_data_querier_ = new MetadataQuerier(tsfile_io_reader_); @@ -62,14 +62,18 @@ class TableQueryExecutor { tsfile_io_reader_ = nullptr; } } - int query(const std::string &table_name, - const std::vector &columns, Filter *time_filter, - Filter *id_filter, Filter *field_filter, ResultSet *&ret_qds); - void destroy_query_data_set(ResultSet *qds); + int query(const std::string& table_name, + const std::vector& columns, Filter* time_filter, + Filter* id_filter, Filter* field_filter, ResultSet*& ret_qds); + int query_on_tree(const std::vector>& devices, + const std::vector& tag_columns, + const std::vector& field_columns, + Filter* time_filter, ResultSet*& ret_qds); + void destroy_query_data_set(ResultSet* qds); private: - IMetadataQuerier *meta_data_querier_; - TsFileIOReader *tsfile_io_reader_; + IMetadataQuerier* meta_data_querier_; + TsFileIOReader* tsfile_io_reader_; TableQueryOrdering table_query_ordering_; int32_t block_size_; }; diff --git a/cpp/src/reader/task/device_task_iterator.h b/cpp/src/reader/task/device_task_iterator.h index a5079877f..ec30a472d 100644 --- a/cpp/src/reader/task/device_task_iterator.h +++ b/cpp/src/reader/task/device_task_iterator.h @@ -43,6 +43,21 @@ class DeviceTaskIterator { table_schema_(table_schema) { pa_.init(512, common::MOD_DEVICE_TASK_ITER); } + + DeviceTaskIterator(std::vector column_names, + std::vector index_roots, + std::shared_ptr column_mapping, + IMetadataQuerier *metadata_querier, + const Filter *id_filter, + std::shared_ptr table_schema) + : column_names_(column_names), + column_mapping_(column_mapping), + device_meta_iterator_( + metadata_querier->device_iterator(index_roots, id_filter)), + table_schema_(table_schema) { + pa_.init(512, common::MOD_DEVICE_TASK_ITER); + } + ~DeviceTaskIterator() { pa_.destroy(); } bool has_next() const; diff --git a/cpp/src/reader/tsfile_reader.cc b/cpp/src/reader/tsfile_reader.cc index 2ac45d84d..f97570885 100644 --- a/cpp/src/reader/tsfile_reader.cc +++ b/cpp/src/reader/tsfile_reader.cc @@ -107,8 +107,6 @@ int TsFileReader::query(const std::string& table_name, return E_TABLE_NOT_EXIST; } - std::vector data_types = table_schema->get_data_types(); - Filter* time_filter = new TimeBetween(start_time, end_time, false); ret = table_query_executor_->query(to_lower(table_name), columns_names, time_filter, tag_filter, nullptr, @@ -116,6 +114,83 @@ int TsFileReader::query(const std::string& table_name, return ret; } +int TsFileReader::query_table_on_tree( + const std::vector& measurement_names, int64_t star_time, + int64_t end_time, ResultSet*& result_set) { + int ret = E_OK; + TsFileMeta* tsfile_meta = tsfile_executor_->get_tsfile_meta(); + if (tsfile_meta == nullptr) { + return E_TSFILE_WRITER_META_ERR; + } + auto device_ids = this->get_all_device_ids(); + std::vector> satisfied_device_ids; + std::unordered_set measurement_names_set_to_query; + size_t device_max_len = 0; + + if (measurement_names.empty()) { + for (auto& device_name : device_ids) { + std::vector schemas; + this->get_timeseries_schema(device_name, schemas); + satisfied_device_ids.push_back(device_name); + for (auto& schema : schemas) { + measurement_names_set_to_query.insert(schema.measurement_name_); + } + device_name->split_table_name(); + if (device_name->get_split_seg_num() > device_max_len) { + device_max_len = device_name->get_split_seg_num(); + } + } + } else { + std::unordered_set found_measurement_names; + std::unordered_set required_measurement_names( + measurement_names.begin(), measurement_names.end()); + for (auto& device_name : device_ids) { + std::vector schemas; + this->get_timeseries_schema(device_name, schemas); + + bool device_has_required_measurement_names = false; + for (auto& schema : schemas) { + if (required_measurement_names.find(schema.measurement_name_) != + required_measurement_names.end()) { + found_measurement_names.insert(schema.measurement_name_); + device_has_required_measurement_names = true; + } + } + if (device_has_required_measurement_names) { + device_name->split_table_name(); + satisfied_device_ids.push_back(device_name); + if (device_name->get_split_seg_num() > device_max_len) { + device_max_len = device_name->get_split_seg_num(); + } + } + } + + if (found_measurement_names.size() < + required_measurement_names.size()) { + return E_COLUMN_NOT_EXIST; + } + measurement_names_set_to_query = found_measurement_names; + } + std::vector measurement_names_to_query; + // Get all columns. + if (measurement_names.empty() && !measurement_names_set_to_query.empty()) { + for (auto& measurement_name : measurement_names_set_to_query) { + measurement_names_to_query.push_back(measurement_name); + } + } else { + measurement_names_to_query = measurement_names; + } + std::vector columns_names(device_max_len); + for (int i = 0; i < device_max_len; i++) { + columns_names[i] = "col_" + std::to_string(i); + } + Filter* time_filter = new TimeBetween(star_time, end_time, false); + ret = table_query_executor_->query_on_tree( + satisfied_device_ids, columns_names, measurement_names_to_query, + time_filter, result_set); + return ret; +} + void TsFileReader::destroy_query_data_set(storage::ResultSet* qds) { tsfile_executor_->destroy_query_data_set(qds); } diff --git a/cpp/src/reader/tsfile_reader.h b/cpp/src/reader/tsfile_reader.h index 55e5f2c49..8a6ba2264 100644 --- a/cpp/src/reader/tsfile_reader.h +++ b/cpp/src/reader/tsfile_reader.h @@ -54,7 +54,7 @@ class TsFileReader { * @param file_path the path of the tsfile which will be opened * @return Returns 0 on success, or a non-zero error code on failure. */ - int open(const std::string &file_path); + int open(const std::string& file_path); /** * @brief close the tsfile, this method should be called after the * query is finished @@ -70,7 +70,7 @@ class TsFileReader { * @param [out] ret_qds the result set * @return Returns 0 on success, or a non-zero error code on failure. */ - int query(storage::QueryExpression *qe, ResultSet *&ret_qds); + int query(storage::QueryExpression* qe, ResultSet*& ret_qds); /** * @brief query the tsfile by the path list, start time and end time * this method is used to query the tsfile by the tree model. @@ -80,8 +80,8 @@ class TsFileReader { * @param [in] end_time the end time * @param [out] result_set the result set */ - int query(std::vector &path_list, int64_t start_time, - int64_t end_time, ResultSet *&result_set); + int query(std::vector& path_list, int64_t start_time, + int64_t end_time, ResultSet*& result_set); /** * @brief query the tsfile by the table name, columns names, start time * and end time. this method is used to query the tsfile by the table @@ -93,9 +93,9 @@ class TsFileReader { * @param [in] end_time the end time * @param [out] result_set the result set */ - int query(const std::string &table_name, - const std::vector &columns_names, int64_t start_time, - int64_t end_time, ResultSet *&result_set); + int query(const std::string& table_name, + const std::vector& columns_names, int64_t start_time, + int64_t end_time, ResultSet*& result_set); /** * @brief query the tsfile by the table name, columns names, start time @@ -109,20 +109,23 @@ class TsFileReader { * @param [in] tag_filter the tag filter * @param [out] result_set the result set */ - int query(const std::string &table_name, - const std::vector &columns_names, int64_t start_time, - int64_t end_time, ResultSet *&result_set, Filter *tag_filter); + int query(const std::string& table_name, + const std::vector& columns_names, int64_t start_time, + int64_t end_time, ResultSet*& result_set, Filter* tag_filter); + int query_table_on_tree(const std::vector& measurement_names, + int64_t star_time, int64_t end_time, + ResultSet*& result_set); /** * @brief destroy the result set, this method should be called after the * query is finished and result_set * * @param qds the result set */ - void destroy_query_data_set(ResultSet *qds); - ResultSet *read_timeseries( - const std::shared_ptr &device_id, - const std::vector &measurement_name); + void destroy_query_data_set(ResultSet* qds); + ResultSet* read_timeseries( + const std::shared_ptr& device_id, + const std::vector& measurement_name); /** * @brief get all devices in the tsfile * @@ -148,7 +151,7 @@ class TsFileReader { * @return Returns 0 on success, or a non-zero error code on failure. */ int get_timeseries_schema(std::shared_ptr device_id, - std::vector &result); + std::vector& result); /** * @brief get the table schema by the table name * @@ -156,7 +159,7 @@ class TsFileReader { * @return std::shared_ptr the table schema */ std::shared_ptr get_table_schema( - const std::string &table_name); + const std::string& table_name); /** * @brief get all table schemas in the tsfile * @@ -165,12 +168,12 @@ class TsFileReader { std::vector> get_all_table_schemas(); private: - int get_all_devices(std::vector> &device_ids, + int get_all_devices(std::vector>& device_ids, std::shared_ptr index_node, - common::PageArena &pa); - storage::ReadFile *read_file_; - storage::TsFileExecutor *tsfile_executor_; - storage::TableQueryExecutor *table_query_executor_; + common::PageArena& pa); + storage::ReadFile* read_file_; + storage::TsFileExecutor* tsfile_executor_; + storage::TableQueryExecutor* table_query_executor_; }; } // namespace storage diff --git a/cpp/test/reader/bloom_filter_test.cc b/cpp/test/reader/bloom_filter_test.cc index 7e754df14..29b24db97 100644 --- a/cpp/test/reader/bloom_filter_test.cc +++ b/cpp/test/reader/bloom_filter_test.cc @@ -37,7 +37,7 @@ TEST(BloomfilterTest, BloomFilter) { common::String sensor1 = common::String("value", arena); filter.add_path_entry(device1, sensor1); common::ByteStream out(1024, common::MOD_DEFAULT); - uint8_t *filter_data_bytes = nullptr; + uint8_t* filter_data_bytes = nullptr; int32_t filter_data_bytes_len = 0; filter.get_bit_set()->to_bytes(filter_data_bytes, filter_data_bytes_len); std::unordered_set data; @@ -51,7 +51,7 @@ TEST(BloomfilterTest, BloomFilter) { BloomFilter filter2; filter2.deserialize_from(out); // ASSERT_EQ(filter, filter2); - uint8_t *filter_data_bytes2 = nullptr; + uint8_t* filter_data_bytes2 = nullptr; int32_t filter_data_bytes_len2 = 0; filter2.get_bit_set()->to_bytes(filter_data_bytes2, filter_data_bytes_len2); ASSERT_EQ(filter_data_bytes_len, filter_data_bytes_len2); diff --git a/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc b/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc index 477ab24b5..9dd482e86 100644 --- a/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc +++ b/cpp/test/reader/tree_view/tsfile_reader_tree_test.cc @@ -32,9 +32,96 @@ namespace storage { class QDSWithoutTimeGenerator; } + using namespace storage; using namespace common; +static void print_table_result_set(storage::TableResultSet* table_result_set) { + if (table_result_set == nullptr) { + std::cout << "TableResultSet is nullptr" << std::endl; + return; + } + + auto metadata = table_result_set->get_metadata(); + if (metadata == nullptr) { + std::cout << "Metadata is nullptr" << std::endl; + return; + } + + uint32_t column_count = metadata->get_column_count(); + if (column_count == 0) { + std::cout << "No columns in result set" << std::endl; + return; + } + + for (uint32_t i = 1; i <= column_count; i++) { + std::cout << metadata->get_column_name(i); + if (i < column_count) { + std::cout << "\t"; + } + } + std::cout << std::endl; + + bool has_next = false; + int row_count = 0; + while (IS_SUCC(table_result_set->next(has_next)) && has_next) { + for (uint32_t i = 1; i <= column_count; i++) { + if (table_result_set->is_null(i)) { + std::cout << "NULL"; + } else { + common::TSDataType col_type = metadata->get_column_type(i); + switch (col_type) { + case common::INT64: { + int64_t val = table_result_set->get_value(i); + std::cout << val; + break; + } + case common::INT32: { + int32_t val = table_result_set->get_value(i); + std::cout << val; + break; + } + case common::FLOAT: { + float val = table_result_set->get_value(i); + std::cout << val; + break; + } + case common::DOUBLE: { + double val = table_result_set->get_value(i); + std::cout << val; + break; + } + case common::BOOLEAN: { + bool val = table_result_set->get_value(i); + std::cout << (val ? "true" : "false"); + break; + } + case common::STRING: { + common::String* str = + table_result_set->get_value(i); + if (str == nullptr) { + std::cout << "null"; + } else { + std::cout << std::string(str->buf_, str->len_); + } + break; + } + default: { + std::cout << ""; + break; + } + } + } + if (i < column_count) { + std::cout << "\t"; + } + } + std::cout << std::endl; + row_count++; + } + std::cout << "Total rows: " << row_count << std::endl; +} + class TsFileTreeReaderTest : public ::testing::Test { protected: void SetUp() override { @@ -49,7 +136,8 @@ class TsFileTreeReaderTest : public ::testing::Test { mode_t mode = 0666; write_file_.create(file_name_, flags, mode); } - void TearDown() override { remove(file_name_.c_str()); } + + void TearDown() override {} std::string file_name_; WriteFile write_file_; @@ -108,6 +196,187 @@ TEST_F(TsFileTreeReaderTest, BasicTest) { reader.close(); } +TEST_F(TsFileTreeReaderTest, ReadTreeByTable) { + TsFileTreeWriter writer(&write_file_); + std::vector device_ids = {"root.db1.t1", "root.db2.t1", + "root.db3.t2.t3", "root.db3.t3", + "device"}; + std::vector measurement_ids = {"temperature", "hudi", "level"}; + for (auto& device_id : device_ids) { + TsRecord record(device_id, 0); + TsRecord record1(device_id, 1); + for (auto const& measurement : measurement_ids) { + auto schema = + new storage::MeasurementSchema(measurement, TSDataType::INT32); + ASSERT_EQ(E_OK, writer.register_timeseries(device_id, schema)); + delete schema; + record.add_point(measurement, static_cast(1)); + record1.add_point(measurement, static_cast(2)); + } + ASSERT_EQ(E_OK, writer.write(record)); + ASSERT_EQ(E_OK, writer.write(record1)); + } + writer.flush(); + writer.close(); + + TsFileReader reader; + reader.open(file_name_); + ResultSet* result; + int ret = reader.query_table_on_tree({"temperature", "hudi"}, INT64_MIN, + INT64_MAX, result); + ASSERT_EQ(ret, E_OK); + + auto* table_result_set = (storage::TableResultSet*)result; + bool has_next = false; + int col_cnt = table_result_set->get_metadata()->get_column_count(); + std::unordered_map res; + std::unordered_set result_set; + result_set.insert("rootdb1t1null"); + result_set.insert("rootdb2t1null"); + result_set.insert("rootdb3t2t3"); + result_set.insert("rootdb3t3null"); + result_set.insert("devicenullnullnull"); + int row_cnt = 0; + while (IS_SUCC(table_result_set->next(has_next)) && has_next) { + auto t = table_result_set->get_value(1); + ASSERT_TRUE(t == 0 || t == 1); + std::string device_id_string; + for (int i = 1; i < col_cnt + 1; ++i) { + switch (table_result_set->get_metadata()->get_column_type(i)) { + case INT64: + ASSERT_TRUE(table_result_set->get_value(i) == 1 || + table_result_set->get_value(i) == 0); + break; + case INT32: + ASSERT_TRUE(table_result_set->get_value(i) == 1 || + table_result_set->get_value(i) == 2); + break; + case STRING: { + common::String* str = + table_result_set->get_value(i); + std::string device_id_str; + if (str == nullptr) { + device_id_str = "null"; + } else { + device_id_str = std::string(str->buf_, str->len_); + } + device_id_string += device_id_str; + } break; + default: + break; + } + } + ASSERT_TRUE(result_set.find(device_id_string) != result_set.end()); + row_cnt++; + } + ASSERT_EQ(row_cnt, 10); + reader.destroy_query_data_set(result); + reader.close(); +} + +TEST_F(TsFileTreeReaderTest, ReadTreeByTableIrrergular) { + TsFileTreeWriter writer(&write_file_); + std::vector device_ids = {"root.db1.t1", + "root.db2.t1", + "root.db3.t2.t3", + "root.db3.t3", + "device", + "device.ln", + "device2.ln1.tmp", + "device3.ln2.tmp.v1.v2", + "device3.ln2.tmp.v1.v3"}; + std::vector measurement_ids1 = {"temperature", "hudi", + "level"}; + std::vector measurement_ids2 = {"level", "vol"}; + for (int i = 0; i < device_ids.size(); ++i) { + std::string device_id = device_ids[i]; + TsRecord record(device_id, 0); + TsRecord record1(device_id, 1); + std::vector measurements = + (i % 2 == 0) ? measurement_ids1 : measurement_ids2; + for (auto const& measurement : measurements) { + auto schema = + new storage::MeasurementSchema(measurement, TSDataType::INT32); + ASSERT_EQ(E_OK, writer.register_timeseries(device_id, schema)); + delete schema; + record.add_point(measurement, static_cast(1)); + record1.add_point(measurement, static_cast(2)); + } + ASSERT_EQ(E_OK, writer.write(record)); + ASSERT_EQ(E_OK, writer.write(record1)); + } + writer.flush(); + writer.close(); + + TsFileReader reader; + reader.open(file_name_); + ResultSet* result; + int ret = reader.query_table_on_tree({"level", "hudi"}, INT64_MIN, + INT64_MAX, result); + ASSERT_EQ(ret, E_OK); + + auto* table_result_set = (storage::TableResultSet*)result; + bool has_next = false; + int col_cnt = table_result_set->get_metadata()->get_column_count(); + ASSERT_EQ(col_cnt, 8); + int row_cnt = 0; + int null_count = 0; + std::unordered_set result_set; + result_set.insert("rootdb1t1nullnull"); + result_set.insert("rootdb2t1nullnull"); + result_set.insert("rootdb3t2t3null"); + result_set.insert("rootdb3t3nullnull"); + result_set.insert("devicenullnullnullnull"); + result_set.insert("devicelnnullnullnull"); + result_set.insert("device2ln1tmpnullnull"); + result_set.insert("device3ln2tmpv1v2"); + result_set.insert("device3ln2tmpv1v3"); + + while (IS_SUCC(table_result_set->next(has_next)) && has_next) { + auto t = table_result_set->get_value(1); + ASSERT_TRUE(t == 0 || t == 1); + std::string device_id_string; + for (int i = 1; i < col_cnt + 1; ++i) { + if (table_result_set->is_null(i)) { + null_count++; + if (table_result_set->get_metadata()->get_column_type(i) != + STRING) { + continue; + } + } + switch (table_result_set->get_metadata()->get_column_type(i)) { + case INT64: + ASSERT_TRUE(table_result_set->get_value(i) == 1 || + table_result_set->get_value(i) == 0); + break; + case INT32: + ASSERT_TRUE(table_result_set->get_value(i) == 1 || + table_result_set->get_value(i) == 2); + break; + case STRING: { + common::String* str = + table_result_set->get_value(i); + std::string device_id_str; + if (str == nullptr) { + device_id_str = "null"; + } else { + device_id_str = std::string(str->buf_, str->len_); + } + device_id_string += device_id_str; + } break; + default: + break; + } + } + ASSERT_TRUE(result_set.find(device_id_string) != result_set.end()); + row_cnt++; + } + ASSERT_EQ(null_count, 40); + ASSERT_EQ(row_cnt, 18); + reader.destroy_query_data_set(result); + reader.close(); +} + TEST_F(TsFileTreeReaderTest, ExtendedRowsAndColumnsTest) { TsFileTreeWriter writer(&write_file_); std::vector device_ids = {"device_1", "device_2", "device_3"}; diff --git a/cpp/test/reader/tsfile_reader_test.cc b/cpp/test/reader/tsfile_reader_test.cc index c2dfe2ce7..eed05d8b5 100644 --- a/cpp/test/reader/tsfile_reader_test.cc +++ b/cpp/test/reader/tsfile_reader_test.cc @@ -179,7 +179,7 @@ TEST_F(TsFileReaderTest, GetAllDevice) { ASSERT_EQ(ret, common::E_OK); auto devices = reader.get_all_devices("device"); ASSERT_EQ(devices.size(), 1024); - std::vector > devices_name_expected; + std::vector> devices_name_expected; for (size_t i = 0; i < 1024; i++) { devices_name_expected.push_back(std::make_shared( "device.ln" + std::to_string(i))); diff --git a/python/tests/test_write_and_read.py b/python/tests/test_write_and_read.py index e5c87ab92..bb1e2b84a 100644 --- a/python/tests/test_write_and_read.py +++ b/python/tests/test_write_and_read.py @@ -19,7 +19,10 @@ import os import numpy as np +import pandas as pd import pytest +from pandas.core.dtypes.common import is_integer_dtype + from tsfile import ColumnSchema, TableSchema, TSEncoding from tsfile import Compressor from tsfile import TSDataType @@ -34,17 +37,18 @@ def test_row_record_write_and_read(): try: writer = TsFileWriter("record_write_and_read.tsfile") - timeseries = TimeseriesSchema("level1", TSDataType.INT64) - writer.register_timeseries("root.device1", timeseries) + writer.register_timeseries("root.device1", TimeseriesSchema("level1", TSDataType.INT64)) writer.register_timeseries("root.device1", TimeseriesSchema("level2", TSDataType.DOUBLE)) - writer.register_timeseries("root.device1", TimeseriesSchema("level3", TSDataType.INT32)) + writer.register_timeseries("root.device2", TimeseriesSchema("level1", TSDataType.INT32)) max_row_num = 1000 for i in range(max_row_num): row = RowRecord("root.device1", i, [Field("level1", i + 1, TSDataType.INT64), - Field("level2", i * 1.1, TSDataType.DOUBLE), - Field("level3", i * 2, TSDataType.INT32)]) + Field("level2", i * 1.1, TSDataType.DOUBLE)]) + writer.write_row_record(row) + row = RowRecord("root.device2", i, + [Field("level1", i + 1, TSDataType.INT32)]) writer.write_row_record(row) writer.close() @@ -56,13 +60,237 @@ def test_row_record_write_and_read(): print(result.get_value_by_index(1)) print(reader.get_active_query_result()) result.close() + result2 = reader.query_table_on_tree(["level1", "level2"], 20, 50) + print(result2.read_data_frame()) + result2.close() print(reader.get_active_query_result()) reader.close() + + + finally: if os.path.exists("record_write_and_read.tsfile"): os.remove("record_write_and_read.tsfile") +def test_tree_query_to_dataframe_variants(): + file_path = "tree_query_to_dataframe.tsfile" + device_ids = [ + "root.db1.t1", + "root.db2.t1", + "root.db3.t2.t3", + "root.db3.t3", + "device", + "device.ln", + "device2.ln1.tmp", + "device3.ln2.tmp.v1.v2", + "device3.ln2.tmp.v1.v3", + ] + device_path_map = [ + "root.db1.t1.null.null", + "root.db2.t1.null.null", + "root.db3.t2.t3.null", + "root.db3.t3.null.null", + "device.null.null.null.null", + "device.ln.null.null.null", + "device2.ln1.tmp.null.null", + "device3.ln2.tmp.v1.v2", + "device3.ln2.tmp.v1.v3", + ] + measurement_ids1 = ["temperature", "hudi", "level"] + measurement_ids2 = ["level", "vol"] + rows_per_device = 2 + expected_values = {} + all_measurements = set() + + def _is_null(value): + return value is None or pd.isna(value) + + def _extract_device(row, path_columns): + parts = [] + for col in path_columns: + value = row[col] + if not _is_null(value): + parts.append(str(value)) + else: + parts.append("null") + return ".".join(parts) + + try: + writer = TsFileWriter(file_path) + for idx, device_id in enumerate(device_ids): + measurements = measurement_ids1 if idx % 2 == 0 else measurement_ids2 + all_measurements.update(measurements) + for measurement in measurements: + writer.register_timeseries( + device_id, TimeseriesSchema(measurement, TSDataType.INT32) + ) + for ts in range(rows_per_device): + fields = [] + measurement_snapshot = {} + for m_idx, measurement in enumerate(measurements): + value = idx * 100 + ts * 10 + m_idx + fields.append(Field(measurement, value, TSDataType.INT32)) + measurement_snapshot[measurement] = value + writer.write_row_record(RowRecord(device_id, ts, fields)) + expected_values[(device_path_map[idx], ts)] = measurement_snapshot + writer.close() + + df_all = to_dataframe(file_path, start_time=0, end_time=rows_per_device) + print(df_all) + total_rows = len(device_ids) * rows_per_device + assert df_all.shape[0] == total_rows + for measurement in all_measurements: + assert measurement in df_all.columns + assert "time" in df_all.columns + path_columns = sorted( + [col for col in df_all.columns if col.startswith("col_")], + key=lambda name: int(name.split("_")[1]), + ) + assert len(path_columns) > 0 + + for _, row in df_all.iterrows(): + device = _extract_device(row, path_columns) + timestamp = int(row["time"]) + assert (device, timestamp) in expected_values + expected_row = expected_values[(device, timestamp)] + for measurement in all_measurements: + value = row.get(measurement) + if measurement in expected_row: + assert value == expected_row[measurement] + else: + assert _is_null(value) + assert device in device_path_map + + requested_columns = ["level", "temperature"] + df_subset = to_dataframe( + file_path, column_names=requested_columns, start_time=0, end_time=rows_per_device + ) + for column in requested_columns: + assert column in df_subset.columns + for measurement in all_measurements: + if measurement not in requested_columns: + assert measurement not in df_subset.columns + for _, row in df_subset.iterrows(): + device = _extract_device(row, path_columns) + timestamp = int(row["time"]) + expected_row = expected_values[(device, timestamp)] + for measurement in requested_columns: + value = row.get(measurement) + if measurement in expected_row: + assert value == expected_row[measurement] + else: + assert _is_null(value) + assert device in device_path_map + df_limited = to_dataframe( + file_path, column_names=["level"], max_row_num=5, start_time=0, end_time=rows_per_device + ) + assert df_limited.shape[0] == 5 + assert "level" in df_limited.columns + + iterator = to_dataframe( + file_path, + column_names=["level", "temperature"], + max_row_num=3, + start_time=0, + end_time=rows_per_device, + as_iterator=True, + ) + iter_rows = 0 + for batch in iterator: + assert isinstance(batch, pd.DataFrame) + assert set(batch.columns).issuperset({"time", "level"}) + iter_rows += len(batch) + print(batch) + assert iter_rows == 18 + + iterator = to_dataframe( + file_path, + column_names=["level", "temperature"], + max_row_num=3, + start_time=0, + end_time=0, + as_iterator=True, + ) + iter_rows = 0 + for batch in iterator: + assert isinstance(batch, pd.DataFrame) + assert set(batch.columns).issuperset({"time", "level"}) + iter_rows += len(batch) + print(batch) + assert iter_rows == 9 + + with pytest.raises(ColumnNotExistError): + to_dataframe(file_path, column_names=["level", "not_exists"]) + finally: + if os.path.exists(file_path): + os.remove(file_path) + + +def test_get_all_timeseries_schemas(): + file_path = "get_all_timeseries_schema.tsfile" + device_ids = [ + "root.db1.t1", + "root.db2.t1", + "root.db3.t2.t3", + "root.db3.t3", + "device", + "device.ln", + "device2.ln1.tmp", + "device3.ln2.tmp.v1.v2", + "device3.ln2.tmp.v1.v3", + ] + measurement_ids1 = ["temperature", "hudi", "level"] + measurement_ids2 = ["level", "vol"] + rows_per_device = 2 + + try: + writer = TsFileWriter(file_path) + for idx, device_id in enumerate(device_ids): + measurements = measurement_ids1 if idx % 2 == 0 else measurement_ids2 + for measurement in measurements: + writer.register_timeseries( + device_id, TimeseriesSchema(measurement, TSDataType.INT32) + ) + for ts in range(rows_per_device): + fields = [] + for measurement in measurements: + fields.append( + Field( + measurement, + idx * 100 + ts * 10 + len(fields), + TSDataType.INT32, + ) + ) + writer.write_row_record(RowRecord(device_id, ts, fields)) + writer.close() + + reader = TsFileReader(file_path) + device_schema_map = reader.get_all_timeseries_schemas() + expected_devices = {device_id.lower() for device_id in device_ids} + assert set(device_schema_map.keys()) == expected_devices + print(device_schema_map) + + for idx, device_id in enumerate(device_ids): + measurements = measurement_ids1 if idx % 2 == 0 else measurement_ids2 + normalized_device = device_id.lower() + assert normalized_device in device_schema_map + device_schema = device_schema_map[normalized_device] + assert device_schema.get_device_name() == normalized_device + timeseries_list = device_schema.get_timeseries_list() + assert len(timeseries_list) == len(measurements) + actual_measurements = { + ts_schema.get_timeseries_name() for ts_schema in timeseries_list + } + assert actual_measurements == {m.lower() for m in measurements} + for ts_schema in timeseries_list: + assert ts_schema.get_data_type() == TSDataType.INT32 + reader.close() + finally: + if os.path.exists(file_path): + os.remove(file_path) + + @pytest.mark.skip(reason="API not match") def test_tablet_write_and_read(): try: @@ -291,9 +519,9 @@ def test_tsfile_to_df(): df1 = to_dataframe("table_write_to_df.tsfile") assert df1.shape == (4097, 4) assert df1["value2"].sum() == 100 * (1 + 4096) / 2 * 4096 - assert df1["time"].dtype == np.int64 + assert is_integer_dtype(df1["time"]) assert df1["value"].dtype == np.float64 - assert df1["value2"].dtype == np.int64 + assert is_integer_dtype(df1["value2"]) df2 = to_dataframe("table_write_to_df.tsfile", column_names=["device", "value2"]) assert df2.shape == (4097, 3) assert df1["value2"].equals(df2["value2"]) diff --git a/python/tsfile/constants.py b/python/tsfile/constants.py index 5eaa24700..72ac434bf 100644 --- a/python/tsfile/constants.py +++ b/python/tsfile/constants.py @@ -53,9 +53,9 @@ def to_pandas_dtype(self): if self == TSDataType.BOOLEAN: return "bool" elif self == TSDataType.INT32: - return "int32" + return "Int32" elif self == TSDataType.INT64: - return "int64" + return "Int64" elif self == TSDataType.FLOAT: return "float32" elif self == TSDataType.DOUBLE: diff --git a/python/tsfile/schema.py b/python/tsfile/schema.py index ae7960c5e..3aa1313cd 100644 --- a/python/tsfile/schema.py +++ b/python/tsfile/schema.py @@ -49,6 +49,10 @@ def get_encoding_type(self): def get_compression_type(self): return self.compression_type + def __repr__(self): + return f"TimeseriesSchema({self.timeseries_name}, {self.data_type.name}, {self.encoding_type.name}, {self.compression_type.name})" + + class DeviceSchema: """Represents a device entity containing multiple time series.""" @@ -66,6 +70,8 @@ def get_device_name(self): def get_timeseries_list(self): return self.timeseries_list + def __repr__(self): + return f"DeviceSchema({self.device_name}, {self.timeseries_list})" class ColumnSchema: """Defines schema for a table column (name, datatype, category).""" diff --git a/python/tsfile/tsfile_cpp.pxd b/python/tsfile/tsfile_cpp.pxd index 1b04051c9..d35be96e3 100644 --- a/python/tsfile/tsfile_cpp.pxd +++ b/python/tsfile/tsfile_cpp.pxd @@ -167,6 +167,12 @@ cdef extern from "./tsfile_cwrapper.h": const char * table_name, const char** columns, uint32_t column_num, int64_t start_time, int64_t end_time, ErrorCode *err_code) + + ResultSet tsfile_query_table_on_tree(TsFileReader reader, + char** columns, uint32_t column_num, + int64_t start_time, int64_t end_time, + ErrorCode* err_code); + ResultSet _tsfile_reader_query_device(TsFileReader reader, const char *device_name, char ** sensor_name, uint32_t sensor_num, @@ -177,6 +183,8 @@ cdef extern from "./tsfile_cwrapper.h": TableSchema * tsfile_reader_get_all_table_schemas(TsFileReader reader, uint32_t * size); + DeviceSchema * tsfile_reader_get_all_timeseries_schemas(TsFileReader reader, + uint32_t * size); # resultSet : get data from resultSet bint tsfile_result_set_next(ResultSet result_set, ErrorCode * err_code); diff --git a/python/tsfile/tsfile_py_cpp.pxd b/python/tsfile/tsfile_py_cpp.pxd index ce907a796..e44bb588d 100644 --- a/python/tsfile/tsfile_py_cpp.pxd +++ b/python/tsfile/tsfile_py_cpp.pxd @@ -49,9 +49,12 @@ cdef public api ErrorCode tsfile_writer_register_table_py_cpp(TsFileWriter write cdef public api bint tsfile_result_set_is_null_by_name_c(ResultSet result_set, object name) cdef public api ResultSet tsfile_reader_query_table_c(TsFileReader reader, object table_name, object column_list, int64_t start_time, int64_t end_time) +cdef public api ResultSet tsfile_reader_query_table_on_tree_c(TsFileReader reader, object column_list, + int64_t start_time, int64_t end_time) cdef public api ResultSet tsfile_reader_query_paths_c(TsFileReader reader, object device_name, object sensor_list, int64_t start_time, int64_t end_time) cdef public api object get_table_schema(TsFileReader reader, object table_name) cdef public api object get_all_table_schema(TsFileReader reader) +cdef public api object get_all_timeseries_schema(TsFileReader reader) cpdef public api object get_tsfile_config() cpdef public api void set_tsfile_config(dict new_config) \ No newline at end of file diff --git a/python/tsfile/tsfile_py_cpp.pyx b/python/tsfile/tsfile_py_cpp.pyx index e17430399..7a9aa889e 100644 --- a/python/tsfile/tsfile_py_cpp.pyx +++ b/python/tsfile/tsfile_py_cpp.pyx @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. # - #cython: language_level=3 from .tsfile_cpp cimport * @@ -32,9 +31,10 @@ from tsfile.schema import ResultSetMetaData as ResultSetMetaDataPy from tsfile.schema import TSDataType as TSDataTypePy, TSEncoding as TSEncodingPy from tsfile.schema import Compressor as CompressorPy, ColumnCategory as CategoryPy from tsfile.schema import TableSchema as TableSchemaPy, ColumnSchema as ColumnSchemaPy +from tsfile.schema import DeviceSchema as DeviceSchemaPy, TimeseriesSchema as TimeseriesSchemaPy # check exception and set py exception object -cdef inline void check_error(int errcode, const char* context=NULL) except *: +cdef inline void check_error(int errcode, const char * context=NULL) except*: cdef: object exc_type object exc_instance @@ -74,7 +74,21 @@ cdef object from_c_table_schema(TableSchema schema): free_c_table_schema(&schema) return TableSchemaPy(table_name, columns) +cdef object from_c_timeseries_schema(TimeseriesSchema schema): + timeseries_name = schema.timeseries_name.decode('utf-8') + data_type = TSDataTypePy(schema.data_type) + encoding = TSEncodingPy(schema.encoding) + compression = CompressorPy(schema.compression) + return TimeseriesSchemaPy(timeseries_name, data_type, encoding, compression) +cdef object from_c_device_schema(DeviceSchema schema): + cdef int i + device_name = schema.device_name.decode('utf-8') + timeseries = [] + for i in range(schema.timeseries_num): + timeseries.append(from_c_timeseries_schema(schema.timeseries_schema[i])) + free_c_device_schema(&schema) + return DeviceSchemaPy(device_name, timeseries) # Convert from python to c struct cdef dict TS_DATA_TYPE_MAP = { @@ -128,7 +142,6 @@ cdef ColumnCategory to_c_category_type(object category): except KeyError: raise ValueError(f"Unsupported Python Column Category: {category}") - cdef TSEncoding to_c_encoding_type(object encoding_type): try: return TS_ENCODING_MAP[encoding_type] @@ -141,8 +154,8 @@ cdef CompressionType to_c_compression_type(object compression_type): except KeyError: raise ValueError(f"Unsupported Python Compressor: {compression_type}") -cdef TimeseriesSchema* to_c_timeseries_schema(object py_schema): - cdef TimeseriesSchema* c_schema +cdef TimeseriesSchema * to_c_timeseries_schema(object py_schema): + cdef TimeseriesSchema * c_schema c_schema = malloc(sizeof(TimeseriesSchema)) c_schema.timeseries_name = strdup(py_schema.timeseries_name.encode('utf-8')) if py_schema.data_type is not None: @@ -159,30 +172,30 @@ cdef TimeseriesSchema* to_c_timeseries_schema(object py_schema): raise ValueError("compression_type cannot be None") return c_schema - -cdef DeviceSchema* to_c_device_schema(object py_schema): - cdef DeviceSchema* c_schema +cdef DeviceSchema * to_c_device_schema(object py_schema): + cdef DeviceSchema * c_schema c_schema = malloc(sizeof(DeviceSchema)) c_schema.device_name = strdup(py_schema.device_name.encode('utf-8')) c_schema.timeseries_num = len(py_schema.timeseries_list) c_schema.timeseries_schema = malloc(c_schema.timeseries_num * sizeof(TimeseriesSchema)) for i in range(c_schema.timeseries_num): - c_schema.timeseries_schema[i].timeseries_name = strdup(py_schema.timeseries_list[i].timeseries_name.encode('utf-8')) + c_schema.timeseries_schema[i].timeseries_name = strdup( + py_schema.timeseries_list[i].timeseries_name.encode('utf-8')) c_schema.timeseries_schema[i].data_type = to_c_data_type(py_schema.timeseries_list[i].data_type) c_schema.timeseries_schema[i].encoding = to_c_encoding_type(py_schema.timeseries_list[i].encoding_type) c_schema.timeseries_schema[i].compression = to_c_compression_type(py_schema.timeseries_list[i].compression_type) return c_schema -cdef ColumnSchema* to_c_column_schema(object py_schema): - cdef ColumnSchema* c_schema - c_schema = malloc(sizeof(ColumnSchema)) +cdef ColumnSchema * to_c_column_schema(object py_schema): + cdef ColumnSchema * c_schema + c_schema = malloc(sizeof(ColumnSchema)) c_schema.data_type = to_c_data_type(py_schema.data_type) c_schema.column_category = py_schema.category c_schema.column_name = strdup(py_schema.column_name.encode('utf-8')) return c_schema -cdef TableSchema* to_c_table_schema(object py_schema): - cdef TableSchema* c_schema +cdef TableSchema * to_c_table_schema(object py_schema): + cdef TableSchema * c_schema c_schema = malloc(sizeof(TableSchema)) c_schema.table_name = strdup(py_schema.table_name.encode('utf-8')) c_schema.column_num = len(py_schema.columns) @@ -193,7 +206,6 @@ cdef TableSchema* to_c_table_schema(object py_schema): c_schema.column_schemas[i].data_type = to_c_data_type(py_schema.columns[i].data_type) return c_schema - cdef Tablet to_c_tablet(object tablet): cdef Tablet ctablet cdef int max_row_num @@ -202,7 +214,7 @@ cdef Tablet to_c_tablet(object tablet): cdef bytes device_id_bytes cdef const char * device_id_c cdef char** columns_names - cdef TSDataType* column_types + cdef TSDataType * column_types cdef bytes row_bytes cdef const char *row_str @@ -222,7 +234,7 @@ cdef Tablet to_c_tablet(object tablet): max_row_num = tablet.get_max_row_num() ctablet = _tablet_new_with_target_name(device_id_c, columns_names, columns_types, column_num, - max_row_num) + max_row_num) free(columns_types) for i in range(column_num): free(columns_names[i]) @@ -275,15 +287,13 @@ cdef Tablet to_c_tablet(object tablet): row_str = PyBytes_AsString(row_bytes) tablet_add_value_by_index_string(ctablet, row, col, row_str) - return ctablet - cdef TsRecord to_c_record(object row_record): cdef int field_num = row_record.get_fields_num() - cdef int64_t timestamp = row_record.get_timestamp() + cdef int64_t timestamp = row_record.get_timestamp() cdef bytes device_id_bytes = PyUnicode_AsUTF8String(row_record.get_device_id()) - cdef const char* device_id = device_id_bytes + cdef const char * device_id = device_id_bytes cdef TsRecord record cdef int i cdef TSDataType data_type @@ -292,32 +302,37 @@ cdef TsRecord to_c_record(object row_record): field = row_record.get_fields()[i] data_type = to_c_data_type(field.get_data_type()) if data_type == TS_DATATYPE_BOOLEAN: - _insert_data_into_ts_record_by_name_bool(record, PyUnicode_AsUTF8(field.get_field_name()), field.get_bool_value()) + _insert_data_into_ts_record_by_name_bool(record, PyUnicode_AsUTF8(field.get_field_name()), + field.get_bool_value()) elif data_type == TS_DATATYPE_INT32: - _insert_data_into_ts_record_by_name_int32_t(record, PyUnicode_AsUTF8(field.get_field_name()), field.get_int_value()) + _insert_data_into_ts_record_by_name_int32_t(record, PyUnicode_AsUTF8(field.get_field_name()), + field.get_int_value()) elif data_type == TS_DATATYPE_INT64: - _insert_data_into_ts_record_by_name_int64_t(record, PyUnicode_AsUTF8(field.get_field_name()), field.get_long_value()) + _insert_data_into_ts_record_by_name_int64_t(record, PyUnicode_AsUTF8(field.get_field_name()), + field.get_long_value()) elif data_type == TS_DATATYPE_DOUBLE: - _insert_data_into_ts_record_by_name_double(record, PyUnicode_AsUTF8(field.get_field_name()), field.get_double_value()) + _insert_data_into_ts_record_by_name_double(record, PyUnicode_AsUTF8(field.get_field_name()), + field.get_double_value()) elif data_type == TS_DATATYPE_FLOAT: - _insert_data_into_ts_record_by_name_float(record, PyUnicode_AsUTF8(field.get_field_name()), field.get_float_value()) + _insert_data_into_ts_record_by_name_float(record, PyUnicode_AsUTF8(field.get_field_name()), + field.get_float_value()) return record # Free c structs' space -cdef void free_c_table_schema(TableSchema* c_schema): +cdef void free_c_table_schema(TableSchema * c_schema): free(c_schema.table_name) for i in range(c_schema.column_num): free_c_column_schema(&(c_schema.column_schemas[i])) free(c_schema.column_schemas) -cdef void free_c_column_schema(ColumnSchema* c_schema): +cdef void free_c_column_schema(ColumnSchema * c_schema): free(c_schema.column_name) -cdef void free_c_timeseries_schema(TimeseriesSchema* c_schema): +cdef void free_c_timeseries_schema(TimeseriesSchema * c_schema): free(c_schema.timeseries_name) -cdef void free_c_device_schema(DeviceSchema* c_schema): +cdef void free_c_device_schema(DeviceSchema * c_schema): free(c_schema.device_name) for i in range(c_schema.timeseries_num): free_c_timeseries_schema(&(c_schema.timeseries_schema[i])) @@ -334,7 +349,7 @@ cdef TsFileWriter tsfile_writer_new_c(object pathname, uint64_t memory_threshold cdef ErrorCode errno = 0 cdef TsFileWriter writer cdef bytes encoded_path = PyUnicode_AsUTF8String(pathname) - cdef const char* c_path = encoded_path + cdef const char * c_path = encoded_path writer = _tsfile_writer_new(c_path, memory_threshold, &errno) check_error(errno) return writer @@ -343,8 +358,8 @@ cdef TsFileReader tsfile_reader_new_c(object pathname) except +: cdef ErrorCode errno = 0 cdef TsFileReader reader cdef bytes encoded_path = PyUnicode_AsUTF8String(pathname) - cdef const char* c_path = encoded_path - reader = tsfile_reader_new(c_path, &errno) + cdef const char * c_path = encoded_path + reader = tsfile_reader_new(c_path, &errno) check_error(errno) return reader @@ -356,13 +371,13 @@ cpdef object get_tsfile_config(): "page_writer_max_memory_bytes_": g_config_value_.page_writer_max_memory_bytes_, "max_degree_of_index_node_": g_config_value_.max_degree_of_index_node_, "tsfile_index_bloom_filter_error_percent_": g_config_value_.tsfile_index_bloom_filter_error_percent_, - "time_encoding_type_":TSEncodingPy(int(g_config_value_.time_encoding_type_)), + "time_encoding_type_": TSEncodingPy(int(g_config_value_.time_encoding_type_)), "time_data_type_": TSDataTypePy(int(g_config_value_.time_data_type_)), "time_compress_type_": CompressorPy(int(g_config_value_.time_compress_type_)), "chunk_group_size_threshold_": g_config_value_.chunk_group_size_threshold_, - "record_count_for_next_mem_check_":g_config_value_.record_count_for_next_mem_check_, - "encrypt_flag_":g_config_value_.encrypt_flag_, - "boolean_encoding_type_":TSEncodingPy(int(g_config_value_.boolean_encoding_type_)), + "record_count_for_next_mem_check_": g_config_value_.record_count_for_next_mem_check_, + "encrypt_flag_": g_config_value_.encrypt_flag_, + "boolean_encoding_type_": TSEncodingPy(int(g_config_value_.boolean_encoding_type_)), "int32_encoding_type_": TSEncodingPy(int(g_config_value_.int32_encoding_type_)), "int64_encoding_type_": TSEncodingPy(int(g_config_value_.int64_encoding_type_)), "float_encoding_type_": TSEncodingPy(int(g_config_value_.float_encoding_type_)), @@ -371,7 +386,6 @@ cpdef object get_tsfile_config(): "default_compression_type_": CompressorPy(int(g_config_value_.default_compression_type_)), } - cpdef void set_tsfile_config(dict new_config): if "tsblock_mem_inc_step_size_" in new_config: _check_uint32(new_config["tsblock_mem_inc_step_size_"]) @@ -390,21 +404,22 @@ cpdef void set_tsfile_config(dict new_config): g_config_value_.max_degree_of_index_node_ = new_config["max_degree_of_index_node_"] if "tsfile_index_bloom_filter_error_percent_" in new_config: _check_double(new_config["tsfile_index_bloom_filter_error_percent_"]) - g_config_value_.tsfile_index_bloom_filter_error_percent_ = new_config["tsfile_index_bloom_filter_error_percent_"] + g_config_value_.tsfile_index_bloom_filter_error_percent_ = new_config[ + "tsfile_index_bloom_filter_error_percent_"] if "time_encoding_type_" in new_config: if not isinstance(new_config["time_encoding_type_"], TSEncodingPy): raise TypeError(f"Unsupported TSEncoding: {new_config['time_encoding_type_']}") - code = set_global_time_encoding((new_config["time_encoding_type_"].value)) + code = set_global_time_encoding( (new_config["time_encoding_type_"].value)) check_error(code) if "time_data_type_" in new_config: if not isinstance(new_config["time_data_type_"], TSDataTypePy): raise TypeError(f"Unsupported TSDataType: {new_config['time_data_type_']}") - code = set_global_time_data_type((new_config["time_data_type_"].value)) + code = set_global_time_data_type( (new_config["time_data_type_"].value)) check_error(code) if "time_compress_type_" in new_config: if not isinstance(new_config["time_compress_type_"], CompressorPy): raise TypeError(f"Unsupported Compressor: {new_config['time_compress_type_']}") - code = set_global_time_compression((new_config["time_compress_type_"].value)) + code = set_global_time_compression( (new_config["time_compress_type_"].value)) check_error(code) if "chunk_group_size_threshold_" in new_config: _check_uint32(new_config["chunk_group_size_threshold_"]) @@ -414,7 +429,7 @@ cpdef void set_tsfile_config(dict new_config): g_config_value_.record_count_for_next_mem_check_ = new_config["record_count_for_next_mem_check_"] if "encrypt_flag_" in new_config: _check_bool(new_config["encrypt_flag_"]) - g_config_value_.encrypt_flag_ = new_config["encrypt_flag_"] + g_config_value_.encrypt_flag_ = new_config["encrypt_flag_"] if "boolean_encoding_type_" in new_config: if not isinstance(new_config["boolean_encoding_type_"], TSEncodingPy): @@ -474,7 +489,7 @@ cdef ErrorCode tsfile_writer_register_timeseries_py_cpp(TsFileWriter writer, obj TimeseriesSchema *schema): cdef ErrorCode errno cdef bytes encoded_device_name = PyUnicode_AsUTF8String(device_name) - cdef const char* c_device_name = encoded_device_name + cdef const char * c_device_name = encoded_device_name errno = _tsfile_writer_register_timeseries(writer, c_device_name, schema) return errno @@ -485,73 +500,100 @@ cdef ErrorCode tsfile_writer_register_table_py_cpp(TsFileWriter writer, TableSch cdef bint tsfile_result_set_is_null_by_name_c(ResultSet result_set, object name): cdef bytes encoded_name = PyUnicode_AsUTF8String(name) - cdef const char* c_name = encoded_name + cdef const char * c_name = encoded_name return tsfile_result_set_is_null_by_name(result_set, c_name) cdef ResultSet tsfile_reader_query_table_c(TsFileReader reader, object table_name, object column_list, - int64_t start_time, int64_t end_time): + int64_t start_time, int64_t end_time): cdef ResultSet result cdef int column_num = len(column_list) cdef bytes table_name_bytes = PyUnicode_AsUTF8String(table_name) - cdef const char* table_name_c = table_name_bytes - cdef char** columns = malloc(sizeof(char*) * column_num) + cdef const char * table_name_c = table_name_bytes + cdef char** columns = malloc(sizeof(char *) * column_num) + cdef int i + cdef ErrorCode code = 0 + if columns == NULL: + raise MemoryError("Failed to allocate memory for columns") + try: + for i in range(column_num): + columns[i] = strdup(( column_list[i]).encode('utf-8')) + if columns[i] == NULL: + raise MemoryError("Failed to allocate memory for column name") + result = tsfile_query_table(reader, table_name_c, columns, column_num, start_time, end_time, &code) + check_error(code) + return result + finally: + if columns != NULL: + for i in range(column_num): + free( columns[i]) + columns[i] = NULL + free( columns) + columns = NULL + +cdef ResultSet tsfile_reader_query_table_on_tree_c(TsFileReader reader, object column_list, + int64_t start_time, int64_t end_time): + cdef ResultSet result + cdef int column_num = len(column_list) + cdef char** columns = malloc(sizeof(char *) * column_num) cdef int i cdef ErrorCode code = 0 if columns == NULL: raise MemoryError("Failed to allocate memory for columns") try: for i in range(column_num): - columns[i] = strdup((column_list[i]).encode('utf-8')) + columns[i] = strdup(( column_list[i]).encode('utf-8')) if columns[i] == NULL: raise MemoryError("Failed to allocate memory for column name") - result = tsfile_query_table(reader, table_name_c, columns, column_num, start_time, end_time, &code) + result = tsfile_query_table_on_tree(reader, columns, column_num, start_time, end_time, &code) check_error(code) return result finally: if columns != NULL: for i in range(column_num): - free(columns[i]) + free( columns[i]) columns[i] = NULL - free(columns) + free( columns) columns = NULL -cdef ResultSet tsfile_reader_query_paths_c(TsFileReader reader, object device_name, object sensor_list, int64_t start_time, - int64_t end_time): +cdef ResultSet tsfile_reader_query_paths_c(TsFileReader reader, object device_name, object sensor_list, + int64_t start_time, + int64_t end_time): cdef ResultSet result cdef int path_num = len(sensor_list) - cdef char** sensor_list_c = malloc(sizeof(char*) * path_num) + cdef char** sensor_list_c = malloc(sizeof(char *) * path_num) cdef bytes device_name_bytes = PyUnicode_AsUTF8String(device_name) - cdef const char* device_name_c = device_name_bytes + cdef const char * device_name_c = device_name_bytes cdef int i cdef ErrorCode code = 0 if sensor_list_c == NULL: raise MemoryError("Failed to allocate memory for paths") try: for i in range(path_num): - sensor_list_c[i] = strdup((sensor_list[i]).encode('utf-8')) + sensor_list_c[i] = strdup(( sensor_list[i]).encode('utf-8')) if sensor_list_c[i] == NULL: raise MemoryError("Failed to allocate memory for path") - result = _tsfile_reader_query_device(reader, device_name_c, sensor_list_c, path_num, start_time, end_time, &code) + result = _tsfile_reader_query_device(reader, device_name_c, sensor_list_c, path_num, start_time, end_time, + &code) check_error(code) return result finally: if sensor_list_c != NULL: for i in range(path_num): if sensor_list_c[i] != NULL: - free(sensor_list_c[i]) + free( sensor_list_c[i]) sensor_list_c[i] = NULL - free(sensor_list_c) + free( sensor_list_c) sensor_list_c = NULL cdef object get_table_schema(TsFileReader reader, object table_name): cdef bytes table_name_bytes = PyUnicode_AsUTF8String(table_name) - cdef const char* table_name_c = table_name_bytes + cdef const char * table_name_c = table_name_bytes cdef TableSchema schema = tsfile_reader_get_table_schema(reader, table_name_c) return from_c_table_schema(schema) cdef object get_all_table_schema(TsFileReader reader): cdef uint32_t table_num = 0 - cdef TableSchema* schemas + cdef TableSchema * schemas cdef int i table_schemas = {} @@ -562,3 +604,15 @@ cdef object get_all_table_schema(TsFileReader reader): free(schemas) return table_schemas +cdef object get_all_timeseries_schema(TsFileReader reader): + cdef uint32_t device_num = 0 + cdef DeviceSchema * schemas + cdef int i + + device_schemas = {} + schemas = tsfile_reader_get_all_timeseries_schemas(reader, &device_num) + for i in range(device_num): + schema_py = from_c_device_schema(schemas[i]) + device_schemas.update([(schema_py.get_device_name(), schema_py)]) + free(schemas) + return device_schemas diff --git a/python/tsfile/tsfile_reader.pyx b/python/tsfile/tsfile_reader.pyx index e8d38d7df..6cc6b0042 100644 --- a/python/tsfile/tsfile_reader.pyx +++ b/python/tsfile/tsfile_reader.pyx @@ -172,7 +172,7 @@ cdef class ResultSetPy: return tsfile_result_set_get_value_by_index_double(self.result, index) elif data_type == TSDataTypePy.BOOLEAN: return tsfile_result_set_get_value_by_index_bool(self.result, index) - elif data_type == TSDataTypePy.STRING: + elif data_type == TSDataTypePy.STRING or data_type == TSDataTypePy.TEXT: try: string = tsfile_result_set_get_value_by_index_string(self.result, index) py_str = string.decode('utf-8') @@ -291,6 +291,21 @@ cdef class TsFileReaderPy: pyresult.init_c(result, table_name) self.activate_result_set_list.add(pyresult) return pyresult + + def query_table_on_tree(self, column_names : List[str], + start_time : int = INT64_MIN, end_time : int = INT64_MAX) -> ResultSetPy: + """ + Execute a time range query on specified columns on tree structure. + :return: query result handler. + """ + cdef ResultSet result; + result = tsfile_reader_query_table_on_tree_c(self.reader, + [column_name.lower() for column_name in column_names], start_time, + end_time) + pyresult = ResultSetPy(self, True) + pyresult.init_c(result, "root") + self.activate_result_set_list.add(pyresult) + return pyresult def query_timeseries(self, device_name : str, sensor_list : List[str], start_time : int = 0, end_time : int = 0) -> ResultSetPy: @@ -324,6 +339,12 @@ cdef class TsFileReaderPy: """ return get_all_table_schema(self.reader) + def get_all_timeseries_schemas(self): + """ + Get all timeseries schemas + """ + return get_all_timeseries_schema(self.reader) + def close(self): """ Close TsFile Reader, if reader has result sets, invalid them. diff --git a/python/tsfile/utils.py b/python/tsfile/utils.py index 3d2366061..1d6f1afc5 100644 --- a/python/tsfile/utils.py +++ b/python/tsfile/utils.py @@ -15,7 +15,10 @@ # specific language governing permissions and limitations # under the License. # +from typing import Iterator, Union +from typing import Optional +import numpy as np import pandas as pd from tsfile.exceptions import TableNotExistError, ColumnNotExistError @@ -23,45 +26,77 @@ def to_dataframe(file_path: str, - table_name: str = None, - column_names: list[str] = None, - max_row_num: int = None) -> pd.DataFrame: - with TsFileReaderPy(file_path) as reader: - total_rows = 0 - table_schema = reader.get_all_table_schemas() - if len(table_schema) == 0: - raise TableNotExistError("Not found any table in the TsFile.") - if table_name is None: - # get the first table name by default - table_name, columns = next(iter(table_schema.items())) - else: - if table_name not in table_schema: - raise TableNotExistError(table_name) - columns = table_schema[table_name] + table_name: Optional[str] = None, + column_names: Optional[list[str]] = None, + start_time: Optional[int] = None, + end_time: Optional[int] = None, + max_row_num: Optional[int] = None, + as_iterator: bool = False) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: + def _gen(is_iterator: bool) -> Iterator[pd.DataFrame]: + _table_name = table_name + _column_names = column_names + _start_time = start_time if start_time is not None else np.iinfo(np.int64).min + _end_time = end_time if end_time is not None else np.iinfo(np.int64).max - column_names_in_file = columns.get_column_names() + with TsFileReaderPy(file_path) as reader: + total_rows = 0 + table_schema = reader.get_all_table_schemas() - if column_names is not None: - for column in column_names: - if column not in column_names_in_file: - raise ColumnNotExistError(column) - else: - column_names = column_names_in_file + is_tree_model = len(table_schema) == 0 + + if is_tree_model: + if _column_names is None: + print("columns name is None, return all columns") + else: + if _table_name is None: + _table_name, columns = next(iter(table_schema.items())) + else: + if _table_name not in table_schema: + raise TableNotExistError(_table_name) + columns = table_schema[_table_name] - df_list: list[pd.DataFrame] = [] + column_names_in_file = columns.get_column_names() - with reader.query_table(table_name, column_names) as result: - while result.next(): - if max_row_num is not None: - remaining_rows = max_row_num - total_rows - if remaining_rows <= 0: - break + if _column_names is not None: + for column in _column_names: + if column not in column_names_in_file: + raise ColumnNotExistError(column) + else: + _column_names = column_names_in_file + + if is_tree_model: + if _column_names is None: + _column_names = [] + query_result = reader.query_table_on_tree(_column_names, _start_time, _end_time) + else: + query_result = reader.query_table(_table_name, _column_names, _start_time, _end_time) + + with query_result as result: + while result.next(): + if max_row_num is None: + df = result.read_data_frame() + elif is_iterator: + df = result.read_data_frame(max_row_num) else: - batch_rows = min(remaining_rows, 1024) - df = result.read_data_frame(batch_rows) + remaining_rows = max_row_num - total_rows + if remaining_rows <= 0: + break + df = result.read_data_frame(remaining_rows) + if df is None or df.empty: + continue total_rows += len(df) - else: - df = result.read_data_frame() - df_list.append(df) - df = pd.concat(df_list, ignore_index=True) - return df + yield df + if (not is_iterator) and max_row_num is not None and total_rows >= max_row_num: + break + + if as_iterator: + return _gen(True) + else: + df_list = list(_gen(False)) + if df_list: + df = pd.concat(df_list, ignore_index=True) + if max_row_num is not None and len(df) > max_row_num: + df = df.iloc[:max_row_num] + return df + else: + return pd.DataFrame()