-
Notifications
You must be signed in to change notification settings - Fork 506
ORC-9. Create a vector type for timestamp columns #5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -854,6 +854,8 @@ namespace orc { | |
|
|
||
| class ReaderImpl : public Reader { | ||
| private: | ||
| const int64_t epochOffset; | ||
|
|
||
| // inputs | ||
| std::unique_ptr<InputStream> stream; | ||
| ReaderOptions options; | ||
|
|
@@ -993,12 +995,27 @@ namespace orc { | |
| } | ||
| } | ||
|
|
||
| int64_t getEpochOffset() { | |
| // Build the ORC epoch, 2015 Jan 1 00:00:00, as a broken-down struct tm and let mktime convert it to a time_t. | |
| // tm_year counts years since 1900, tm_mon is 0-based and tm_mday is 1-based, hence 115 / 0 / 1 below. NOTE(review): mktime reads these fields as local time - confirm that is intended. | |
| struct tm epoch; | |
| epoch.tm_sec = 0; | |
| epoch.tm_min = 0; | |
| epoch.tm_hour = 0; | |
| epoch.tm_mday = 1; | |
| epoch.tm_mon = 0; | |
| epoch.tm_year = 2015 - 1900; | |
| epoch.tm_isdst = 0; | |
| return static_cast<int64_t>(mktime(&epoch)); | |
| } | |
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm a bit confused: why is the offset 115 years and 1 day, instead of 45 years (2015 - 1970)?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That is the struct tm representation of 1 Jan 2015 00:00:00, so it is converting that date into a time_t. Look at the man page for mktime.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess I should add a comment saying that :). |
||
| ReaderImpl::ReaderImpl(std::unique_ptr<InputStream> input, | ||
| const ReaderOptions& opts, | ||
| std::unique_ptr<proto::PostScript> _postscript, | ||
| std::unique_ptr<proto::Footer> _footer, | ||
| uint64_t _footerStart | ||
| ): stream(std::move(input)), | ||
| ): epochOffset(getEpochOffset()), | ||
| stream(std::move(input)), | ||
| options(opts), | ||
| footerStart(_footerStart), | ||
| memoryPool(*opts.getMemoryPool()), | ||
|
|
@@ -1334,13 +1351,15 @@ namespace orc { | |
| const uint64_t stripeStart; | ||
| InputStream& input; | ||
| MemoryPool& memoryPool; | ||
| const int64_t epochOffset; | ||
|
|
||
| public: | ||
| StripeStreamsImpl(const ReaderImpl& reader, | ||
| const proto::StripeFooter& footer, | ||
| uint64_t stripeStart, | ||
| InputStream& input, | ||
| MemoryPool& memoryPool); | ||
| MemoryPool& memoryPool, | ||
| int64_t epochOffset); | ||
|
|
||
| virtual ~StripeStreamsImpl(); | ||
|
|
||
|
|
@@ -1356,18 +1375,22 @@ namespace orc { | |
| bool shouldStream) const override; | ||
|
|
||
| MemoryPool& getMemoryPool() const override; | ||
|
|
||
| int64_t getEpochOffset() const override; | ||
| }; | ||
|
|
||
| StripeStreamsImpl::StripeStreamsImpl(const ReaderImpl& _reader, | ||
| const proto::StripeFooter& _footer, | ||
| uint64_t _stripeStart, | ||
| InputStream& _input, | ||
| MemoryPool& _memoryPool | ||
| MemoryPool& _memoryPool, | ||
| int64_t _epochOffset | ||
| ): reader(_reader), | ||
| footer(_footer), | ||
| stripeStart(_stripeStart), | ||
| input(_input), | ||
| memoryPool(_memoryPool) { | ||
| memoryPool(_memoryPool), | ||
| epochOffset(_epochOffset) { | ||
| // PASS | ||
| } | ||
|
|
||
|
|
@@ -1383,10 +1406,15 @@ namespace orc { | |
| return reader.getSelectedColumns(); | ||
| } | ||
|
|
||
| proto::ColumnEncoding StripeStreamsImpl::getEncoding(int64_t columnId) const { | ||
| proto::ColumnEncoding StripeStreamsImpl::getEncoding(int64_t columnId | ||
| ) const { | ||
| return footer.columns(static_cast<int>(columnId)); | ||
| } | ||
|
|
||
| int64_t StripeStreamsImpl::getEpochOffset() const { /* accessor: returns the epochOffset member, which the constructor initializes from its _epochOffset argument */ | |
| return epochOffset; | |
| } | |
|
|
||
| std::unique_ptr<SeekableInputStream> | ||
| StripeStreamsImpl::getStream(int64_t columnId, | ||
| proto::Stream_Kind kind, | ||
|
|
@@ -1426,7 +1454,8 @@ namespace orc { | |
| StripeStreamsImpl stripeStreams(*this, currentStripeFooter, | ||
| currentStripeInfo.offset(), | ||
| *(stream.get()), | ||
| memoryPool); | ||
| memoryPool, | ||
| epochOffset); | ||
| reader = buildReader(*(schema.get()), stripeStreams); | ||
| } | ||
|
|
||
|
|
@@ -1479,7 +1508,6 @@ namespace orc { | |
| case SHORT: | ||
| case INT: | ||
| case LONG: | ||
| case TIMESTAMP: | ||
| case DATE: | ||
| result = new LongVectorBatch(capacity, memoryPool); | ||
| break; | ||
|
|
@@ -1493,6 +1521,9 @@ namespace orc { | |
| case VARCHAR: | ||
| result = new StringVectorBatch(capacity, memoryPool); | ||
| break; | ||
| case TIMESTAMP: | ||
| result = new TimestampVectorBatch(capacity, memoryPool); | ||
| break; | ||
| case STRUCT: | ||
| result = new StructVectorBatch(capacity, memoryPool); | ||
| for(uint64_t i=0; i < type.getSubtypeCount(); ++i) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ReaderImpl::epochOffset is not used (we seem to call getEpochOffset() everywhere).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The ColumnReader uses StripeStreamsImpl::getEpochOffset() and the implementation of that method uses ReaderImpl::epochOffset. The initializer for ReaderImpl::epochOffset uses the static method getEpochOffset, but it should only be called once per reader.