-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Add cpu and io indicates to audit log #531
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
| } | ||
| } | ||
|
|
||
| private boolean hasLimit(PlanNode planNode) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it should judge limit by it's immediate parent. i will fix it.
gensrc/proto/data.proto
Outdated
|
|
||
| message PQueryStatistic { | ||
| optional int64 cpu_by_row = 1; | ||
| optional int64 io_by_byte = 2; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| optional int64 io_by_byte = 2; | |
| optional int64 scan_rows = 2; |
gensrc/proto/data.proto
Outdated
| package doris; | ||
|
|
||
| message PQueryStatistic { | ||
| optional int64 cpu_by_row = 1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
process_rows
| @Protobuf(order = 1, required = false) | ||
| public long cpu; | ||
| @Protobuf(order = 2, required = false) | ||
| public long io; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| public long io; | |
| public long scanRows; |
| @ProtobufClass | ||
| public class PQueryStatistic { | ||
| @Protobuf(order = 1, required = false) | ||
| public long cpu; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
processRows
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, i will fix it.
| || sink.output_partition.type == TPartitionType::RANGE_PARTITIONED); | ||
| // TODO: use something like google3's linked_ptr here (scoped_ptr isn't copyable) | ||
| for (int i = 0; i < destinations.size(); ++i) { | ||
| bool is_transfer_chain = false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| bool is_transfer_chain = false; | |
| bool is_transfer_stats = (i == 0); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, i will modify it.
be/src/service/internal_service.cpp
Outdated
| eos ? nullptr : &done); | ||
| } | ||
|
|
||
| if (request->has_query_statistic()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you should update query statistics in add_data function
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now add_data is used for add batch, when eos is true and request does't contain batch, add_data will not be called, but request may contains query statistics, so it is more simple to handle query statistics update separately.
| ctx.getAuditBuilder().put("time", elapseMs); | ||
| Preconditions.checkNotNull(queryStatistic); | ||
| ctx.getAuditBuilder().put("cpu", queryStatistic.getFormattingCpu()); | ||
| ctx.getAuditBuilder().put("io", queryStatistic.getFormattingIo()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use scanRows, processRows instead of cpu and io. because a man can't know what's io/cpu stand for
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, i will fix it.
be/src/exec/analytic_eval_node.h
Outdated
| virtual Status prepare(RuntimeState* state); | ||
| virtual Status open(RuntimeState* state); | ||
| virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos); | ||
| virtual Status collect_query_statistics(QueryStatistics* statistics); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| virtual Status collect_query_statistics(QueryStatistics* statistics); | |
| Status collect_query_statistics(QueryStatistics* statistics) override; |
be/src/runtime/query_statistics.h
Outdated
|
|
||
| void add(const Statistics& other) { | ||
| process_rows += other.process_rows; | ||
| scan_bytes += other.scan_bytes; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
scan rows?
be/src/runtime/query_statistics.h
Outdated
|
|
||
| void serialize(PQueryStatistics* statistics) { | ||
| DCHECK(statistics != nullptr); | ||
| boost::lock_guard<SpinLock> l(_lock); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
boost::lock_guard -> std::lock_guard
be/src/exec/exchange_node.h
Outdated
| RuntimeProfile::Counter* _merge_rows_counter; | ||
|
|
||
| // Query statistics from sub plan. | ||
| boost::scoped_ptr<QueryStatistics> _sub_plan_statistics; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use std first
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
std does't include scoped_ptr.
be/src/runtime/query_statistics.h
Outdated
| } | ||
|
|
||
| long get_process_rows() { | ||
| boost::lock_guard<SpinLock> l(_lock); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do you need a lock here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i will fix it.
| return statisticsForAuditLog; | ||
| } | ||
|
|
||
| public static class QueryStatistics { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can PQueryStatistics replace this class?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, i will replace this with PQueryStatistics.
| public final class RowBatch { | ||
| private TResultBatch batch; | ||
| private PQueryStatistics statistics; | ||
| private boolean eos; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not use batch.eos?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TResultBatch does't include EOS member, but it's wapper PFetchDataResult.
|
|
||
| private void setQueryStatisticsForAuditLog(RowBatch batch) { | ||
| if (batch != null) { | ||
| final PQueryStatistics statistics = batch.getQueryStatistics(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
batch.getQueryStatistics() may return nullptr?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i will fix it.
| ctx.getAuditBuilder().put("time", elapseMs); | ||
| Preconditions.checkNotNull(statistics); | ||
| ctx.getAuditBuilder().put("ScanRows", statistics.getFormattingScanRows()); | ||
| ctx.getAuditBuilder().put("ScanRawData", statistics.getFormattingScanBytes()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's good to format printing information in statistics.getFormattingScanRows.
It should be that caller formats returned value as its wanted format.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now it is not convenient to support Format as function params, and now only one format.
be/src/runtime/query_statistics.h
Outdated
| private: | ||
|
|
||
| long scan_rows; | ||
| long scan_bytes; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
int64_t
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok , i will fix it.
| // This is responsible for collecting query statistics, usually it consists of | ||
| // two parts, one is current fragment or plan's statistics, the other is sub fragment | ||
| // or plan's statistics and QueryStatisticsRecvr is responsible for collecting it. | ||
| class QueryStatistics { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
change it to struct
be/src/runtime/query_statistics.h
Outdated
| statistics->set_scan_bytes(scan_bytes); | ||
| } | ||
|
|
||
| void deserialize(const PQueryStatistics& statistics) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
from_pb
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, i will modify it.
be/src/exec/exchange_node.cpp
Outdated
|
|
||
| Status ExchangeNode::collect_query_statistics(QueryStatistics* statistics) { | ||
| RETURN_IF_ERROR(ExecNode::collect_query_statistics(statistics)); | ||
| _sub_plan_query_statistics_recvr->add_to(statistics); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
statistics.merge()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, i will fix it.
| boolean isSendFields = false; | ||
| while ((batch = coord.getNext()) != null) { | ||
|
|
||
| while ((batch = coord.getNext()) != null && !batch.isEos()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
while (true) {
getNext(batch)
if (batch.batch != null)
if (batch.stats != null)
if (batch.batch.eos) {
break;
}
| while (iter != _query_statistics.end()) { | ||
| delete iter->second; | ||
| iter++; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
for (auto& it : _query_statistics) {
delete it.second;
}
| state->fragment_instance_id(), _id, | ||
| _num_senders, config::exchg_node_buffer_size_bytes, | ||
| state->runtime_profile(), _is_merging); | ||
| state->runtime_profile(), _is_merging, _sub_plan_query_statistics_recvr.get()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it is better to let _stream_recvr own _sub_plan_query_statistics_recvr, and this node use _stream_recvr.get_ststs_recvr() to use it.
Record query consumption into fe audit log. Its basic mode of work is as follows, one of instance of parent plan is responsible for accumulating sub plan's consumption and send to it's parent, BE coordinator will get total consumption because it's a single instance.