/root/doris/be/src/exec/scan/scanner.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <stdint.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <vector> |
24 | | |
25 | | #include "common/status.h" |
26 | | #include "core/block/block.h" |
27 | | #include "runtime/exec_env.h" |
28 | | #include "runtime/runtime_state.h" |
29 | | #include "storage/tablet/tablet.h" |
30 | | #include "util/stopwatch.hpp" |
31 | | |
// Forward declarations of doris types; no definitions are provided here.
namespace doris {
class RuntimeProfile;
class ScanLocalStateBase;
class TupleDescriptor;
class VExprContext;
} // namespace doris
40 | | |
41 | | namespace doris { |
42 | | |
43 | | // Counter for load |
// Counter for load.
// Both counters start at zero.
struct ScannerCounter {
    ScannerCounter() = default;

    // Unqualified rows (unmatched the dest schema, or no partition).
    int64_t num_rows_filtered = 0;
    // Rows filtered by predicates.
    int64_t num_rows_unselected = 0;
};
50 | | |
51 | | class Scanner { |
52 | | public: |
53 | | Scanner(RuntimeState* state, ScanLocalStateBase* local_state, int64_t limit, |
54 | | RuntimeProfile* profile); |
55 | | |
56 | | //only used for FileScanner read one line. |
57 | | Scanner(RuntimeState* state, RuntimeProfile* profile) |
58 | 16 | : _state(state), _limit(1), _profile(profile), _total_rf_num(0), _has_prepared(false) { |
59 | 16 | DorisMetrics::instance()->scanner_cnt->increment(1); |
60 | 16 | }; |
61 | | |
62 | 31 | virtual ~Scanner() { |
63 | 31 | SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_state->query_mem_tracker()); |
64 | 31 | _input_block.clear(); |
65 | 31 | _conjuncts.clear(); |
66 | 31 | _projections.clear(); |
67 | 31 | _origin_block.clear(); |
68 | 31 | _common_expr_ctxs_push_down.clear(); |
69 | 31 | DorisMetrics::instance()->scanner_cnt->increment(-1); |
70 | 31 | } |
71 | | |
72 | | virtual Status init(RuntimeState* state, const VExprContextSPtrs& conjuncts); |
73 | 0 | virtual Status prepare() { |
74 | 0 | _has_prepared = true; |
75 | 0 | return Status::OK(); |
76 | 0 | } |
77 | | |
78 | 0 | Status open(RuntimeState* state) { |
79 | 0 | SCOPED_RAW_TIMER(&_per_scanner_timer); |
80 | 0 | return _open_impl(state); |
81 | 0 | } |
82 | | |
83 | | Status get_block(RuntimeState* state, Block* block, bool* eos); |
84 | | Status get_block_after_projects(RuntimeState* state, Block* block, bool* eos); |
85 | | |
86 | | virtual Status close(RuntimeState* state); |
87 | | |
88 | | // Try to stop scanner, and all running readers. |
89 | 0 | virtual void try_stop() { _should_stop = true; }; |
90 | | |
91 | 0 | virtual std::string get_name() { return ""; } |
92 | | |
93 | | // return the readable name of current scan range. |
94 | | // eg, for file scanner, return the current file path. |
95 | 0 | virtual std::string get_current_scan_range_name() { return "not implemented"; } |
96 | | |
97 | | protected: |
98 | 0 | virtual Status _open_impl(RuntimeState* state) { |
99 | 0 | _block_avg_bytes = state->batch_size() * 8; |
100 | 0 | return Status::OK(); |
101 | 0 | } |
102 | | |
103 | | // Subclass should implement this to return data. |
104 | | virtual Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) = 0; |
105 | | |
106 | 0 | Status _merge_padding_block() { |
107 | 0 | if (_padding_block.empty()) { |
108 | 0 | _padding_block.swap(_origin_block); |
109 | 0 | } else if (_origin_block.rows()) { |
110 | 0 | RETURN_IF_ERROR( |
111 | 0 | MutableBlock::build_mutable_block(&_padding_block).merge(_origin_block)); |
112 | 0 | } |
113 | 0 | return Status::OK(); |
114 | 0 | } |
115 | | |
116 | | // Update the counters before closing this scanner |
117 | | virtual void _collect_profile_before_close(); |
118 | | |
119 | | // Filter the output block finally. |
120 | | Status _filter_output_block(Block* block); |
121 | | |
122 | | Status _do_projections(Block* origin_block, Block* output_block); |
123 | | |
124 | | public: |
125 | 0 | int64_t get_time_cost_ns() const { return _per_scanner_timer; } |
126 | | |
127 | 0 | int64_t projection_time() const { return _projection_timer; } |
128 | 0 | int64_t get_rows_read() const { return _num_rows_read; } |
129 | | |
130 | 0 | bool has_prepared() const { return _has_prepared; } |
131 | | |
132 | | Status try_append_late_arrival_runtime_filter(); |
133 | | |
134 | | // Call start_wait_worker_timer() when submit the scanner to the thread pool. |
135 | | // And call update_wait_worker_timer() when it is actually being executed. |
136 | 0 | void start_wait_worker_timer() { |
137 | 0 | _watch.reset(); |
138 | 0 | _watch.start(); |
139 | 0 | } |
140 | | |
141 | 0 | void start_scan_cpu_timer() { |
142 | 0 | _cpu_watch.reset(); |
143 | 0 | _cpu_watch.start(); |
144 | 0 | } |
145 | | |
146 | 0 | void update_wait_worker_timer() { _scanner_wait_worker_timer += _watch.elapsed_time(); } |
147 | | |
148 | 0 | int64_t get_scanner_wait_worker_timer() const { return _scanner_wait_worker_timer; } |
149 | | |
150 | | void update_scan_cpu_timer(); |
151 | | |
152 | | // Some counters need to be updated realtime, for example, workload group policy need |
153 | | // scan bytes to cancel the query exceed limit. |
154 | 0 | virtual void update_realtime_counters() {} |
155 | | |
156 | 290 | RuntimeState* runtime_state() { return _state; } |
157 | | |
158 | 0 | bool is_open() const { return _is_open; } |
159 | 0 | void set_opened() { _is_open = true; } |
160 | | |
161 | 0 | virtual doris::TabletStorageType get_storage_type() { |
162 | 0 | return doris::TabletStorageType::STORAGE_TYPE_REMOTE; |
163 | 0 | } |
164 | | |
165 | 0 | bool need_to_close() const { return _need_to_close; } |
166 | | |
167 | 0 | void mark_to_need_to_close() { |
168 | | // If the scanner is failed during init or open, then not need update counters |
169 | | // because the query is fail and the counter is useless. And it may core during |
170 | | // update counters. For example, update counters depend on scanner's tablet, but |
171 | | // the tablet == null when init failed. |
172 | 0 | if (_is_open) { |
173 | 0 | _collect_profile_before_close(); |
174 | 0 | } |
175 | 0 | _need_to_close = true; |
176 | 0 | } |
177 | | |
178 | 0 | void set_status_on_failure(const Status& st) { _status = st; } |
179 | | |
180 | 0 | int64_t limit() const { return _limit; } |
181 | | |
182 | 0 | auto get_block_avg_bytes() const { return _block_avg_bytes; } |
183 | | |
184 | 0 | void update_block_avg_bytes(size_t block_avg_bytes) { _block_avg_bytes = block_avg_bytes; } |
185 | | |
186 | | protected: |
187 | | RuntimeState* _state = nullptr; |
188 | | ScanLocalStateBase* _local_state = nullptr; |
189 | | |
190 | | // Set if scan node has sort limit info |
191 | | int64_t _limit = -1; |
192 | | |
193 | | RuntimeProfile* _profile = nullptr; |
194 | | |
195 | | const TupleDescriptor* _output_tuple_desc = nullptr; |
196 | | const RowDescriptor* _output_row_descriptor = nullptr; |
197 | | |
198 | | // If _input_tuple_desc is set, the scanner will read data into |
199 | | // this _input_block first, then convert to the output block. |
200 | | Block _input_block; |
201 | | |
202 | | bool _is_open = false; |
203 | | bool _is_closed = false; |
204 | | bool _need_to_close = false; |
205 | | Status _status; |
206 | | |
207 | | // If _applied_rf_num == _total_rf_num |
208 | | // means all runtime filters are arrived and applied. |
209 | | int _applied_rf_num = 0; |
210 | | int _total_rf_num = 0; |
211 | | // Cloned from _conjuncts of scan node. |
212 | | // It includes predicate in SQL and runtime filters. |
213 | | VExprContextSPtrs _conjuncts; |
214 | | VExprContextSPtrs _projections; |
215 | | // Used in common subexpression elimination to compute intermediate results. |
216 | | std::vector<VExprContextSPtrs> _intermediate_projections; |
217 | | Block _origin_block; |
218 | | Block _padding_block; |
219 | | bool _alreay_eos = false; |
220 | | |
221 | | VExprContextSPtrs _common_expr_ctxs_push_down; |
222 | | |
223 | | // num of rows read from scanner |
224 | | int64_t _num_rows_read = 0; |
225 | | |
226 | | int64_t _num_byte_read = 0; |
227 | | |
228 | | // num of rows return from scanner, after filter block |
229 | | int64_t _num_rows_return = 0; |
230 | | |
231 | | size_t _block_avg_bytes = 0; |
232 | | |
233 | | // Set true after counter is updated finally |
234 | | bool _has_updated_counter = false; |
235 | | |
236 | | // watch to count the time wait for scanner thread |
237 | | MonotonicStopWatch _watch; |
238 | | // Do not use ScopedTimer. There is no guarantee that, the counter |
239 | | ThreadCpuStopWatch _cpu_watch; |
240 | | int64_t _scanner_wait_worker_timer = 0; |
241 | | int64_t _scan_cpu_timer = 0; |
242 | | |
243 | | bool _is_load = false; |
244 | | |
245 | | bool _has_prepared = false; |
246 | | |
247 | | ScannerCounter _counter; |
248 | | int64_t _per_scanner_timer = 0; |
249 | | int64_t _projection_timer = 0; |
250 | | |
251 | | bool _should_stop = false; |
252 | | }; |
253 | | |
// Shared-ownership pointer to a Scanner.
using ScannerSPtr = std::shared_ptr<Scanner>;
255 | | |
256 | | } // namespace doris |