/root/doris/be/src/exec/scan/scanner.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <stdint.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <atomic> |
24 | | #include <vector> |
25 | | |
26 | | #include "common/status.h" |
27 | | #include "core/block/block.h" |
28 | | #include "runtime/exec_env.h" |
29 | | #include "runtime/runtime_state.h" |
30 | | #include "storage/tablet/tablet.h" |
31 | | #include "util/stopwatch.hpp" |
32 | | |
33 | | namespace doris { |
34 | | class RuntimeProfile; |
35 | | class TupleDescriptor; |
36 | | |
37 | | class VExprContext; |
38 | | |
39 | | class ScanLocalStateBase; |
40 | | } // namespace doris |
41 | | |
42 | | namespace doris { |
43 | | |
44 | | // Counters for load: rows dropped as unqualified and rows left unselected by predicates. |
45 | | struct ScannerCounter { |
46 | 29 | ScannerCounter() : num_rows_filtered(0), num_rows_unselected(0) {} |
47 | | |
48 | | int64_t num_rows_filtered; // unqualified rows (not matching the dest schema, or no partition) |
49 | | int64_t num_rows_unselected; // rows filtered out by predicates |
50 | | }; |
51 | | |
52 | | class Scanner { |
53 | | public: |
54 | | Scanner(RuntimeState* state, ScanLocalStateBase* local_state, int64_t limit, |
55 | | RuntimeProfile* profile); |
56 | | |
57 | | // Only used by FileScanner to read one line; this overload fixes _limit to 1. |
58 | | Scanner(RuntimeState* state, RuntimeProfile* profile) |
59 | 15 | : _state(state), _limit(1), _profile(profile), _total_rf_num(0), _has_prepared(false) { |
60 | 15 | DorisMetrics::instance()->scanner_cnt->increment(1); |
61 | 15 | }; |
62 | | |
63 | 27 | virtual ~Scanner() { |
64 | 27 | SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_state->query_mem_tracker()); |
65 | 27 | _input_block.clear(); |
66 | 27 | _conjuncts.clear(); |
67 | 27 | _projections.clear(); |
68 | 27 | _origin_block.clear(); |
69 | 27 | _common_expr_ctxs_push_down.clear(); |
70 | 27 | DorisMetrics::instance()->scanner_cnt->increment(-1); |
71 | 27 | } |
72 | | |
73 | | virtual Status init(RuntimeState* state, const VExprContextSPtrs& conjuncts); |
74 | 0 | virtual Status prepare() { |
75 | 0 | _has_prepared = true; |
76 | 0 | return Status::OK(); |
77 | 0 | } |
78 | | |
79 | 0 | Status open(RuntimeState* state) { |
80 | 0 | SCOPED_RAW_TIMER(&_per_scanner_timer); |
81 | 0 | return _open_impl(state); |
82 | 0 | } |
83 | | |
84 | | Status get_block(RuntimeState* state, Block* block, bool* eos); |
85 | | Status get_block_after_projects(RuntimeState* state, Block* block, bool* eos); |
86 | | |
87 | | virtual Status close(RuntimeState* state); |
88 | | |
89 | | // Try to stop the scanner and all running readers. The base version only sets _should_stop. |
90 | 0 | virtual void try_stop() { _should_stop = true; }; |
91 | | |
92 | 0 | virtual std::string get_name() { return ""; } |
93 | | |
94 | | // Return the readable name of the current scan range. |
95 | | // E.g., for a file scanner, return the current file path. |
96 | 0 | virtual std::string get_current_scan_range_name() { return "not implemented"; } |
97 | | |
98 | | protected: |
99 | 0 | virtual Status _open_impl(RuntimeState* state) { |
100 | 0 | _block_avg_bytes = state->batch_size() * 8; |
101 | 0 | return Status::OK(); |
102 | 0 | } |
103 | | |
104 | | // Subclasses must implement this to return data. |
105 | | virtual Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) = 0; |
106 | | |
107 | 0 | Status _merge_padding_block() { |
108 | 0 | if (_padding_block.empty()) { |
109 | 0 | _padding_block.swap(_origin_block); |
110 | 0 | } else if (_origin_block.rows()) { |
111 | 0 | RETURN_IF_ERROR( |
112 | 0 | MutableBlock::build_mutable_block(&_padding_block).merge(_origin_block)); |
113 | 0 | } |
114 | 0 | return Status::OK(); |
115 | 0 | } |
116 | | |
117 | | // Update the counters before closing this scanner. |
118 | | virtual void _collect_profile_before_close(); |
119 | | |
120 | | // Check if scanner is already closed, if not, mark it as closed. |
121 | | // Returns true if the scanner was successfully marked as closed (first time). |
122 | | // Returns false if the scanner was already closed. |
123 | | bool _try_close(); |
124 | | |
125 | | // Apply the final filtering to the output block. |
126 | | Status _filter_output_block(Block* block); |
127 | | |
128 | | Status _do_projections(Block* origin_block, Block* output_block); |
129 | | |
130 | | public: |
131 | 0 | int64_t get_time_cost_ns() const { return _per_scanner_timer; } |
132 | | |
133 | 0 | int64_t projection_time() const { return _projection_timer; } |
134 | 0 | int64_t get_rows_read() const { return _num_rows_read; } |
135 | | |
136 | 0 | bool has_prepared() const { return _has_prepared; } |
137 | | |
138 | | Status try_append_late_arrival_runtime_filter(); |
139 | | |
140 | | // Call start_wait_worker_timer() when submitting the scanner to the thread pool, |
141 | | // and call update_wait_worker_timer() when it actually starts being executed. |
142 | 0 | void start_wait_worker_timer() { |
143 | 0 | _watch.reset(); |
144 | 0 | _watch.start(); |
145 | 0 | } |
146 | | |
147 | 0 | void start_scan_cpu_timer() { |
148 | 0 | _cpu_watch.reset(); |
149 | 0 | _cpu_watch.start(); |
150 | 0 | } |
151 | | |
152 | 0 | void update_wait_worker_timer() { _scanner_wait_worker_timer += _watch.elapsed_time(); } |
153 | | |
154 | 0 | int64_t get_scanner_wait_worker_timer() const { return _scanner_wait_worker_timer; } |
155 | | |
156 | | void update_scan_cpu_timer(); |
157 | | |
158 | | // Some counters need to be updated in real time; for example, the workload group policy |
159 | | // needs the scanned bytes to cancel a query that exceeds its limit. |
160 | 0 | virtual void update_realtime_counters() {} |
161 | | |
162 | 290 | RuntimeState* runtime_state() { return _state; } |
163 | | |
164 | 0 | bool is_open() const { return _is_open; } |
165 | 0 | void set_opened() { _is_open = true; } |
166 | | |
167 | 0 | virtual doris::TabletStorageType get_storage_type() { |
168 | 0 | return doris::TabletStorageType::STORAGE_TYPE_REMOTE; |
169 | 0 | } |
170 | | |
171 | 0 | bool need_to_close() const { return _need_to_close; } |
172 | | |
173 | 0 | void mark_to_need_to_close() { |
174 | | // If the scanner failed during init or open, there is no need to update counters, |
175 | | // because the query has failed and the counters are useless. Updating them may even |
176 | | // crash: for example, the update depends on the scanner's tablet, but the tablet is |
177 | | // null when init failed. |
178 | 0 | if (_is_open) { |
179 | 0 | _collect_profile_before_close(); |
180 | 0 | } |
181 | 0 | _need_to_close = true; |
182 | 0 | } |
183 | | |
184 | 0 | void set_status_on_failure(const Status& st) { _status = st; } |
185 | | |
186 | 0 | int64_t limit() const { return _limit; } |
187 | | |
188 | 0 | auto get_block_avg_bytes() const { return _block_avg_bytes; } |
189 | | |
190 | 0 | void update_block_avg_bytes(size_t block_avg_bytes) { _block_avg_bytes = block_avg_bytes; } |
191 | | |
192 | | protected: |
193 | | RuntimeState* _state = nullptr; |
194 | | ScanLocalStateBase* _local_state = nullptr; |
195 | | |
196 | | // Set if scan node has sort limit info |
197 | | int64_t _limit = -1; |
198 | | |
199 | | RuntimeProfile* _profile = nullptr; |
200 | | |
201 | | const TupleDescriptor* _output_tuple_desc = nullptr; |
202 | | const RowDescriptor* _output_row_descriptor = nullptr; |
203 | | |
204 | | // If _input_tuple_desc is set, the scanner will read data into |
205 | | // this _input_block first, then convert to the output block. |
206 | | Block _input_block; |
207 | | |
208 | | bool _is_open = false; |
209 | | std::atomic<bool> _is_closed {false}; |
210 | | bool _need_to_close = false; |
211 | | Status _status; |
212 | | |
213 | | // If _applied_rf_num == _total_rf_num, |
214 | | // all runtime filters have arrived and been applied. |
215 | | int _applied_rf_num = 0; |
216 | | int _total_rf_num = 0; |
217 | | // Cloned from the _conjuncts of the scan node. |
218 | | // It includes the predicates in SQL and the runtime filters. |
219 | | VExprContextSPtrs _conjuncts; |
220 | | VExprContextSPtrs _projections; |
221 | | // Used in common subexpression elimination to compute intermediate results. |
222 | | std::vector<VExprContextSPtrs> _intermediate_projections; |
223 | | Block _origin_block; |
224 | | Block _padding_block; |
225 | | bool _alreay_eos = false; |
226 | | |
227 | | VExprContextSPtrs _common_expr_ctxs_push_down; |
228 | | |
229 | | // Number of rows read from the scanner. |
230 | | int64_t _num_rows_read = 0; |
231 | | |
232 | | int64_t _num_byte_read = 0; |
233 | | |
234 | | // Number of rows returned from the scanner, after filtering the block. |
235 | | int64_t _num_rows_return = 0; |
236 | | |
237 | | size_t _block_avg_bytes = 0; |
238 | | |
239 | | // Set to true once the counters have had their final update. |
240 | | bool _has_updated_counter = false; |
241 | | |
242 | | // Stopwatch that measures the time spent waiting for a scanner thread. |
243 | | MonotonicStopWatch _watch; |
244 | | // Do not use ScopedTimer here. NOTE(review): the original reason is cut off in this comment; confirm. |
245 | | ThreadCpuStopWatch _cpu_watch; |
246 | | int64_t _scanner_wait_worker_timer = 0; |
247 | | int64_t _scan_cpu_timer = 0; |
248 | | |
249 | | bool _is_load = false; |
250 | | |
251 | | bool _has_prepared = false; |
252 | | |
253 | | ScannerCounter _counter; |
254 | | int64_t _per_scanner_timer = 0; |
255 | | int64_t _projection_timer = 0; |
256 | | |
257 | | bool _should_stop = false; |
258 | | }; |
259 | | |
260 | | using ScannerSPtr = std::shared_ptr<Scanner>; |
261 | | |
262 | | } // namespace doris |