be/src/storage/iterator/block_reader.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <parallel_hashmap/phmap.h> |
21 | | #include <stddef.h> |
22 | | #include <sys/types.h> |
23 | | |
24 | | #include <utility> |
25 | | #include <vector> |
26 | | |
27 | | #include "common/status.h" |
28 | | #include "core/block/block.h" |
29 | | #include "core/column/column.h" |
30 | | #include "core/data_type/data_type.h" |
31 | | #include "exprs/aggregate/aggregate_function.h" |
32 | | #include "storage/iterator/vcollect_iterator.h" |
33 | | #include "storage/rowset/rowset_reader.h" |
34 | | #include "storage/tablet/tablet_reader.h" |
35 | | #include "storage/utils.h" |
36 | | |
37 | | namespace doris { |
38 | | class ColumnPredicate; |
39 | | class FunctionFilter; |
40 | | class RuntimeProfile; |
41 | | |
42 | | class BlockReader final : public TabletReader { |
43 | | public: |
44 | | ~BlockReader() override; |
45 | | |
46 | | // Initialize BlockReader with tablet, data version and fetch range. |
47 | | Status init(const ReaderParams& read_params) override; |
48 | | |
49 | | Status next_block_with_aggregation(Block* block, bool* eof) override; |
50 | | |
51 | 578 | std::vector<RowLocation> current_block_row_locations() { return _block_row_locations; } |
52 | | |
53 | 0 | void update_profile(RuntimeProfile* profile) override { |
54 | 0 | return _vcollect_iter.update_profile(profile); |
55 | 0 | } |
56 | | |
57 | | private: |
58 | | // Read rows directly from the rowsets and pass them to the caller; no aggregation is needed. |
59 | | // This is usually used for DUPLICATE KEY tables. |
60 | | Status _direct_next_block(Block* block, bool* eof); |
61 | | // Same as _direct_next_block, but used only for AGGREGATE KEY tables. |
62 | | // This is an optimization for AGGREGATE KEY tables: |
63 | | // when there is only one rowset and it is not overlapping, it can be read directly without aggregation. |
64 | | Status _direct_agg_key_next_block(Block* block, bool* eof); |
65 | | // For normal AGGREGATE KEY tables, read data by a merge heap. |
66 | | // For UNIQUE KEY tables, read data by a merge heap. |
67 | | // Unlike _agg_key_next_block, it reads the data from high version to low version, |
68 | | // to minimize the comparison time in the merge heap. |
69 | | Status _agg_key_next_block(Block* block, bool* eof); |
70 | | Status _unique_key_next_block(Block* block, bool* eof); |
71 | | |
72 | | Status _replace_key_next_block(Block* block, bool* eof); |
73 | | |
74 | | Status _init_collect_iter(const ReaderParams& read_params); |
75 | | |
76 | | Status _init_agg_state(const ReaderParams& read_params); |
77 | | |
78 | | Status _insert_data_normal(MutableColumns& columns); |
79 | | |
80 | | // For partial update tables. |
81 | | void _update_last_mutil_seq(int seq_idx); |
82 | | void _compare_sequence_map_and_replace(MutableColumns& columns); |
83 | | |
84 | | void _append_agg_data(MutableColumns& columns); |
85 | | |
86 | | void _update_agg_data(MutableColumns& columns); |
87 | | |
88 | | size_t _copy_agg_data(); |
89 | | |
90 | | void _update_agg_value(MutableColumns& columns, int begin, int end, bool is_close = true); |
91 | | |
92 | | // Returns false if the keys of the rowsets are monotonically ascending and disjoint. |
93 | | bool _rowsets_not_mono_asc_disjoint(const ReaderParams& read_params); |
94 | | |
95 | | VCollectIterator _vcollect_iter; |
96 | | IteratorRowRef _next_row {{}, -1, false}; |
97 | | |
98 | | std::vector<AggregateFunctionPtr> _agg_functions; |
99 | | std::vector<AggregateDataPtr> _agg_places; |
100 | | |
101 | | std::vector<int> _normal_columns_idx; // key columns in agg mode, all columns in uniq mode |
102 | | std::vector<int> _agg_columns_idx; |
103 | | std::vector<int> _return_columns_loc; |
104 | | |
105 | | std::vector<int> _agg_data_counters; |
106 | | int _last_agg_data_counter = 0; |
107 | | |
108 | | MutableColumns _stored_data_columns; |
109 | | std::vector<IteratorRowRef> _stored_row_ref; |
110 | | |
111 | | std::vector<bool> _stored_has_null_tag; |
112 | | std::vector<bool> _stored_has_variable_length_tag; |
113 | | |
114 | | phmap::flat_hash_map<const Block*, std::vector<std::pair<int, int>>> _temp_ref_map; |
115 | | |
116 | | bool _eof = false; |
117 | | |
118 | | Status (BlockReader::*_next_block_func)(Block* block, bool* eof) = nullptr; |
119 | | |
120 | | std::vector<RowLocation> _block_row_locations; |
121 | | |
122 | | ColumnPtr _delete_filter_column; |
123 | | |
124 | | bool _is_rowsets_overlapping = true; |
125 | | |
126 | | bool _has_seq_map = false; |
127 | | // Used for the multi-seq check. |
128 | | std::unordered_map<uint32_t, MutableColumnPtr> _seq_columns; |
129 | | // MutableColumns _seq_columns; |
130 | | // seq in return_columns, val pos in _normal_columns_idx |
131 | | std::unordered_map<uint32_t, std::vector<uint32_t>> _seq_map_in_origin_block; |
132 | | std::unordered_map<uint32_t, std::vector<uint32_t>> _seq_map_not_in_origin_block; |
133 | | |
134 | | Arena _arena; |
135 | | }; |
136 | | |
137 | | } // namespace doris |