/root/doris/be/src/olap/merger.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/merger.h" |
19 | | |
20 | | #include <gen_cpp/olap_file.pb.h> |
21 | | #include <gen_cpp/types.pb.h> |
22 | | #include <stddef.h> |
23 | | #include <unistd.h> |
24 | | |
25 | | #include <algorithm> |
26 | | #include <iterator> |
27 | | #include <memory> |
28 | | #include <mutex> |
29 | | #include <numeric> |
30 | | #include <ostream> |
31 | | #include <shared_mutex> |
32 | | #include <string> |
33 | | #include <utility> |
34 | | #include <vector> |
35 | | |
36 | | #include "cloud/config.h" |
37 | | #include "common/config.h" |
38 | | #include "common/logging.h" |
39 | | #include "common/status.h" |
40 | | #include "olap/base_tablet.h" |
41 | | #include "olap/iterators.h" |
42 | | #include "olap/olap_common.h" |
43 | | #include "olap/olap_define.h" |
44 | | #include "olap/rowid_conversion.h" |
45 | | #include "olap/rowset/rowset.h" |
46 | | #include "olap/rowset/rowset_meta.h" |
47 | | #include "olap/rowset/rowset_writer.h" |
48 | | #include "olap/rowset/segment_v2/segment_writer.h" |
49 | | #include "olap/storage_engine.h" |
50 | | #include "olap/tablet.h" |
51 | | #include "olap/tablet_fwd.h" |
52 | | #include "olap/tablet_meta.h" |
53 | | #include "olap/tablet_reader.h" |
54 | | #include "olap/utils.h" |
55 | | #include "util/slice.h" |
56 | | #include "vec/core/block.h" |
57 | | #include "vec/olap/block_reader.h" |
58 | | #include "vec/olap/vertical_block_reader.h" |
59 | | #include "vec/olap/vertical_merge_iterator.h" |
60 | | |
61 | | namespace doris { |
62 | | |
63 | | Status Merger::vmerge_rowsets(BaseTabletSPtr tablet, ReaderType reader_type, |
64 | | const TabletSchema& cur_tablet_schema, |
65 | | const std::vector<RowsetReaderSharedPtr>& src_rowset_readers, |
66 | 48 | RowsetWriter* dst_rowset_writer, Statistics* stats_output) { |
67 | 48 | vectorized::BlockReader reader; |
68 | 48 | TabletReader::ReaderParams reader_params; |
69 | 48 | reader_params.tablet = tablet; |
70 | 48 | reader_params.reader_type = reader_type; |
71 | | |
72 | 48 | TabletReader::ReadSource read_source; |
73 | 48 | read_source.rs_splits.reserve(src_rowset_readers.size()); |
74 | 144 | for (const RowsetReaderSharedPtr& rs_reader : src_rowset_readers) { Branch (74:49): [True: 144, False: 48]
|
75 | 144 | read_source.rs_splits.emplace_back(rs_reader); |
76 | 144 | } |
77 | 48 | read_source.fill_delete_predicates(); |
78 | 48 | reader_params.set_read_source(std::move(read_source)); |
79 | | |
80 | 48 | reader_params.version = dst_rowset_writer->version(); |
81 | | |
82 | 48 | TabletSchemaSPtr merge_tablet_schema = std::make_shared<TabletSchema>(); |
83 | 48 | merge_tablet_schema->copy_from(cur_tablet_schema); |
84 | | |
85 | | // Merge columns that appear in delete predicates but not in the latest schema into the current tablet schema |
86 | 48 | for (auto& del_pred_rs : reader_params.delete_predicates) { Branch (86:28): [True: 24, False: 48]
|
87 | 24 | merge_tablet_schema->merge_dropped_columns(*del_pred_rs->tablet_schema()); |
88 | 24 | } |
89 | 48 | reader_params.tablet_schema = merge_tablet_schema; |
90 | 48 | if (!tablet->tablet_schema()->cluster_key_idxes().empty()) { Branch (90:9): [True: 0, False: 48]
|
91 | 0 | reader_params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap(); |
92 | 0 | } |
93 | | |
94 | 48 | if (stats_output && stats_output->rowid_conversion) { Branch (94:9): [True: 48, False: 0]
Branch (94:25): [True: 48, False: 0]
|
95 | 48 | reader_params.record_rowids = true; |
96 | 48 | reader_params.rowid_conversion = stats_output->rowid_conversion; |
97 | 48 | stats_output->rowid_conversion->set_dst_rowset_id(dst_rowset_writer->rowset_id()); |
98 | 48 | } |
99 | | |
100 | 48 | reader_params.return_columns.resize(cur_tablet_schema.num_columns()); |
101 | 48 | std::iota(reader_params.return_columns.begin(), reader_params.return_columns.end(), 0); |
102 | 48 | reader_params.origin_return_columns = &reader_params.return_columns; |
103 | 48 | RETURN_IF_ERROR(reader.init(reader_params)); |
104 | | |
105 | 48 | vectorized::Block block = cur_tablet_schema.create_block(reader_params.return_columns); |
106 | 48 | size_t output_rows = 0; |
107 | 48 | bool eof = false; |
108 | 626 | while (!eof && !ExecEnv::GetInstance()->storage_engine().stopped()) { Branch (108:12): [True: 578, False: 48]
Branch (108:20): [True: 578, False: 0]
|
109 | 578 | auto tablet_state = tablet->tablet_state(); |
110 | 578 | if (tablet_state != TABLET_RUNNING && tablet_state != TABLET_NOTREADY) { Branch (110:13): [True: 0, False: 578]
Branch (110:47): [True: 0, False: 0]
|
111 | 0 | tablet->clear_cache(); |
112 | 0 | return Status::Error<INTERNAL_ERROR>("tablet {} is not used any more", |
113 | 0 | tablet->tablet_id()); |
114 | 0 | } |
115 | | |
116 | | // Read one block from block reader |
117 | 578 | RETURN_NOT_OK_STATUS_WITH_WARN(reader.next_block_with_aggregation(&block, &eof), |
118 | 578 | "failed to read next block when merging rowsets of tablet " + |
119 | 578 | std::to_string(tablet->tablet_id())); |
120 | 578 | RETURN_NOT_OK_STATUS_WITH_WARN(dst_rowset_writer->add_block(&block), |
121 | 578 | "failed to write block when merging rowsets of tablet " + |
122 | 578 | std::to_string(tablet->tablet_id())); |
123 | | |
124 | 578 | if (reader_params.record_rowids && block.rows() > 0) { Branch (124:13): [True: 578, False: 0]
Branch (124:44): [True: 578, False: 0]
|
125 | 578 | std::vector<uint32_t> segment_num_rows; |
126 | 578 | RETURN_IF_ERROR(dst_rowset_writer->get_segment_num_rows(&segment_num_rows)); |
127 | 578 | stats_output->rowid_conversion->add(reader.current_block_row_locations(), |
128 | 578 | segment_num_rows); |
129 | 578 | } |
130 | | |
131 | 578 | output_rows += block.rows(); |
132 | 578 | block.clear_column_data(); |
133 | 578 | } |
134 | 48 | if (ExecEnv::GetInstance()->storage_engine().stopped()) { Branch (134:9): [True: 0, False: 48]
|
135 | 0 | return Status::Error<INTERNAL_ERROR>("tablet {} failed to do compaction, engine stopped", |
136 | 0 | tablet->tablet_id()); |
137 | 0 | } |
138 | | |
139 | 48 | if (stats_output != nullptr) { Branch (139:9): [True: 48, False: 0]
|
140 | 48 | stats_output->output_rows = output_rows; |
141 | 48 | stats_output->merged_rows = reader.merged_rows(); |
142 | 48 | stats_output->filtered_rows = reader.filtered_rows(); |
143 | 48 | stats_output->bytes_read_from_local = reader.stats().file_cache_stats.bytes_read_from_local; |
144 | 48 | stats_output->bytes_read_from_remote = |
145 | 48 | reader.stats().file_cache_stats.bytes_read_from_remote; |
146 | 48 | stats_output->cached_bytes_total = reader.stats().file_cache_stats.bytes_write_into_cache; |
147 | 48 | if (config::is_cloud_mode()) { Branch (147:13): [True: 0, False: 48]
|
148 | 0 | stats_output->cloud_local_read_time = |
149 | 0 | reader.stats().file_cache_stats.local_io_timer / 1000; |
150 | 0 | stats_output->cloud_remote_read_time = |
151 | 0 | reader.stats().file_cache_stats.remote_io_timer / 1000; |
152 | 0 | } |
153 | 48 | } |
154 | | |
155 | 48 | RETURN_NOT_OK_STATUS_WITH_WARN(dst_rowset_writer->flush(), |
156 | 48 | "failed to flush rowset when merging rowsets of tablet " + |
157 | 48 | std::to_string(tablet->tablet_id())); |
158 | | |
159 | 48 | return Status::OK(); |
160 | 48 | } |
161 | | |
162 | | // Split columns into several groups, making sure all key columns are in one group. |
163 | | // For unique-key tables, the sequence and delete-sign columns are added to the key group. |
164 | | void Merger::vertical_split_columns(const TabletSchema& tablet_schema, |
165 | | std::vector<std::vector<uint32_t>>* column_groups, |
166 | 94 | std::vector<uint32_t>* key_group_cluster_key_idxes) { |
167 | 94 | uint32_t num_key_cols = tablet_schema.num_key_columns(); |
168 | 94 | uint32_t total_cols = tablet_schema.num_columns(); |
169 | 94 | std::vector<uint32_t> key_columns; |
170 | 183 | for (auto i = 0; i < num_key_cols; ++i) { Branch (170:22): [True: 89, False: 94]
|
171 | 89 | key_columns.emplace_back(i); |
172 | 89 | } |
173 | | // for unique-key tables, the sequence and delete-sign columns are merged with the key columns |
174 | 94 | int32_t sequence_col_idx = -1; |
175 | 94 | int32_t delete_sign_idx = -1; |
176 | | // in key column compaction, seq_col real index is _num_key_columns |
177 | | // and delete_sign column is _block->columns() - 1 |
178 | 94 | if (tablet_schema.keys_type() == KeysType::UNIQUE_KEYS) { Branch (178:9): [True: 49, False: 45]
|
179 | 49 | if (tablet_schema.has_sequence_col()) { Branch (179:13): [True: 4, False: 45]
|
180 | 4 | sequence_col_idx = tablet_schema.sequence_col_idx(); |
181 | 4 | key_columns.emplace_back(sequence_col_idx); |
182 | 4 | } |
183 | 49 | delete_sign_idx = tablet_schema.field_index(DELETE_SIGN); |
184 | 49 | if (delete_sign_idx != -1) { Branch (184:13): [True: 43, False: 6]
|
185 | 43 | key_columns.emplace_back(delete_sign_idx); |
186 | 43 | } |
187 | 49 | if (!tablet_schema.cluster_key_idxes().empty()) { Branch (187:13): [True: 0, False: 49]
|
188 | 0 | for (const auto& cid : tablet_schema.cluster_key_idxes()) { Branch (188:34): [True: 0, False: 0]
|
189 | 0 | auto idx = tablet_schema.field_index(cid); |
190 | 0 | DCHECK(idx >= 0) << "could not find cluster key column with unique_id=" << cid |
191 | 0 | << " in tablet schema, table_id=" << tablet_schema.table_id(); |
192 | 0 | if (idx >= num_key_cols) { Branch (192:21): [True: 0, False: 0]
|
193 | 0 | key_columns.emplace_back(idx); |
194 | 0 | } |
195 | 0 | } |
196 | | // tablet schema unique ids: [1, 2, 5, 3, 6, 4], where [1, 2] are the key columns |
197 | | // cluster key unique ids: [3, 1, 4] |
198 | | // the key_columns should be [0, 1, 3, 5] |
199 | | // the key_group_cluster_key_idxes should be [2, 0, 3] |
200 | 0 | for (const auto& cid : tablet_schema.cluster_key_idxes()) { Branch (200:34): [True: 0, False: 0]
|
201 | 0 | auto idx = tablet_schema.field_index(cid); |
202 | 0 | for (auto i = 0; i < key_columns.size(); ++i) { Branch (202:34): [True: 0, False: 0]
|
203 | 0 | if (idx == key_columns[i]) { Branch (203:25): [True: 0, False: 0]
|
204 | 0 | key_group_cluster_key_idxes->emplace_back(i); |
205 | 0 | break; |
206 | 0 | } |
207 | 0 | } |
208 | 0 | } |
209 | 0 | } |
210 | 49 | } |
211 | 94 | VLOG_NOTICE << "sequence_col_idx=" << sequence_col_idx Line | Count | Source | 42 | 0 | #define VLOG_NOTICE VLOG(3) |
|
212 | 0 | << ", delete_sign_idx=" << delete_sign_idx; |
213 | | // key_columns is empty for duplicate tables without key columns; skip the key group then |
214 | 94 | if (!key_columns.empty()) { Branch (214:9): [True: 78, False: 16]
|
215 | 78 | column_groups->emplace_back(key_columns); |
216 | 78 | } |
217 | | |
218 | 94 | std::vector<uint32_t> value_columns; |
219 | | |
220 | 992 | for (uint32_t i = num_key_cols; i < total_cols; ++i) { Branch (220:37): [True: 898, False: 94]
|
221 | 898 | if (i == sequence_col_idx || i == delete_sign_idx || Branch (221:13): [True: 4, False: 894]
Branch (221:13): [True: 47, False: 851]
Branch (221:38): [True: 43, False: 851]
|
222 | 898 | key_columns.end() != std::find(key_columns.begin(), key_columns.end(), i)) { Branch (222:13): [True: 0, False: 851]
|
223 | 47 | continue; |
224 | 47 | } |
225 | | |
226 | 851 | if (!value_columns.empty() && Branch (226:13): [True: 757, False: 94]
|
227 | 851 | value_columns.size() % config::vertical_compaction_num_columns_per_group == 0) { Branch (227:13): [True: 141, False: 616]
|
228 | 141 | column_groups->push_back(value_columns); |
229 | 141 | value_columns.clear(); |
230 | 141 | } |
231 | 851 | value_columns.push_back(i); |
232 | 851 | } |
233 | | |
234 | 94 | if (!value_columns.empty()) { Branch (234:9): [True: 94, False: 0]
|
235 | 94 | column_groups->push_back(value_columns); |
236 | 94 | } |
237 | 94 | } |
238 | | |
239 | | Status Merger::vertical_compact_one_group( |
240 | | BaseTabletSPtr tablet, ReaderType reader_type, const TabletSchema& tablet_schema, |
241 | | bool is_key, const std::vector<uint32_t>& column_group, |
242 | | vectorized::RowSourcesBuffer* row_source_buf, |
243 | | const std::vector<RowsetReaderSharedPtr>& src_rowset_readers, |
244 | | RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment, Statistics* stats_output, |
245 | | std::vector<uint32_t> key_group_cluster_key_idxes, int64_t batch_size, |
246 | 291 | CompactionSampleInfo* sample_info) { |
247 | | // build tablet reader |
248 | 291 | VLOG_NOTICE << "vertical compact one group, max_rows_per_segment=" << max_rows_per_segment; Line | Count | Source | 42 | 0 | #define VLOG_NOTICE VLOG(3) |
|
249 | 291 | vectorized::VerticalBlockReader reader(row_source_buf); |
250 | 291 | TabletReader::ReaderParams reader_params; |
251 | 291 | reader_params.is_key_column_group = is_key; |
252 | 291 | reader_params.key_group_cluster_key_idxes = key_group_cluster_key_idxes; |
253 | 291 | reader_params.tablet = tablet; |
254 | 291 | reader_params.reader_type = reader_type; |
255 | | |
256 | 291 | TabletReader::ReadSource read_source; |
257 | 291 | read_source.rs_splits.reserve(src_rowset_readers.size()); |
258 | 948 | for (const RowsetReaderSharedPtr& rs_reader : src_rowset_readers) { Branch (258:49): [True: 948, False: 291]
|
259 | 948 | read_source.rs_splits.emplace_back(rs_reader); |
260 | 948 | } |
261 | 291 | read_source.fill_delete_predicates(); |
262 | 291 | reader_params.set_read_source(std::move(read_source)); |
263 | | |
264 | 291 | reader_params.version = dst_rowset_writer->version(); |
265 | | |
266 | 291 | TabletSchemaSPtr merge_tablet_schema = std::make_shared<TabletSchema>(); |
267 | 291 | merge_tablet_schema->copy_from(tablet_schema); |
268 | | |
269 | 291 | for (auto& del_pred_rs : reader_params.delete_predicates) { Branch (269:28): [True: 125, False: 291]
|
270 | 125 | merge_tablet_schema->merge_dropped_columns(*del_pred_rs->tablet_schema()); |
271 | 125 | } |
272 | | |
273 | 291 | reader_params.tablet_schema = merge_tablet_schema; |
274 | 291 | if (!tablet->tablet_schema()->cluster_key_idxes().empty()) { Branch (274:9): [True: 0, False: 291]
|
275 | 0 | reader_params.delete_bitmap = &tablet->tablet_meta()->delete_bitmap(); |
276 | 0 | } |
277 | | |
278 | 291 | if (is_key && stats_output && stats_output->rowid_conversion) { Branch (278:9): [True: 83, False: 208]
Branch (278:19): [True: 83, False: 0]
Branch (278:35): [True: 83, False: 0]
|
279 | 83 | reader_params.record_rowids = true; |
280 | 83 | reader_params.rowid_conversion = stats_output->rowid_conversion; |
281 | 83 | stats_output->rowid_conversion->set_dst_rowset_id(dst_rowset_writer->rowset_id()); |
282 | 83 | } |
283 | | |
284 | 291 | reader_params.return_columns = column_group; |
285 | 291 | reader_params.origin_return_columns = &reader_params.return_columns; |
286 | 291 | reader_params.batch_size = batch_size; |
287 | 291 | RETURN_IF_ERROR(reader.init(reader_params, sample_info)); |
288 | | |
289 | 290 | vectorized::Block block = tablet_schema.create_block(reader_params.return_columns); |
290 | 290 | size_t output_rows = 0; |
291 | 290 | bool eof = false; |
292 | 8.08k | while (!eof && !ExecEnv::GetInstance()->storage_engine().stopped()) { Branch (292:12): [True: 7.79k, False: 290]
Branch (292:20): [True: 7.79k, False: 0]
|
293 | 7.79k | auto tablet_state = tablet->tablet_state(); |
294 | 7.79k | if (tablet_state != TABLET_RUNNING && tablet_state != TABLET_NOTREADY) { Branch (294:13): [True: 307, False: 7.49k]
Branch (294:47): [True: 0, False: 307]
|
295 | 0 | tablet->clear_cache(); |
296 | 0 | return Status::Error<INTERNAL_ERROR>("tablet {} is not used any more", |
297 | 0 | tablet->tablet_id()); |
298 | 0 | } |
299 | | // Read one block from block reader |
300 | 7.79k | RETURN_NOT_OK_STATUS_WITH_WARN(reader.next_block_with_aggregation(&block, &eof), |
301 | 7.79k | "failed to read next block when merging rowsets of tablet " + |
302 | 7.79k | std::to_string(tablet->tablet_id())); |
303 | 7.79k | RETURN_NOT_OK_STATUS_WITH_WARN( |
304 | 7.79k | dst_rowset_writer->add_columns(&block, column_group, is_key, max_rows_per_segment), |
305 | 7.79k | "failed to write block when merging rowsets of tablet " + |
306 | 7.79k | std::to_string(tablet->tablet_id())); |
307 | | |
308 | 7.79k | if (is_key && reader_params.record_rowids && block.rows() > 0) { Branch (308:13): [True: 4.63k, False: 3.16k]
Branch (308:23): [True: 4.63k, False: 0]
Branch (308:54): [True: 4.62k, False: 6]
|
309 | 4.62k | std::vector<uint32_t> segment_num_rows; |
310 | 4.62k | RETURN_IF_ERROR(dst_rowset_writer->get_segment_num_rows(&segment_num_rows)); |
311 | 4.62k | stats_output->rowid_conversion->add(reader.current_block_row_locations(), |
312 | 4.62k | segment_num_rows); |
313 | 4.62k | } |
314 | 7.79k | output_rows += block.rows(); |
315 | 7.79k | block.clear_column_data(); |
316 | 7.79k | } |
317 | 290 | if (ExecEnv::GetInstance()->storage_engine().stopped()) { Branch (317:9): [True: 0, False: 290]
|
318 | 0 | return Status::Error<INTERNAL_ERROR>("tablet {} failed to do compaction, engine stopped", |
319 | 0 | tablet->tablet_id()); |
320 | 0 | } |
321 | | |
322 | 290 | if (is_key && stats_output != nullptr) { Branch (322:9): [True: 82, False: 208]
Branch (322:19): [True: 82, False: 0]
|
323 | 82 | stats_output->output_rows = output_rows; |
324 | 82 | stats_output->merged_rows = reader.merged_rows(); |
325 | 82 | stats_output->filtered_rows = reader.filtered_rows(); |
326 | 82 | stats_output->bytes_read_from_local = reader.stats().file_cache_stats.bytes_read_from_local; |
327 | 82 | stats_output->bytes_read_from_remote = |
328 | 82 | reader.stats().file_cache_stats.bytes_read_from_remote; |
329 | 82 | stats_output->cached_bytes_total = reader.stats().file_cache_stats.bytes_write_into_cache; |
330 | 82 | if (config::is_cloud_mode()) { Branch (330:13): [True: 0, False: 82]
|
331 | 0 | stats_output->cloud_local_read_time = |
332 | 0 | reader.stats().file_cache_stats.local_io_timer / 1000; |
333 | 0 | stats_output->cloud_remote_read_time = |
334 | 0 | reader.stats().file_cache_stats.remote_io_timer / 1000; |
335 | 0 | } |
336 | 82 | } |
337 | 290 | RETURN_IF_ERROR(dst_rowset_writer->flush_columns(is_key)); |
338 | | |
339 | 290 | return Status::OK(); |
340 | 290 | } |
341 | | |
342 | | // for segcompaction |
343 | | Status Merger::vertical_compact_one_group( |
344 | | int64_t tablet_id, ReaderType reader_type, const TabletSchema& tablet_schema, bool is_key, |
345 | | const std::vector<uint32_t>& column_group, vectorized::RowSourcesBuffer* row_source_buf, |
346 | | vectorized::VerticalBlockReader& src_block_reader, |
347 | | segment_v2::SegmentWriter& dst_segment_writer, Statistics* stats_output, |
348 | 22 | uint64_t* index_size, KeyBoundsPB& key_bounds, SimpleRowIdConversion* rowid_conversion) { |
349 | | // TODO: record_rowids |
350 | 22 | vectorized::Block block = tablet_schema.create_block(column_group); |
351 | 22 | size_t output_rows = 0; |
352 | 22 | bool eof = false; |
353 | 138 | while (!eof && !ExecEnv::GetInstance()->storage_engine().stopped()) { Branch (353:12): [True: 116, False: 22]
Branch (353:20): [True: 116, False: 0]
|
354 | | // Read one block from block reader |
355 | 116 | RETURN_NOT_OK_STATUS_WITH_WARN(src_block_reader.next_block_with_aggregation(&block, &eof), |
356 | 116 | "failed to read next block when merging rowsets of tablet " + |
357 | 116 | std::to_string(tablet_id)); |
358 | 116 | if (!block.rows()) { Branch (358:13): [True: 0, False: 116]
|
359 | 0 | break; |
360 | 0 | } |
361 | 116 | RETURN_NOT_OK_STATUS_WITH_WARN(dst_segment_writer.append_block(&block, 0, block.rows()), |
362 | 116 | "failed to write block when merging rowsets of tablet " + |
363 | 116 | std::to_string(tablet_id)); |
364 | | |
365 | 116 | if (is_key && rowid_conversion != nullptr) { Branch (365:13): [True: 58, False: 58]
Branch (365:23): [True: 30, False: 28]
|
366 | 30 | rowid_conversion->add(src_block_reader.current_block_row_locations()); |
367 | 30 | } |
368 | 116 | output_rows += block.rows(); |
369 | 116 | block.clear_column_data(); |
370 | 116 | } |
371 | 22 | if (ExecEnv::GetInstance()->storage_engine().stopped()) { Branch (371:9): [True: 0, False: 22]
|
372 | 0 | return Status::Error<INTERNAL_ERROR>("tablet {} failed to do compaction, engine stopped", |
373 | 0 | tablet_id); |
374 | 0 | } |
375 | | |
376 | 22 | if (is_key && stats_output != nullptr) { Branch (376:9): [True: 11, False: 11]
Branch (376:19): [True: 11, False: 0]
|
377 | 11 | stats_output->output_rows = output_rows; |
378 | 11 | stats_output->merged_rows = src_block_reader.merged_rows(); |
379 | 11 | stats_output->filtered_rows = src_block_reader.filtered_rows(); |
380 | 11 | stats_output->bytes_read_from_local = |
381 | 11 | src_block_reader.stats().file_cache_stats.bytes_read_from_local; |
382 | 11 | stats_output->bytes_read_from_remote = |
383 | 11 | src_block_reader.stats().file_cache_stats.bytes_read_from_remote; |
384 | 11 | stats_output->cached_bytes_total = |
385 | 11 | src_block_reader.stats().file_cache_stats.bytes_write_into_cache; |
386 | 11 | } |
387 | | |
388 | | // segcompaction produces only one segment at a time |
389 | 22 | RETURN_IF_ERROR(dst_segment_writer.finalize_columns_data()); |
390 | 22 | RETURN_IF_ERROR(dst_segment_writer.finalize_columns_index(index_size)); |
391 | | |
392 | 22 | if (is_key) { Branch (392:9): [True: 11, False: 11]
|
393 | 11 | Slice min_key = dst_segment_writer.min_encoded_key(); |
394 | 11 | Slice max_key = dst_segment_writer.max_encoded_key(); |
395 | 11 | DCHECK_LE(min_key.compare(max_key), 0); |
396 | 11 | key_bounds.set_min_key(min_key.to_string()); |
397 | 11 | key_bounds.set_max_key(max_key.to_string()); |
398 | 11 | } |
399 | | |
400 | 22 | return Status::OK(); |
401 | 22 | } |
402 | | |
403 | 97 | int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_cnt) { |
404 | 97 | std::unique_lock<std::mutex> lock(tablet->sample_info_lock); |
405 | 97 | CompactionSampleInfo info = tablet->sample_infos[group_index]; |
406 | 97 | if (way_cnt <= 0) { Branch (406:9): [True: 0, False: 97]
|
407 | 0 | LOG(INFO) << "estimate batch size for vertical compaction, tablet id: " |
408 | 0 | << tablet->tablet_id() << " way cnt: " << way_cnt; |
409 | 0 | return 4096 - 32; |
410 | 0 | } |
411 | 97 | int64_t block_mem_limit = config::compaction_memory_bytes_limit / way_cnt; |
412 | 97 | if (tablet->last_compaction_status.is<ErrorCode::MEM_LIMIT_EXCEEDED>()) { Branch (412:9): [True: 0, False: 97]
|
413 | 0 | block_mem_limit /= 4; |
414 | 0 | } |
415 | | |
416 | 97 | int64_t group_data_size = 0; |
417 | 97 | if (info.group_data_size > 0 && info.bytes > 0 && info.rows > 0) { Branch (417:9): [True: 0, False: 97]
Branch (417:37): [True: 0, False: 0]
Branch (417:55): [True: 0, False: 0]
|
418 | 0 | float smoothing_factor = 0.5; |
419 | 0 | group_data_size = int64_t(info.group_data_size * (1 - smoothing_factor) + |
420 | 0 | info.bytes / info.rows * smoothing_factor); |
421 | 0 | tablet->sample_infos[group_index].group_data_size = group_data_size; |
422 | 97 | } else if (info.group_data_size > 0 && (info.bytes <= 0 || info.rows <= 0)) { Branch (422:16): [True: 0, False: 97]
Branch (422:45): [True: 0, False: 0]
Branch (422:64): [True: 0, False: 0]
|
423 | 0 | group_data_size = info.group_data_size; |
424 | 97 | } else if (info.group_data_size <= 0 && info.bytes > 0 && info.rows > 0) { Branch (424:16): [True: 97, False: 0]
Branch (424:45): [True: 0, False: 97]
Branch (424:63): [True: 0, False: 0]
|
425 | 0 | group_data_size = info.bytes / info.rows; |
426 | 0 | tablet->sample_infos[group_index].group_data_size = group_data_size; |
427 | 97 | } else { |
428 | 97 | LOG(INFO) << "estimate batch size for vertical compaction, tablet id: " |
429 | 97 | << tablet->tablet_id() << " group data size: " << info.group_data_size |
430 | 97 | << " row num: " << info.rows << " consume bytes: " << info.bytes; |
431 | 97 | return 1024 - 32; |
432 | 97 | } |
433 | | |
434 | 0 | if (group_data_size <= 0) { Branch (434:9): [True: 0, False: 0]
|
435 | 0 | LOG(WARNING) << "estimate batch size for vertical compaction, tablet id: " |
436 | 0 | << tablet->tablet_id() << " unexpected group data size: " << group_data_size; |
437 | 0 | return 4096 - 32; |
438 | 0 | } |
439 | | |
440 | 0 | tablet->sample_infos[group_index].bytes = 0; |
441 | 0 | tablet->sample_infos[group_index].rows = 0; |
442 | |
|
443 | 0 | int64_t batch_size = block_mem_limit / group_data_size; |
444 | 0 | int64_t res = std::max(std::min(batch_size, int64_t(4096 - 32)), int64_t(32L)); |
445 | 0 | LOG(INFO) << "estimate batch size for vertical compaction, tablet id: " << tablet->tablet_id() |
446 | 0 | << " group data size: " << info.group_data_size << " row num: " << info.rows |
447 | 0 | << " consume bytes: " << info.bytes << " way cnt: " << way_cnt |
448 | 0 | << " batch size: " << res; |
449 | 0 | return res; |
450 | 0 | } |
451 | | |
452 | | // Steps of a vertical merge: |
453 | | // 1. split columns into column groups |
454 | | // 2. compact the groups one by one; compacting the key group generates a row_source_buf, |
455 | | // which is then used to compact the value column groups |
456 | | // 3. build the output rowset |
457 | | Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_type, |
458 | | const TabletSchema& tablet_schema, |
459 | | const std::vector<RowsetReaderSharedPtr>& src_rowset_readers, |
460 | | RowsetWriter* dst_rowset_writer, int64_t max_rows_per_segment, |
461 | 83 | int64_t merge_way_num, Statistics* stats_output) { |
462 | 83 | LOG(INFO) << "Start to do vertical compaction, tablet_id: " << tablet->tablet_id(); |
463 | 83 | std::vector<std::vector<uint32_t>> column_groups; |
464 | 83 | std::vector<uint32_t> key_group_cluster_key_idxes; |
465 | 83 | vertical_split_columns(tablet_schema, &column_groups, &key_group_cluster_key_idxes); |
466 | | |
467 | 83 | vectorized::RowSourcesBuffer row_sources_buf( |
468 | 83 | tablet->tablet_id(), dst_rowset_writer->context().tablet_path, reader_type); |
469 | 83 | { |
470 | 83 | std::unique_lock<std::mutex> lock(tablet->sample_info_lock); |
471 | 83 | tablet->sample_infos.resize(column_groups.size(), {0, 0, 0}); |
472 | 83 | } |
473 | | // compact groups one by one |
474 | 373 | for (auto i = 0; i < column_groups.size(); ++i) { Branch (474:22): [True: 291, False: 82]
|
475 | 291 | VLOG_NOTICE << "row source size: " << row_sources_buf.total_size(); Line | Count | Source | 42 | 0 | #define VLOG_NOTICE VLOG(3) |
|
476 | 291 | bool is_key = (i == 0); |
477 | 291 | int64_t batch_size = config::compaction_batch_size != -1 Branch (477:30): [True: 194, False: 97]
|
478 | 291 | ? config::compaction_batch_size |
479 | 291 | : estimate_batch_size(i, tablet, merge_way_num); |
480 | 291 | CompactionSampleInfo sample_info; |
481 | 291 | Status st = vertical_compact_one_group( |
482 | 291 | tablet, reader_type, tablet_schema, is_key, column_groups[i], &row_sources_buf, |
483 | 291 | src_rowset_readers, dst_rowset_writer, max_rows_per_segment, stats_output, |
484 | 291 | key_group_cluster_key_idxes, batch_size, &sample_info); |
485 | 291 | { |
486 | 291 | std::unique_lock<std::mutex> lock(tablet->sample_info_lock); |
487 | 291 | tablet->sample_infos[i] = sample_info; |
488 | 291 | } |
489 | 291 | RETURN_IF_ERROR(st); |
490 | 290 | if (is_key) { Branch (490:13): [True: 82, False: 208]
|
491 | 82 | RETURN_IF_ERROR(row_sources_buf.flush()); |
492 | 82 | } |
493 | 290 | RETURN_IF_ERROR(row_sources_buf.seek_to_begin()); |
494 | 290 | } |
495 | | |
496 | | // finish compact, build output rowset |
497 | 82 | VLOG_NOTICE << "finish compact groups"; Line | Count | Source | 42 | 0 | #define VLOG_NOTICE VLOG(3) |
|
498 | 82 | RETURN_IF_ERROR(dst_rowset_writer->final_flush()); |
499 | | |
500 | 82 | return Status::OK(); |
501 | 82 | } |
502 | | |
503 | | } // namespace doris |