Coverage Report

Created: 2026-01-27 18:52

/root/doris/be/src/olap/merger.cpp
Line|Count|Source
  1|     |// Licensed to the Apache Software Foundation (ASF) under one
  2|     |// or more contributor license agreements.  See the NOTICE file
  3|     |// distributed with this work for additional information
  4|     |// regarding copyright ownership.  The ASF licenses this file
  5|     |// to you under the Apache License, Version 2.0 (the
  6|     |// "License"); you may not use this file except in compliance
  7|     |// with the License.  You may obtain a copy of the License at
  8|     |//
  9|     |//   http://www.apache.org/licenses/LICENSE-2.0
 10|     |//
 11|     |// Unless required by applicable law or agreed to in writing,
 12|     |// software distributed under the License is distributed on an
 13|     |// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 14|     |// KIND, either express or implied.  See the License for the
 15|     |// specific language governing permissions and limitations
 16|     |// under the License.
 17|     |
 18|     |#include "olap/merger.h"
 19|     |
 20|     |#include <gen_cpp/olap_file.pb.h>
 21|     |#include <gen_cpp/types.pb.h>
 22|     |#include <stddef.h>
 23|     |#include <unistd.h>
 24|     |
 25|     |#include <algorithm>
 26|     |#include <iterator>
 27|     |#include <memory>
 28|     |#include <mutex>
 29|     |#include <numeric>
 30|     |#include <ostream>
 31|     |#include <shared_mutex>
 32|     |#include <string>
 33|     |#include <utility>
 34|     |#include <vector>
 35|     |
 36|     |#include "cloud/config.h"
 37|     |#include "common/config.h"
 38|     |#include "common/logging.h"
 39|     |#include "common/status.h"
 40|     |#include "olap/base_tablet.h"
 41|     |#include "olap/iterators.h"
 42|     |#include "olap/olap_common.h"
 43|     |#include "olap/olap_define.h"
 44|     |#include "olap/rowid_conversion.h"
 45|     |#include "olap/rowset/rowset.h"
 46|     |#include "olap/rowset/rowset_meta.h"
 47|     |#include "olap/rowset/rowset_writer.h"
 48|     |#include "olap/rowset/segment_v2/segment_writer.h"
 49|     |#include "olap/storage_engine.h"
 50|     |#include "olap/tablet.h"
 51|     |#include "olap/tablet_fwd.h"
 52|     |#include "olap/tablet_meta.h"
 53|     |#include "olap/tablet_reader.h"
 54|     |#include "olap/utils.h"
 55|     |#include "util/slice.h"
 56|     |#include "vec/core/block.h"
 57|     |#include "vec/olap/block_reader.h"
 58|     |#include "vec/olap/vertical_block_reader.h"
 59|     |#include "vec/olap/vertical_merge_iterator.h"
 60|     |
 61|     |namespace doris {
 62|     |#include "common/compile_check_begin.h"
 63|     |Status Merger::vmerge_rowsets(BaseTabletSPtr tablet, ReaderType reader_type,
 64|     |                              const TabletSchema& cur_tablet_schema,
 65|     |                              const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
 66|   48|                              RowsetWriter* dst_rowset_writer, Statistics* stats_output) {
 67|   48|    if (!cur_tablet_schema.cluster_key_uids().empty()) {
 68|    0|        return Status::InternalError(
 69|    0|                "mow table with cluster keys does not support non vertical compaction");
 70|    0|    }
 71|   48|    vectorized::BlockReader reader;
 72|   48|    TabletReader::ReaderParams reader_params;
 73|   48|    reader_params.tablet = tablet;
 74|   48|    reader_params.reader_type = reader_type;
 75|     |
 76|   48|    TabletReadSource read_source;
 77|   48|    read_source.rs_splits.reserve(src_rowset_readers.size());
 78|  144|    for (const RowsetReaderSharedPtr& rs_reader : src_rowset_readers) {
 79|  144|        read_source.rs_splits.emplace_back(rs_reader);
 80|  144|    }
 81|   48|    read_source.fill_delete_predicates();
 82|   48|    reader_params.set_read_source(std::move(read_source));
 83|     |
 84|   48|    reader_params.version = dst_rowset_writer->version();
 85|     |
 86|   48|    TabletSchemaSPtr merge_tablet_schema = std::make_shared<TabletSchema>();
 87|   48|    merge_tablet_schema->copy_from(cur_tablet_schema);
 88|     |
 89|     |    // Merge the columns in delete predicate that not in latest schema in to current tablet schema
 90|   48|    for (auto& del_pred_rs : reader_params.delete_predicates) {
 91|   24|        merge_tablet_schema->merge_dropped_columns(*del_pred_rs->tablet_schema());
 92|   24|    }
 93|   48|    reader_params.tablet_schema = merge_tablet_schema;
 94|   48|    if (!tablet->tablet_schema()->cluster_key_uids().empty()) {
 95|    0|        reader_params.delete_bitmap = tablet->tablet_meta()->delete_bitmap_ptr();
 96|    0|    }
 97|     |
 98|   48|    if (stats_output && stats_output->rowid_conversion) {
 99|   48|        reader_params.record_rowids = true;
100|   48|        reader_params.rowid_conversion = stats_output->rowid_conversion;
101|   48|        stats_output->rowid_conversion->set_dst_rowset_id(dst_rowset_writer->rowset_id());
102|   48|    }
103|     |
104|   48|    reader_params.return_columns.resize(cur_tablet_schema.num_columns());
105|   48|    std::iota(reader_params.return_columns.begin(), reader_params.return_columns.end(), 0);
106|   48|    reader_params.origin_return_columns = &reader_params.return_columns;
107|   48|    RETURN_IF_ERROR(reader.init(reader_params));
108|     |
109|   48|    vectorized::Block block = cur_tablet_schema.create_block(reader_params.return_columns);
110|   48|    size_t output_rows = 0;
111|   48|    bool eof = false;
112|  626|    while (!eof && !ExecEnv::GetInstance()->storage_engine().stopped()) {
113|  578|        auto tablet_state = tablet->tablet_state();
114|  578|        if (tablet_state != TABLET_RUNNING && tablet_state != TABLET_NOTREADY) {
115|    0|            tablet->clear_cache();
116|    0|            return Status::Error<INTERNAL_ERROR>("tablet {} is not used any more",
117|    0|                                                 tablet->tablet_id());
118|    0|        }
119|     |
120|     |        // Read one block from block reader
121|  578|        RETURN_NOT_OK_STATUS_WITH_WARN(reader.next_block_with_aggregation(&block, &eof),
122|  578|                                       "failed to read next block when merging rowsets of tablet " +
123|  578|                                               std::to_string(tablet->tablet_id()));
124|  578|        RETURN_NOT_OK_STATUS_WITH_WARN(dst_rowset_writer->add_block(&block),
125|  578|                                       "failed to write block when merging rowsets of tablet " +
126|  578|                                               std::to_string(tablet->tablet_id()));
127|     |
128|  578|        if (reader_params.record_rowids && block.rows() > 0) {
129|  578|            std::vector<uint32_t> segment_num_rows;
130|  578|            RETURN_IF_ERROR(dst_rowset_writer->get_segment_num_rows(&segment_num_rows));
131|  578|            stats_output->rowid_conversion->add(reader.current_block_row_locations(),
132|  578|                                                segment_num_rows);
133|  578|        }
134|     |
135|  578|        output_rows += block.rows();
136|  578|        block.clear_column_data();
137|  578|    }
138|   48|    if (ExecEnv::GetInstance()->storage_engine().stopped()) {
139|    0|        return Status::Error<INTERNAL_ERROR>("tablet {} failed to do compaction, engine stopped",
140|    0|                                             tablet->tablet_id());
141|    0|    }
142|     |
143|   48|    if (stats_output != nullptr) {
144|   48|        stats_output->output_rows = output_rows;
145|   48|        stats_output->merged_rows = reader.merged_rows();
146|   48|        stats_output->filtered_rows = reader.filtered_rows();
147|   48|        stats_output->bytes_read_from_local = reader.stats().file_cache_stats.bytes_read_from_local;
148|   48|        stats_output->bytes_read_from_remote =
149|   48|                reader.stats().file_cache_stats.bytes_read_from_remote;
150|   48|        stats_output->cached_bytes_total = reader.stats().file_cache_stats.bytes_write_into_cache;
151|   48|        if (config::is_cloud_mode()) {
152|    0|            stats_output->cloud_local_read_time =
153|    0|                    reader.stats().file_cache_stats.local_io_timer / 1000;
154|    0|            stats_output->cloud_remote_read_time =
155|    0|                    reader.stats().file_cache_stats.remote_io_timer / 1000;
156|    0|        }
157|   48|    }
158|     |
159|   48|    RETURN_NOT_OK_STATUS_WITH_WARN(dst_rowset_writer->flush(),
160|   48|                                   "failed to flush rowset when merging rowsets of tablet " +
161|   48|                                           std::to_string(tablet->tablet_id()));
162|     |
163|   48|    return Status::OK();
164|   48|}
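The loop above records a rowid conversion: after each block is written, get_segment_num_rows() returns the row count of every destination segment produced so far, and rowid_conversion->add() pairs the source row locations with those counts. A minimal standalone sketch of that kind of bookkeeping, not the Doris RowIdConversion API; the to_dst_location() helper and the numbers are hypothetical:

// Standalone sketch (hypothetical, not the Doris RowIdConversion API): given the
// number of rows in each destination segment written so far, map a flat output
// row index to a (destination segment, row-within-segment) pair.
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

std::pair<uint32_t, uint32_t> to_dst_location(uint64_t flat_row,
                                              const std::vector<uint32_t>& segment_num_rows) {
    uint32_t seg = 0;
    uint64_t remaining = flat_row;
    while (seg < segment_num_rows.size() && remaining >= segment_num_rows[seg]) {
        remaining -= segment_num_rows[seg]; // skip whole earlier segments
        ++seg;
    }
    return {seg, (uint32_t)remaining};
}

int main() {
    // Destination rowset with segments of 1000, 1000 and 400 rows.
    std::vector<uint32_t> segment_num_rows = {1000, 1000, 400};
    for (uint64_t flat : {0ULL, 999ULL, 1000ULL, 2399ULL}) {
        auto [seg, row] = to_dst_location(flat, segment_num_rows);
        std::cout << "output row " << flat << " -> segment " << seg << ", row " << row << '\n';
    }
}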
165|     |
166|     |// split columns into several groups, make sure all keys in one group
167|     |// unique_key should consider sequence&delete column
168|     |void Merger::vertical_split_columns(const TabletSchema& tablet_schema,
169|     |                                    std::vector<std::vector<uint32_t>>* column_groups,
170|   95|                                    std::vector<uint32_t>* key_group_cluster_key_idxes) {
171|   95|    size_t num_key_cols = tablet_schema.num_key_columns();
172|   95|    size_t total_cols = tablet_schema.num_columns();
173|   95|    std::vector<uint32_t> key_columns;
174|  184|    for (auto i = 0; i < num_key_cols; ++i) {
175|   89|        key_columns.emplace_back(i);
176|   89|    }
177|     |    // in unique key, sequence & delete sign column should merge with key columns
178|   95|    int32_t sequence_col_idx = -1;
179|   95|    int32_t delete_sign_idx = -1;
180|     |    // in key column compaction, seq_col real index is _num_key_columns
181|     |    // and delete_sign column is _block->columns() - 1
182|   95|    if (tablet_schema.keys_type() == KeysType::UNIQUE_KEYS) {
183|   50|        if (tablet_schema.has_sequence_col()) {
184|    4|            sequence_col_idx = tablet_schema.sequence_col_idx();
185|    4|            key_columns.emplace_back(sequence_col_idx);
186|    4|        }
187|   50|        delete_sign_idx = tablet_schema.field_index(DELETE_SIGN);
188|   50|        if (delete_sign_idx != -1) {
189|   44|            key_columns.emplace_back(delete_sign_idx);
190|   44|        }
191|   50|        if (!tablet_schema.cluster_key_uids().empty()) {
192|    0|            for (const auto& cid : tablet_schema.cluster_key_uids()) {
193|    0|                auto idx = tablet_schema.field_index(cid);
194|    0|                DCHECK(idx >= 0) << "could not find cluster key column with unique_id=" << cid
195|    0|                                 << " in tablet schema, table_id=" << tablet_schema.table_id();
196|    0|                if (idx >= num_key_cols) {
197|    0|                    key_columns.emplace_back(idx);
198|    0|                }
199|    0|            }
200|     |            // tablet schema unique ids: [1, 2, 5, 3, 6, 4], [1 2] is key columns
201|     |            // cluster key unique ids: [3, 1, 4]
202|     |            // the key_columns should be [0, 1, 3, 5]
203|     |            // the key_group_cluster_key_idxes should be [2, 1, 3]
204|    0|            for (const auto& cid : tablet_schema.cluster_key_uids()) {
205|    0|                auto idx = tablet_schema.field_index(cid);
206|    0|                for (auto i = 0; i < key_columns.size(); ++i) {
207|    0|                    if (idx == key_columns[i]) {
208|    0|                        key_group_cluster_key_idxes->emplace_back(i);
209|    0|                        break;
210|    0|                    }
211|    0|                }
212|    0|            }
213|    0|        }
214|   50|    }
215|   95|    VLOG_NOTICE << "sequence_col_idx=" << sequence_col_idx
216|   60|                << ", delete_sign_idx=" << delete_sign_idx;
217|     |    // for duplicate no keys
218|   95|    if (!key_columns.empty()) {
219|   78|        column_groups->emplace_back(key_columns);
220|   78|    }
221|     |
222|   95|    std::vector<uint32_t> value_columns;
223|     |
224|  991|    for (size_t i = num_key_cols; i < total_cols; ++i) {
225|  896|        if (i == sequence_col_idx || i == delete_sign_idx ||
226|  896|            key_columns.end() != std::find(key_columns.begin(), key_columns.end(), i)) {
227|   48|            continue;
228|   48|        }
229|     |
230|  848|        if (!value_columns.empty() &&
231|  848|            value_columns.size() % config::vertical_compaction_num_columns_per_group == 0) {
232|  140|            column_groups->push_back(value_columns);
233|  140|            value_columns.clear();
234|  140|        }
235|  848|        value_columns.push_back(cast_set<uint32_t>(i));
236|  848|    }
237|     |
238|   95|    if (!value_columns.empty()) {
239|   95|        column_groups->push_back(value_columns);
240|   95|    }
241|   95|}
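vertical_split_columns keeps all key columns (plus the sequence and delete-sign columns for unique-key tables) together in the first group and then chunks the remaining value columns by config::vertical_compaction_num_columns_per_group. A minimal standalone sketch of that grouping rule, using hypothetical sizes (2 key columns, 10 value columns, 4 columns per group) instead of a real TabletSchema:

// Standalone sketch (not Doris code): how the column split behaves for a
// hypothetical schema, mirroring the grouping rule of vertical_split_columns.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<std::vector<uint32_t>> split_columns(uint32_t num_key_cols, uint32_t total_cols,
                                                 uint32_t cols_per_group) {
    std::vector<std::vector<uint32_t>> groups;
    std::vector<uint32_t> keys;
    for (uint32_t i = 0; i < num_key_cols; ++i) {
        keys.push_back(i); // all key columns stay together in the first group
    }
    if (!keys.empty()) {
        groups.push_back(keys);
    }
    std::vector<uint32_t> values;
    for (uint32_t i = num_key_cols; i < total_cols; ++i) {
        if (!values.empty() && values.size() % cols_per_group == 0) {
            groups.push_back(values); // close the current value group
            values.clear();
        }
        values.push_back(i);
    }
    if (!values.empty()) {
        groups.push_back(values);
    }
    return groups;
}

int main() {
    // 2 key columns + 10 value columns, 4 value columns per group:
    // expected groups: [0,1] [2,3,4,5] [6,7,8,9] [10,11]
    for (const auto& g : split_columns(2, 12, 4)) {
        for (uint32_t c : g) std::cout << c << ' ';
        std::cout << '\n';
    }
}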
242|     |
243|     |Status Merger::vertical_compact_one_group(
244|     |        BaseTabletSPtr tablet, ReaderType reader_type, const TabletSchema& tablet_schema,
245|     |        bool is_key, const std::vector<uint32_t>& column_group,
246|     |        vectorized::RowSourcesBuffer* row_source_buf,
247|     |        const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
248|     |        RowsetWriter* dst_rowset_writer, uint32_t max_rows_per_segment, Statistics* stats_output,
249|     |        std::vector<uint32_t> key_group_cluster_key_idxes, int64_t batch_size,
250|  291|        CompactionSampleInfo* sample_info, bool enable_sparse_optimization) {
251|     |    // build tablet reader
252|  291|    VLOG_NOTICE << "vertical compact one group, max_rows_per_segment=" << max_rows_per_segment;
253|  291|    vectorized::VerticalBlockReader reader(row_source_buf);
254|  291|    TabletReader::ReaderParams reader_params;
255|  291|    reader_params.is_key_column_group = is_key;
256|  291|    reader_params.key_group_cluster_key_idxes = key_group_cluster_key_idxes;
257|  291|    reader_params.tablet = tablet;
258|  291|    reader_params.reader_type = reader_type;
259|  291|    reader_params.enable_sparse_optimization = enable_sparse_optimization;
260|     |
261|  291|    TabletReadSource read_source;
262|  291|    read_source.rs_splits.reserve(src_rowset_readers.size());
263|  939|    for (const RowsetReaderSharedPtr& rs_reader : src_rowset_readers) {
264|  939|        read_source.rs_splits.emplace_back(rs_reader);
265|  939|    }
266|  291|    read_source.fill_delete_predicates();
267|  291|    reader_params.set_read_source(std::move(read_source));
268|     |
269|  291|    reader_params.version = dst_rowset_writer->version();
270|     |
271|  291|    TabletSchemaSPtr merge_tablet_schema = std::make_shared<TabletSchema>();
272|  291|    merge_tablet_schema->copy_from(tablet_schema);
273|     |
274|  291|    for (auto& del_pred_rs : reader_params.delete_predicates) {
275|  125|        merge_tablet_schema->merge_dropped_columns(*del_pred_rs->tablet_schema());
276|  125|    }
277|     |
278|  291|    reader_params.tablet_schema = merge_tablet_schema;
279|  291|    bool has_cluster_key = false;
280|  291|    if (!tablet->tablet_schema()->cluster_key_uids().empty()) {
281|    0|        reader_params.delete_bitmap = tablet->tablet_meta()->delete_bitmap_ptr();
282|    0|        has_cluster_key = true;
283|    0|    }
284|     |
285|  291|    if (is_key && stats_output && stats_output->rowid_conversion) {
286|   84|        reader_params.record_rowids = true;
287|   84|        reader_params.rowid_conversion = stats_output->rowid_conversion;
288|   84|        stats_output->rowid_conversion->set_dst_rowset_id(dst_rowset_writer->rowset_id());
289|   84|    }
290|     |
291|  291|    reader_params.return_columns = column_group;
292|  291|    reader_params.origin_return_columns = &reader_params.return_columns;
293|  291|    reader_params.batch_size = batch_size;
294|  291|    RETURN_IF_ERROR(reader.init(reader_params, sample_info));
295|     |
296|  291|    vectorized::Block block = tablet_schema.create_block(reader_params.return_columns);
297|  291|    size_t output_rows = 0;
298|  291|    bool eof = false;
299|8.07k|    while (!eof && !ExecEnv::GetInstance()->storage_engine().stopped()) {
300|7.78k|        auto tablet_state = tablet->tablet_state();
301|7.78k|        if (tablet_state != TABLET_RUNNING && tablet_state != TABLET_NOTREADY) {
302|    0|            tablet->clear_cache();
303|    0|            return Status::Error<INTERNAL_ERROR>("tablet {} is not used any more",
304|    0|                                                 tablet->tablet_id());
305|    0|        }
306|     |        // Read one block from block reader
307|7.78k|        RETURN_NOT_OK_STATUS_WITH_WARN(reader.next_block_with_aggregation(&block, &eof),
308|7.78k|                                       "failed to read next block when merging rowsets of tablet " +
309|7.78k|                                               std::to_string(tablet->tablet_id()));
310|7.78k|        RETURN_NOT_OK_STATUS_WITH_WARN(
311|7.78k|                dst_rowset_writer->add_columns(&block, column_group, is_key, max_rows_per_segment,
312|7.78k|                                               has_cluster_key),
313|7.78k|                "failed to write block when merging rowsets of tablet " +
314|7.78k|                        std::to_string(tablet->tablet_id()));
315|     |
316|7.78k|        if (is_key && reader_params.record_rowids && block.rows() > 0) {
317|4.62k|            std::vector<uint32_t> segment_num_rows;
318|4.62k|            RETURN_IF_ERROR(dst_rowset_writer->get_segment_num_rows(&segment_num_rows));
319|4.62k|            stats_output->rowid_conversion->add(reader.current_block_row_locations(),
320|4.62k|                                                segment_num_rows);
321|4.62k|        }
322|7.78k|        output_rows += block.rows();
323|7.78k|        block.clear_column_data();
324|7.78k|    }
325|  291|    if (ExecEnv::GetInstance()->storage_engine().stopped()) {
326|    0|        return Status::Error<INTERNAL_ERROR>("tablet {} failed to do compaction, engine stopped",
327|    0|                                             tablet->tablet_id());
328|    0|    }
329|     |
330|  291|    if (stats_output != nullptr) {
331|  291|        if (is_key) {
332|   84|            stats_output->output_rows = output_rows;
333|   84|            stats_output->merged_rows = reader.merged_rows();
334|   84|            stats_output->filtered_rows = reader.filtered_rows();
335|   84|        }
336|  291|        stats_output->bytes_read_from_local = reader.stats().file_cache_stats.bytes_read_from_local;
337|  291|        stats_output->bytes_read_from_remote =
338|  291|                reader.stats().file_cache_stats.bytes_read_from_remote;
339|  291|        stats_output->cached_bytes_total = reader.stats().file_cache_stats.bytes_write_into_cache;
340|  291|        if (config::is_cloud_mode()) {
341|    0|            stats_output->cloud_local_read_time =
342|    0|                    reader.stats().file_cache_stats.local_io_timer / 1000;
343|    0|            stats_output->cloud_remote_read_time =
344|    0|                    reader.stats().file_cache_stats.remote_io_timer / 1000;
345|    0|        }
346|  291|    }
347|  291|    RETURN_IF_ERROR(dst_rowset_writer->flush_columns(is_key));
348|     |
349|  291|    return Status::OK();
350|  291|}
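For the key column group, the VerticalBlockReader records into the RowSourcesBuffer which input each output row came from; the value-group passes then replay that buffer so every column group emits rows in the same order without re-merging keys (the driver below flushes the buffer after the key group and rewinds it before each group). A minimal standalone sketch of that two-pass idea, with plain vectors standing in for the real reader and buffer; all names here are hypothetical and aggregation of duplicate keys is ignored:

// Standalone sketch (hypothetical, not the Doris API): merge the key columns of
// several sorted inputs once, remember which input each output row came from,
// then replay that order to stitch a value column without re-comparing keys.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct Input {
    std::vector<int> keys;          // sorted key column
    std::vector<std::string> vals;  // value column, same row order
};

int main() {
    std::vector<Input> inputs = {{{1, 4, 6}, {"a", "d", "f"}}, {{2, 3, 5}, {"b", "c", "e"}}};

    // Pass 1: k-way merge of the key columns, recording a "row source" per output row.
    std::vector<uint16_t> row_sources;
    std::vector<size_t> pos(inputs.size(), 0);
    std::vector<int> merged_keys;
    while (true) {
        int best = -1;
        for (int i = 0; i < (int)inputs.size(); ++i) {
            if (pos[i] < inputs[i].keys.size() &&
                (best < 0 || inputs[i].keys[pos[i]] < inputs[best].keys[pos[best]])) {
                best = i;
            }
        }
        if (best < 0) break;
        merged_keys.push_back(inputs[best].keys[pos[best]++]);
        row_sources.push_back((uint16_t)best);
    }

    // Pass 2: replay the recorded sources to merge a value column in the same order.
    std::vector<std::string> merged_vals;
    std::fill(pos.begin(), pos.end(), 0);
    for (uint16_t src : row_sources) {
        merged_vals.push_back(inputs[src].vals[pos[src]++]);
    }

    for (size_t i = 0; i < merged_keys.size(); ++i) {
        std::cout << merged_keys[i] << " -> " << merged_vals[i] << '\n'; // 1 -> a, 2 -> b, ...
    }
}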
351|     |
352|     |// for segcompaction
353|     |Status Merger::vertical_compact_one_group(
354|     |        int64_t tablet_id, ReaderType reader_type, const TabletSchema& tablet_schema, bool is_key,
355|     |        const std::vector<uint32_t>& column_group, vectorized::RowSourcesBuffer* row_source_buf,
356|     |        vectorized::VerticalBlockReader& src_block_reader,
357|     |        segment_v2::SegmentWriter& dst_segment_writer, Statistics* stats_output,
358|   22|        uint64_t* index_size, KeyBoundsPB& key_bounds, SimpleRowIdConversion* rowid_conversion) {
359|     |    // TODO: record_rowids
360|   22|    vectorized::Block block = tablet_schema.create_block(column_group);
361|   22|    size_t output_rows = 0;
362|   22|    bool eof = false;
363|  138|    while (!eof && !ExecEnv::GetInstance()->storage_engine().stopped()) {
364|     |        // Read one block from block reader
365|  116|        RETURN_NOT_OK_STATUS_WITH_WARN(src_block_reader.next_block_with_aggregation(&block, &eof),
366|  116|                                       "failed to read next block when merging rowsets of tablet " +
367|  116|                                               std::to_string(tablet_id));
368|  116|        if (!block.rows()) {
369|    0|            break;
370|    0|        }
371|  116|        RETURN_NOT_OK_STATUS_WITH_WARN(dst_segment_writer.append_block(&block, 0, block.rows()),
372|  116|                                       "failed to write block when merging rowsets of tablet " +
373|  116|                                               std::to_string(tablet_id));
374|     |
375|  116|        if (is_key && rowid_conversion != nullptr) {
376|   30|            rowid_conversion->add(src_block_reader.current_block_row_locations());
377|   30|        }
378|  116|        output_rows += block.rows();
379|  116|        block.clear_column_data();
380|  116|    }
381|   22|    if (ExecEnv::GetInstance()->storage_engine().stopped()) {
382|    0|        return Status::Error<INTERNAL_ERROR>("tablet {} failed to do compaction, engine stopped",
383|    0|                                             tablet_id);
384|    0|    }
385|     |
386|   22|    if (stats_output != nullptr) {
387|   22|        if (is_key) {
388|   11|            stats_output->output_rows = output_rows;
389|   11|            stats_output->merged_rows = src_block_reader.merged_rows();
390|   11|            stats_output->filtered_rows = src_block_reader.filtered_rows();
391|   11|        }
392|   22|        stats_output->bytes_read_from_local =
393|   22|                src_block_reader.stats().file_cache_stats.bytes_read_from_local;
394|   22|        stats_output->bytes_read_from_remote =
395|   22|                src_block_reader.stats().file_cache_stats.bytes_read_from_remote;
396|   22|        stats_output->cached_bytes_total =
397|   22|                src_block_reader.stats().file_cache_stats.bytes_write_into_cache;
398|   22|    }
399|     |
400|     |    // segcompaction produce only one segment at once
401|   22|    RETURN_IF_ERROR(dst_segment_writer.finalize_columns_data());
402|   22|    RETURN_IF_ERROR(dst_segment_writer.finalize_columns_index(index_size));
403|     |
404|   22|    if (is_key) {
405|   11|        Slice min_key = dst_segment_writer.min_encoded_key();
406|   11|        Slice max_key = dst_segment_writer.max_encoded_key();
407|   11|        DCHECK_LE(min_key.compare(max_key), 0);
408|   11|        key_bounds.set_min_key(min_key.to_string());
409|   11|        key_bounds.set_max_key(max_key.to_string());
410|   11|    }
411|     |
412|   22|    return Status::OK();
413|   22|}
414|     |
415|   96|int64_t estimate_batch_size(int group_index, BaseTabletSPtr tablet, int64_t way_cnt) {
416|   96|    std::unique_lock<std::mutex> lock(tablet->sample_info_lock);
417|   96|    CompactionSampleInfo info = tablet->sample_infos[group_index];
418|   96|    if (way_cnt <= 0) {
419|    0|        LOG(INFO) << "estimate batch size for vertical compaction, tablet id: "
420|    0|                  << tablet->tablet_id() << " way cnt: " << way_cnt;
421|    0|        return 4096 - 32;
422|    0|    }
423|   96|    int64_t block_mem_limit = config::compaction_memory_bytes_limit / way_cnt;
424|   96|    if (tablet->last_compaction_status.is<ErrorCode::MEM_LIMIT_EXCEEDED>()) {
425|    0|        block_mem_limit /= 4;
426|    0|    }
427|     |
428|   96|    int64_t group_data_size = 0;
429|   96|    if (info.group_data_size > 0 && info.bytes > 0 && info.rows > 0) {
430|    0|        double smoothing_factor = 0.5;
431|    0|        group_data_size =
432|    0|                int64_t((cast_set<double>(info.group_data_size) * (1 - smoothing_factor)) +
433|    0|                        (cast_set<double>(info.bytes / info.rows) * smoothing_factor));
434|    0|        tablet->sample_infos[group_index].group_data_size = group_data_size;
435|   96|    } else if (info.group_data_size > 0 && (info.bytes <= 0 || info.rows <= 0)) {
436|    0|        group_data_size = info.group_data_size;
437|   96|    } else if (info.group_data_size <= 0 && info.bytes > 0 && info.rows > 0) {
438|    0|        group_data_size = info.bytes / info.rows;
439|    0|        tablet->sample_infos[group_index].group_data_size = group_data_size;
440|   96|    } else {
441|   96|        LOG(INFO) << "estimate batch size for vertical compaction, tablet id: "
442|   96|                  << tablet->tablet_id() << " group data size: " << info.group_data_size
443|   96|                  << " row num: " << info.rows << " consume bytes: " << info.bytes;
444|   96|        return 1024 - 32;
445|   96|    }
446|     |
447|    0|    if (group_data_size <= 0) {
448|    0|        LOG(WARNING) << "estimate batch size for vertical compaction, tablet id: "
449|    0|                     << tablet->tablet_id() << " unexpected group data size: " << group_data_size;
450|    0|        return 4096 - 32;
451|    0|    }
452|     |
453|    0|    tablet->sample_infos[group_index].bytes = 0;
454|    0|    tablet->sample_infos[group_index].rows = 0;
455|     |
456|    0|    int64_t batch_size = block_mem_limit / group_data_size;
457|    0|    int64_t res = std::max(std::min(batch_size, int64_t(4096 - 32)), int64_t(32L));
458|    0|    LOG(INFO) << "estimate batch size for vertical compaction, tablet id: " << tablet->tablet_id()
459|    0|              << " group data size: " << info.group_data_size << " row num: " << info.rows
460|    0|              << " consume bytes: " << info.bytes << " way cnt: " << way_cnt
461|    0|              << " batch size: " << res;
462|    0|    return res;
463|    0|}
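estimate_batch_size divides the compaction memory budget (config::compaction_memory_bytes_limit) by the number of merge ways, divides that by the sampled bytes-per-row of the column group, and clamps the result to [32, 4096 - 32] rows. A small sketch of just that arithmetic with made-up numbers; the smoothing of group_data_size and the MEM_LIMIT_EXCEEDED backoff are omitted, and the limits here are hypothetical, not real config values:

// Standalone sketch of the sizing arithmetic in estimate_batch_size above.
#include <algorithm>
#include <cstdint>
#include <iostream>

int64_t estimate(int64_t mem_limit_bytes, int64_t way_cnt, int64_t bytes_per_row) {
    int64_t block_mem_limit = mem_limit_bytes / way_cnt; // memory budget per merge way
    int64_t batch = block_mem_limit / bytes_per_row;     // rows that fit in the budget
    return std::clamp<int64_t>(batch, 32, 4096 - 32);    // same clamp range as the source
}

int main() {
    // e.g. 128 MB compaction memory budget, 8 input ways, ~200 bytes per sampled row:
    // 128 MB / 8 = 16 MB per way, 16 MB / 200 B = ~83886 rows, clamped down to 4064.
    std::cout << estimate(128LL * 1024 * 1024, 8, 200) << '\n';        // prints 4064
    // A very wide group (~64 KB per row) gets a much smaller batch: 16 MB / 64 KB = 256.
    std::cout << estimate(128LL * 1024 * 1024, 8, 64 * 1024) << '\n';  // prints 256
}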
464|     |
465|     |// steps to do vertical merge:
466|     |// 1. split columns into column groups
467|     |// 2. compact groups one by one, generate a row_source_buf when compact key group
468|     |// and use this row_source_buf to compact value column groups
469|     |// 3. build output rowset
470|     |Status Merger::vertical_merge_rowsets(BaseTabletSPtr tablet, ReaderType reader_type,
471|     |                                      const TabletSchema& tablet_schema,
472|     |                                      const std::vector<RowsetReaderSharedPtr>& src_rowset_readers,
473|     |                                      RowsetWriter* dst_rowset_writer,
474|     |                                      uint32_t max_rows_per_segment, int64_t merge_way_num,
475|   84|                                      Statistics* stats_output) {
476|   84|    LOG(INFO) << "Start to do vertical compaction, tablet_id: " << tablet->tablet_id();
477|   84|    std::vector<std::vector<uint32_t>> column_groups;
478|   84|    std::vector<uint32_t> key_group_cluster_key_idxes;
479|   84|    vertical_split_columns(tablet_schema, &column_groups, &key_group_cluster_key_idxes);
480|     |
481|     |    // Calculate total rows for density calculation after compaction
482|   84|    int64_t total_rows = 0;
483|  250|    for (const auto& rs_reader : src_rowset_readers) {
484|  250|        total_rows += rs_reader->rowset()->rowset_meta()->num_rows();
485|  250|    }
486|     |
487|     |    // Use historical density for sparse wide table optimization
488|     |    // density = (total_cells - null_cells) / total_cells, smaller means more sparse
489|     |    // When density <= threshold, enable sparse optimization
490|     |    // threshold = 0 means disable, 1 means always enable (default)
491|   84|    bool enable_sparse_optimization = false;
492|   84|    if (config::sparse_column_compaction_threshold_percent > 0 &&
493|   84|        tablet->keys_type() == KeysType::UNIQUE_KEYS) {
494|   44|        double density = tablet->compaction_density.load();
495|   44|        enable_sparse_optimization = density <= config::sparse_column_compaction_threshold_percent;
496|     |
497|   44|        LOG(INFO) << "Vertical compaction sparse optimization check: tablet_id="
498|   44|                  << tablet->tablet_id() << ", density=" << density
499|   44|                  << ", threshold=" << config::sparse_column_compaction_threshold_percent
500|   44|                  << ", total_rows=" << total_rows
501|   44|                  << ", num_columns=" << tablet_schema.num_columns()
502|   44|                  << ", total_cells=" << total_rows * tablet_schema.num_columns()
503|   44|                  << ", enable_sparse_optimization=" << enable_sparse_optimization;
504|   44|    }
505|     |
506|   84|    vectorized::RowSourcesBuffer row_sources_buf(
507|   84|            tablet->tablet_id(), dst_rowset_writer->context().tablet_path, reader_type);
508|   84|    Merger::Statistics total_stats;
509|   84|    if (stats_output != nullptr) {
510|   84|        total_stats.rowid_conversion = stats_output->rowid_conversion;
511|   84|    }
512|   84|    {
513|   84|        std::unique_lock<std::mutex> lock(tablet->sample_info_lock);
514|   84|        tablet->sample_infos.resize(column_groups.size());
515|   84|    }
516|     |    // compact group one by one
517|  375|    for (auto i = 0; i < column_groups.size(); ++i) {
518|  291|        VLOG_NOTICE << "row source size: " << row_sources_buf.total_size();
519|  291|        bool is_key = (i == 0);
520|  291|        int64_t batch_size = config::compaction_batch_size != -1
521|  291|                                     ? config::compaction_batch_size
522|  291|                                     : estimate_batch_size(i, tablet, merge_way_num);
523|  291|        CompactionSampleInfo sample_info;
524|  291|        Merger::Statistics group_stats;
525|  291|        group_stats.rowid_conversion = total_stats.rowid_conversion;
526|  291|        Merger::Statistics* group_stats_ptr = stats_output != nullptr ? &group_stats : nullptr;
527|  291|        Status st = vertical_compact_one_group(
528|  291|                tablet, reader_type, tablet_schema, is_key, column_groups[i], &row_sources_buf,
529|  291|                src_rowset_readers, dst_rowset_writer, max_rows_per_segment, group_stats_ptr,
530|  291|                key_group_cluster_key_idxes, batch_size, &sample_info, enable_sparse_optimization);
531|  291|        {
532|  291|            std::unique_lock<std::mutex> lock(tablet->sample_info_lock);
533|  291|            tablet->sample_infos[i] = sample_info;
534|  291|        }
535|  291|        RETURN_IF_ERROR(st);
536|  291|        if (stats_output != nullptr) {
537|  291|            total_stats.bytes_read_from_local += group_stats.bytes_read_from_local;
538|  291|            total_stats.bytes_read_from_remote += group_stats.bytes_read_from_remote;
539|  291|            total_stats.cached_bytes_total += group_stats.cached_bytes_total;
540|  291|            total_stats.cloud_local_read_time += group_stats.cloud_local_read_time;
541|  291|            total_stats.cloud_remote_read_time += group_stats.cloud_remote_read_time;
542|  291|            if (is_key) {
543|   84|                total_stats.output_rows = group_stats.output_rows;
544|   84|                total_stats.merged_rows = group_stats.merged_rows;
545|   84|                total_stats.filtered_rows = group_stats.filtered_rows;
546|   84|                total_stats.rowid_conversion = group_stats.rowid_conversion;
547|   84|            }
548|  291|        }
549|  291|        if (is_key) {
550|   84|            RETURN_IF_ERROR(row_sources_buf.flush());
551|   84|        }
552|  291|        RETURN_IF_ERROR(row_sources_buf.seek_to_begin());
553|  291|    }
554|     |
555|     |    // Calculate and store density for next compaction's sparse optimization threshold
556|     |    // density = (total_cells - total_null_count) / total_cells
557|     |    // Smaller density means more sparse
558|   84|    {
559|   84|        std::unique_lock<std::mutex> lock(tablet->sample_info_lock);
560|   84|        int64_t total_null_count = 0;
561|  291|        for (const auto& info : tablet->sample_infos) {
562|  291|            total_null_count += info.null_count;
563|  291|        }
564|   84|        int64_t total_cells = total_rows * tablet_schema.num_columns();
565|   84|        if (total_cells > 0) {
566|   83|            double density = static_cast<double>(total_cells - total_null_count) /
567|   83|                             static_cast<double>(total_cells);
568|   83|            tablet->compaction_density.store(density);
569|   83|            LOG(INFO) << "Vertical compaction density update: tablet_id=" << tablet->tablet_id()
570|   83|                      << ", total_cells=" << total_cells
571|   83|                      << ", total_null_count=" << total_null_count << ", density=" << density;
572|   83|        }
573|   84|    }
574|     |
575|     |    // finish compact, build output rowset
576|   84|    VLOG_NOTICE << "finish compact groups";
577|   84|    RETURN_IF_ERROR(dst_rowset_writer->final_flush());
578|     |
579|   84|    if (stats_output != nullptr) {
580|   84|        *stats_output = total_stats;
581|   84|    }
582|     |
583|   84|    return Status::OK();
584|   84|}
585|     |#include "common/compile_check_end.h"
586|     |} // namespace doris
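At the end of vertical_merge_rowsets, the per-group null counts sampled during compaction are summed, density = (total_cells - total_null_count) / total_cells is stored on the tablet, and the next vertical compaction of a unique-key tablet compares that stored density against config::sparse_column_compaction_threshold_percent to decide enable_sparse_optimization. A standalone sketch of that calculation with invented numbers:

// Standalone sketch of the density bookkeeping used for the sparse-column
// optimization above; the row/column/threshold values are invented for illustration.
#include <iostream>

int main() {
    long long total_rows = 100000;          // rows read from all source rowsets
    long long num_columns = 200;            // a wide table
    long long total_null_count = 18000000;  // summed from per-group sample infos

    long long total_cells = total_rows * num_columns;                                // 20,000,000
    double density = double(total_cells - total_null_count) / double(total_cells);   // 0.1

    // Stored on the tablet; the next vertical compaction enables the sparse path
    // when density <= sparse_column_compaction_threshold_percent (0.5 here is hypothetical).
    double threshold = 0.5;
    bool enable_sparse_optimization = density <= threshold;
    std::cout << "density=" << density << " enable_sparse_optimization=" << std::boolalpha
              << enable_sparse_optimization << '\n'; // density=0.1 enable_sparse_optimization=true
}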