Coverage Report

Created: 2026-03-15 17:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/primary_key_index.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/index/primary_key_index.h"
19
20
#include <butil/time.h>
21
#include <gen_cpp/segment_v2.pb.h>
22
23
#include <utility>
24
25
#include "common/compiler_util.h" // IWYU pragma: keep
26
#include "common/config.h"
27
#include "io/fs/file_writer.h"
28
#include "storage/index/bloom_filter/bloom_filter_index_reader.h"
29
#include "storage/index/bloom_filter/bloom_filter_index_writer.h"
30
#include "storage/olap_common.h"
31
#include "storage/segment/encoding_info.h"
32
#include "storage/types.h"
33
34
namespace doris {
35
36
// File-local bvar counter exposed under the name "doris_primary_key_index_memory_bytes".
// NOTE(review): none of the functions visible in this part of the file update it;
// presumably it tracks memory held by primary key indexes -- confirm against its users.
static bvar::Adder<size_t> g_primary_key_index_memory_bytes("doris_primary_key_index_memory_bytes");
37
38
75
// Creates and initializes the two writers used by this builder:
//   * an IndexedColumnWriter over VARCHAR type info, with both the ordinal index
//     and the value index enabled, ZSTD compression, and the data page size taken
//     from config::primary_key_data_page_size;
//   * a primary key bloom filter index writer with a false positive probability
//     of 0.01.
// Returns the first non-OK status reported by either writer, otherwise OK.
Status PrimaryKeyIndexBuilder::init() {
    // TODO(liaoxin) using the column type directly if there's only one column in unique key columns
    const auto* varchar_type = get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>();

    // Indexed column writer that stores the encoded keys.
    segment_v2::IndexedColumnWriterOptions writer_opts;
    writer_opts.data_page_size = config::primary_key_data_page_size;
    writer_opts.compression = segment_v2::ZSTD;
    writer_opts.encoding =
            segment_v2::EncodingInfo::get_default_encoding(varchar_type->type(), {}, true);
    writer_opts.write_ordinal_index = true;
    writer_opts.write_value_index = true;
    _primary_key_index_builder.reset(
            new segment_v2::IndexedColumnWriter(writer_opts, varchar_type, _file_writer));
    RETURN_IF_ERROR(_primary_key_index_builder->init());

    // Bloom filter writer built over the same type info.
    auto bf_opts = segment_v2::BloomFilterOptions();
    bf_opts.fpp = 0.01;
    RETURN_IF_ERROR(segment_v2::PrimaryKeyBloomFilterIndexWriterImpl::create(
            bf_opts, varchar_type, &_bloom_filter_index_builder));

    return Status::OK();
}
57
58
134k
// Appends one encoded key to the primary key index and to the bloom filter.
//
// The full key goes into the indexed column writer. The bloom filter receives
// only the leading part of the key: the trailing `_seq_col_length + _rowid_length`
// bytes are cut off before insertion.
//
// The builder relies on keys arriving in strictly ascending order: the first key
// seen is recorded as the min key and every key replaces the max key. Debug
// builds DCHECK that each key compares greater than the previous max key.
//
// Returns the first non-OK status from either underlying writer, otherwise OK.
Status PrimaryKeyIndexBuilder::add_item(const Slice& key) {
    const auto* key_data = key.get_data();
    const auto key_len = key.get_size();

    RETURN_IF_ERROR(_primary_key_index_builder->add(&key));

    // Bloom filter entry: the key with its trailing suffix bytes dropped.
    Slice bloom_key(key_data, key_len - _seq_col_length - _rowid_length);
    RETURN_IF_ERROR(_bloom_filter_index_builder->add_values(&bloom_key, 1));

    // Input is sorted, so the very first key is the minimum ...
    if (UNLIKELY(_num_rows == 0)) {
        _min_key.append(key_data, key_len);
    }
    // ... and the most recent key is always the maximum.
    DCHECK(key.compare(_max_key) > 0)
            << "found duplicate key or key is not sorted! current key: " << key
            << ", last max key: " << _max_key;
    _max_key.clear();
    _max_key.append(key_data, key_len);

    ++_num_rows;
    _size += key_len;
    return Status::OK();
}
76
77
75
// Finishes both writers and fills `meta` with the resulting index metadata.
//
// Steps, in order:
//   1. finish the indexed column writer into meta->primary_key_index and add its
//      disk size to `_disk_size`;
//   2. store min_key() / max_key() into `meta`;
//   3. flush and finish the bloom filter writer into meta->bloom_filter_index,
//      adding the number of bytes it appended to the file writer to `_disk_size`;
//   4. release both writers.
//
// Returns the first non-OK status encountered; in that case the writers are not
// released and `meta` may be partially filled.
Status PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta) {
    // Primary key index pages.
    RETURN_IF_ERROR(_primary_key_index_builder->finish(meta->mutable_primary_key_index()));
    _disk_size += _primary_key_index_builder->disk_size();

    // Min/max keys come from the accessors rather than the raw buffers; per the
    // original note, the sequence column should be removed from them.
    meta->set_min_key(min_key().to_string());
    meta->set_max_key(max_key().to_string());

    // Bloom filter index: measure its footprint as the growth of the file writer.
    RETURN_IF_ERROR(_bloom_filter_index_builder->flush());
    uint64_t offset_before_bf = _file_writer->bytes_appended();
    RETURN_IF_ERROR(
            _bloom_filter_index_builder->finish(_file_writer, meta->mutable_bloom_filter_index()));
    _disk_size += _file_writer->bytes_appended() - offset_before_bf;

    // Both writers are done; drop them.
    _primary_key_index_builder.reset();
    _bloom_filter_index_builder.reset();
    return Status::OK();
}
96
97
// Builds the indexed column reader for the primary key index described by
// `meta.primary_key_index()` and loads it.
//
// @param file_reader          reader for the file holding the index; taken by value
//                             and handed over to the IndexedColumnReader.
// @param meta                 primary key index metadata.
// @param pk_index_load_stats  statistics object passed through to the load call.
// @return the status of the load; `_index_parsed` is set only on success.
Status PrimaryKeyIndexReader::parse_index(io::FileReaderSPtr file_reader,
                                          const segment_v2::PrimaryKeyIndexMetaPB& meta,
                                          OlapReaderStatistics* pk_index_load_stats) {
    // parse primary key index
    // `file_reader` is our own copy and is not used again below, so move it into
    // the reader instead of copying the shared pointer (same as parse_bf does).
    _index_reader.reset(new segment_v2::IndexedColumnReader(std::move(file_reader),
                                                            meta.primary_key_index()));
    _index_reader->set_is_pk_index(true);
    // The page cache is used unless config::disable_pk_storage_page_cache is set.
    RETURN_IF_ERROR(_index_reader->load(!config::disable_pk_storage_page_cache, false,
                                        pk_index_load_stats));

    _index_parsed = true;
    return Status::OK();
}
109
110
// Loads the primary key bloom filter described by `meta.bloom_filter_index()`
// and stores it in `_bf`.
//
// A temporary BloomFilterIndexReader is created and loaded, an iterator is
// obtained from it, and the bloom filter at ordinal 0 is read. On success the
// bloom filter counters (global bvars and the per-reader `_bf_num` / `_bf_bytes`)
// are updated and `_bf_parsed` is set.
//
// @param file_reader          reader for the file holding the bloom filter; taken
//                             by value and moved into the temporary index reader.
// @param meta                 primary key index metadata.
// @param pk_index_load_stats  statistics object passed through to the load and
//                             iterator calls.
// @return the first non-OK status encountered, otherwise OK.
Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader,
                                       const segment_v2::PrimaryKeyIndexMetaPB& meta,
                                       OlapReaderStatistics* pk_index_load_stats) {
    // parse bloom filter
    // Bind by reference: the metadata is only read here and `meta` outlives this
    // call, so copying the whole protobuf message is unnecessary.
    const segment_v2::ColumnIndexMetaPB& column_index_meta = meta.bloom_filter_index();
    segment_v2::BloomFilterIndexReader bf_index_reader(std::move(file_reader),
                                                       column_index_meta.bloom_filter_index());
    // The page cache is used unless config::disable_pk_storage_page_cache is set.
    RETURN_IF_ERROR(bf_index_reader.load(!config::disable_pk_storage_page_cache, false,
                                         pk_index_load_stats));
    std::unique_ptr<segment_v2::BloomFilterIndexIterator> bf_iter;
    RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter, pk_index_load_stats));
    RETURN_IF_ERROR(bf_iter->read_bloom_filter(0, &_bf));

    // Account for the single bloom filter that was just read.
    const auto bf_size = _bf->size();
    segment_v2::g_pk_total_bloom_filter_num << 1;
    segment_v2::g_pk_total_bloom_filter_total_bytes << bf_size;
    segment_v2::g_pk_read_bloom_filter_num << 1;
    segment_v2::g_pk_read_bloom_filter_total_bytes << bf_size;
    _bf_num += 1;
    _bf_bytes += bf_size;

    _bf_parsed = true;

    return Status::OK();
}
133
134
} // namespace doris