Coverage Report

Created: 2026-05-13 17:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/primary_key_index.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
24
#include <memory>
25
26
#include "common/status.h"
27
#include "io/fs/file_reader_writer_fwd.h"
28
#include "storage/index/bloom_filter/bloom_filter.h"
29
#include "storage/index/bloom_filter/bloom_filter_index_writer.h"
30
#include "storage/index/indexed_column_reader.h"
31
#include "storage/index/indexed_column_writer.h"
32
#include "storage/olap_common.h"
33
#include "util/faststring.h"
34
#include "util/slice.h"
35
36
namespace doris {
37
38
namespace io {
39
class FileWriter;
40
} // namespace io
41
namespace segment_v2 {
42
43
class PrimaryKeyIndexMetaPB;
44
} // namespace segment_v2
45
46
// Build index for primary key.
47
// The primary key index is designed similarly to the RocksDB
48
// Partitioned Index, which is created in the segment file when MemTable flushes.
49
// Index is stored in multiple pages to leverage the IndexedColumnWriter.
50
//
51
// NOTE: for now, it's only used when the unique key merge-on-write property is enabled.
52
class PrimaryKeyIndexBuilder {
53
public:
54
    PrimaryKeyIndexBuilder(io::FileWriter* file_writer, size_t seq_col_length, size_t rowid_length)
55
75
            : _file_writer(file_writer),
56
75
              _num_rows(0),
57
75
              _size(0),
58
75
              _disk_size(0),
59
75
              _seq_col_length(seq_col_length),
60
75
              _rowid_length(rowid_length) {}
61
62
    Status init();
63
64
    Status add_item(const Slice& key);
65
66
75
    uint32_t num_rows() const { return _num_rows; }
67
68
5
    uint64_t size() const { return _size; }
69
70
75
    uint64_t disk_size() const { return _disk_size; }
71
72
    // Used for BE unit tests.
73
2
    uint32_t data_page_num() const { return _primary_key_index_builder->data_page_num(); }
74
75
150
    Slice min_key() {
76
150
        return Slice(_min_key.data(), _min_key.size() - _seq_col_length - _rowid_length);
77
150
    }
78
150
    Slice max_key() {
79
150
        return Slice(_max_key.data(), _max_key.size() - _seq_col_length - _rowid_length);
80
150
    }
81
82
    Status finalize(segment_v2::PrimaryKeyIndexMetaPB* meta);
83
84
private:
85
    io::FileWriter* _file_writer = nullptr;
86
    uint32_t _num_rows;
87
    uint64_t _size;
88
    uint64_t _disk_size;
89
    size_t _seq_col_length;
90
    size_t _rowid_length;
91
92
    faststring _min_key;
93
    faststring _max_key;
94
    std::unique_ptr<segment_v2::IndexedColumnWriter> _primary_key_index_builder;
95
    std::unique_ptr<segment_v2::BloomFilterIndexWriter> _bloom_filter_index_builder;
96
};
97
98
class PrimaryKeyIndexReader {
99
public:
100
60
    PrimaryKeyIndexReader() : _index_parsed(false), _bf_parsed(false) {}
101
102
60
    ~PrimaryKeyIndexReader() {
103
60
        segment_v2::g_pk_total_bloom_filter_num << -static_cast<int64_t>(_bf_num);
104
60
        segment_v2::g_pk_total_bloom_filter_total_bytes << -static_cast<int64_t>(_bf_bytes);
105
60
        segment_v2::g_pk_read_bloom_filter_num << -static_cast<int64_t>(_bf_num);
106
60
        segment_v2::g_pk_read_bloom_filter_total_bytes << -static_cast<int64_t>(_bf_bytes);
107
60
    }
108
109
    Status parse_index(io::FileReaderSPtr file_reader,
110
                       const segment_v2::PrimaryKeyIndexMetaPB& meta,
111
                       OlapReaderStatistics* pk_index_load_stats);
112
113
    Status parse_bf(io::FileReaderSPtr file_reader, const segment_v2::PrimaryKeyIndexMetaPB& meta,
114
                    OlapReaderStatistics* pk_index_load_stats);
115
116
    Status new_iterator(std::unique_ptr<segment_v2::IndexedColumnIterator>* index_iterator,
117
63
                        OlapReaderStatistics* stats) const {
118
63
        DCHECK(_index_parsed);
119
63
        index_iterator->reset(new segment_v2::IndexedColumnIterator(_index_reader.get(), stats));
120
63
        return Status::OK();
121
63
    }
122
123
60
    FieldType type() const {
124
60
        DCHECK(_index_parsed);
125
60
        return _index_reader->type();
126
60
    }
127
128
    // Verify whether the key exists in the BloomFilter.
129
4.55k
    bool check_present(const Slice& key) {
130
4.55k
        DCHECK(_bf_parsed);
131
4.55k
        return _bf->test_bytes(key.data, key.size);
132
4.55k
    }
133
134
56
    int64_t num_rows() const {
135
56
        DCHECK(_index_parsed);
136
56
        return _index_reader->num_values();
137
56
    }
138
139
0
    uint64_t get_bf_memory_size() {
140
0
        DCHECK(_bf_parsed);
141
0
        return _bf->size();
142
0
    }
143
144
0
    uint64_t get_memory_size() {
145
0
        DCHECK(_index_parsed);
146
0
        return _index_reader->get_memory_size();
147
0
    }
148
149
    static constexpr size_t ROW_ID_LENGTH = sizeof(uint32_t) + 1;
150
151
private:
152
    bool _index_parsed;
153
    bool _bf_parsed;
154
    std::unique_ptr<segment_v2::IndexedColumnReader> _index_reader;
155
    std::unique_ptr<segment_v2::BloomFilter> _bf;
156
    size_t _bf_num = 0;
157
    uint64_t _bf_bytes = 0;
158
};
159
} // namespace doris