Coverage Report

Created: 2026-03-16 21:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/primary_key_index.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
24
#include <memory>
25
26
#include "common/status.h"
27
#include "io/fs/file_reader_writer_fwd.h"
28
#include "storage/index/bloom_filter/bloom_filter.h"
29
#include "storage/index/bloom_filter/bloom_filter_index_writer.h"
30
#include "storage/index/indexed_column_reader.h"
31
#include "storage/index/indexed_column_writer.h"
32
#include "storage/olap_common.h"
33
#include "util/faststring.h"
34
#include "util/slice.h"
35
36
namespace doris {
37
#include "common/compile_check_begin.h"
38
class TypeInfo;
39
40
namespace io {
41
class FileWriter;
42
} // namespace io
43
namespace segment_v2 {
44
45
class PrimaryKeyIndexMetaPB;
46
} // namespace segment_v2
47
48
// Build index for primary key.
49
// The primary key index is designed similarly to the RocksDB
50
// Partitioned Index, which is created in the segment file when MemTable flushes.
51
// Index is stored in multiple pages to leverage the IndexedColumnWriter.
52
//
53
// NOTE: for now, it's only used when unique key merge-on-write property enabled.
54
class PrimaryKeyIndexBuilder {
55
public:
56
    PrimaryKeyIndexBuilder(io::FileWriter* file_writer, size_t seq_col_length, size_t rowid_length)
57
75
            : _file_writer(file_writer),
58
75
              _num_rows(0),
59
75
              _size(0),
60
75
              _disk_size(0),
61
75
              _seq_col_length(seq_col_length),
62
75
              _rowid_length(rowid_length) {}
63
64
    Status init();
65
66
    Status add_item(const Slice& key);
67
68
75
    uint32_t num_rows() const { return _num_rows; }
69
70
5
    uint64_t size() const { return _size; }
71
72
75
    uint64_t disk_size() const { return _disk_size; }
73
74
    // used for be ut
75
2
    uint32_t data_page_num() const { return _primary_key_index_builder->data_page_num(); }
76
77
150
    Slice min_key() {
78
150
        return Slice(_min_key.data(), _min_key.size() - _seq_col_length - _rowid_length);
79
150
    }
80
150
    Slice max_key() {
81
150
        return Slice(_max_key.data(), _max_key.size() - _seq_col_length - _rowid_length);
82
150
    }
83
84
    Status finalize(segment_v2::PrimaryKeyIndexMetaPB* meta);
85
86
private:
87
    io::FileWriter* _file_writer = nullptr;
88
    uint32_t _num_rows;
89
    uint64_t _size;
90
    uint64_t _disk_size;
91
    size_t _seq_col_length;
92
    size_t _rowid_length;
93
94
    faststring _min_key;
95
    faststring _max_key;
96
    std::unique_ptr<segment_v2::IndexedColumnWriter> _primary_key_index_builder;
97
    std::unique_ptr<segment_v2::BloomFilterIndexWriter> _bloom_filter_index_builder;
98
};
99
100
class PrimaryKeyIndexReader {
101
public:
102
60
    PrimaryKeyIndexReader() : _index_parsed(false), _bf_parsed(false) {}
103
104
60
    ~PrimaryKeyIndexReader() {
105
60
        segment_v2::g_pk_total_bloom_filter_num << -static_cast<int64_t>(_bf_num);
106
60
        segment_v2::g_pk_total_bloom_filter_total_bytes << -static_cast<int64_t>(_bf_bytes);
107
60
        segment_v2::g_pk_read_bloom_filter_num << -static_cast<int64_t>(_bf_num);
108
60
        segment_v2::g_pk_read_bloom_filter_total_bytes << -static_cast<int64_t>(_bf_bytes);
109
60
    }
110
111
    Status parse_index(io::FileReaderSPtr file_reader,
112
                       const segment_v2::PrimaryKeyIndexMetaPB& meta,
113
                       OlapReaderStatistics* pk_index_load_stats);
114
115
    Status parse_bf(io::FileReaderSPtr file_reader, const segment_v2::PrimaryKeyIndexMetaPB& meta,
116
                    OlapReaderStatistics* pk_index_load_stats);
117
118
    Status new_iterator(std::unique_ptr<segment_v2::IndexedColumnIterator>* index_iterator,
119
63
                        OlapReaderStatistics* stats) const {
120
63
        DCHECK(_index_parsed);
121
63
        index_iterator->reset(new segment_v2::IndexedColumnIterator(_index_reader.get(), stats));
122
63
        return Status::OK();
123
63
    }
124
125
60
    const TypeInfo* type_info() const {
126
60
        DCHECK(_index_parsed);
127
60
        return _index_reader->type_info();
128
60
    }
129
130
    // verify whether exist in BloomFilter
131
4.55k
    bool check_present(const Slice& key) {
132
4.55k
        DCHECK(_bf_parsed);
133
4.55k
        return _bf->test_bytes(key.data, key.size);
134
4.55k
    }
135
136
56
    int64_t num_rows() const {
137
56
        DCHECK(_index_parsed);
138
56
        return _index_reader->num_values();
139
56
    }
140
141
0
    uint64_t get_bf_memory_size() {
142
0
        DCHECK(_bf_parsed);
143
0
        return _bf->size();
144
0
    }
145
146
0
    uint64_t get_memory_size() {
147
0
        DCHECK(_index_parsed);
148
0
        return _index_reader->get_memory_size();
149
0
    }
150
151
    static constexpr size_t ROW_ID_LENGTH = sizeof(uint32_t) + 1;
152
153
private:
154
    bool _index_parsed;
155
    bool _bf_parsed;
156
    std::unique_ptr<segment_v2::IndexedColumnReader> _index_reader;
157
    std::unique_ptr<segment_v2::BloomFilter> _bf;
158
    size_t _bf_num = 0;
159
    uint64_t _bf_bytes = 0;
160
};
161
#include "common/compile_check_end.h"
162
} // namespace doris