Coverage Report

Created: 2026-04-13 22:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/index/primary_key_index.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <stddef.h>
22
#include <stdint.h>
23
24
#include <memory>
25
26
#include "common/status.h"
27
#include "io/fs/file_reader_writer_fwd.h"
28
#include "storage/index/bloom_filter/bloom_filter.h"
29
#include "storage/index/bloom_filter/bloom_filter_index_writer.h"
30
#include "storage/index/indexed_column_reader.h"
31
#include "storage/index/indexed_column_writer.h"
32
#include "storage/olap_common.h"
33
#include "util/faststring.h"
34
#include "util/slice.h"
35
36
namespace doris {
37
class TypeInfo;
38
39
namespace io {
40
class FileWriter;
41
} // namespace io
42
namespace segment_v2 {
43
44
class PrimaryKeyIndexMetaPB;
45
} // namespace segment_v2
46
47
// Build index for primary key.
48
// The primary key index is designed similarly to the RocksDB
49
// Partitioned Index, which is created in the segment file when MemTable flushes.
50
// Index is stored in multiple pages to leverage the IndexedColumnWriter.
51
//
52
// NOTE: for now, it's only used when the unique key merge-on-write property is enabled.
53
class PrimaryKeyIndexBuilder {
54
public:
55
    PrimaryKeyIndexBuilder(io::FileWriter* file_writer, size_t seq_col_length, size_t rowid_length)
56
75
            : _file_writer(file_writer),
57
75
              _num_rows(0),
58
75
              _size(0),
59
75
              _disk_size(0),
60
75
              _seq_col_length(seq_col_length),
61
75
              _rowid_length(rowid_length) {}
62
63
    Status init();
64
65
    Status add_item(const Slice& key);
66
67
75
    uint32_t num_rows() const { return _num_rows; }
68
69
5
    uint64_t size() const { return _size; }
70
71
75
    uint64_t disk_size() const { return _disk_size; }
72
73
    // used for be ut
74
2
    uint32_t data_page_num() const { return _primary_key_index_builder->data_page_num(); }
75
76
150
    Slice min_key() {
77
150
        return Slice(_min_key.data(), _min_key.size() - _seq_col_length - _rowid_length);
78
150
    }
79
150
    Slice max_key() {
80
150
        return Slice(_max_key.data(), _max_key.size() - _seq_col_length - _rowid_length);
81
150
    }
82
83
    Status finalize(segment_v2::PrimaryKeyIndexMetaPB* meta);
84
85
private:
86
    io::FileWriter* _file_writer = nullptr;
87
    uint32_t _num_rows;
88
    uint64_t _size;
89
    uint64_t _disk_size;
90
    size_t _seq_col_length;
91
    size_t _rowid_length;
92
93
    faststring _min_key;
94
    faststring _max_key;
95
    std::unique_ptr<segment_v2::IndexedColumnWriter> _primary_key_index_builder;
96
    std::unique_ptr<segment_v2::BloomFilterIndexWriter> _bloom_filter_index_builder;
97
};
98
99
class PrimaryKeyIndexReader {
100
public:
101
60
    PrimaryKeyIndexReader() : _index_parsed(false), _bf_parsed(false) {}
102
103
60
    ~PrimaryKeyIndexReader() {
104
60
        segment_v2::g_pk_total_bloom_filter_num << -static_cast<int64_t>(_bf_num);
105
60
        segment_v2::g_pk_total_bloom_filter_total_bytes << -static_cast<int64_t>(_bf_bytes);
106
60
        segment_v2::g_pk_read_bloom_filter_num << -static_cast<int64_t>(_bf_num);
107
60
        segment_v2::g_pk_read_bloom_filter_total_bytes << -static_cast<int64_t>(_bf_bytes);
108
60
    }
109
110
    Status parse_index(io::FileReaderSPtr file_reader,
111
                       const segment_v2::PrimaryKeyIndexMetaPB& meta,
112
                       OlapReaderStatistics* pk_index_load_stats);
113
114
    Status parse_bf(io::FileReaderSPtr file_reader, const segment_v2::PrimaryKeyIndexMetaPB& meta,
115
                    OlapReaderStatistics* pk_index_load_stats);
116
117
    Status new_iterator(std::unique_ptr<segment_v2::IndexedColumnIterator>* index_iterator,
118
63
                        OlapReaderStatistics* stats) const {
119
63
        DCHECK(_index_parsed);
120
63
        index_iterator->reset(new segment_v2::IndexedColumnIterator(_index_reader.get(), stats));
121
63
        return Status::OK();
122
63
    }
123
124
60
    const TypeInfo* type_info() const {
125
60
        DCHECK(_index_parsed);
126
60
        return _index_reader->type_info();
127
60
    }
128
129
    // verify whether exist in BloomFilter
130
4.55k
    bool check_present(const Slice& key) {
131
4.55k
        DCHECK(_bf_parsed);
132
4.55k
        return _bf->test_bytes(key.data, key.size);
133
4.55k
    }
134
135
56
    int64_t num_rows() const {
136
56
        DCHECK(_index_parsed);
137
56
        return _index_reader->num_values();
138
56
    }
139
140
0
    uint64_t get_bf_memory_size() {
141
0
        DCHECK(_bf_parsed);
142
0
        return _bf->size();
143
0
    }
144
145
0
    uint64_t get_memory_size() {
146
0
        DCHECK(_index_parsed);
147
0
        return _index_reader->get_memory_size();
148
0
    }
149
150
    static constexpr size_t ROW_ID_LENGTH = sizeof(uint32_t) + 1;
151
152
private:
153
    bool _index_parsed;
154
    bool _bf_parsed;
155
    std::unique_ptr<segment_v2::IndexedColumnReader> _index_reader;
156
    std::unique_ptr<segment_v2::BloomFilter> _bf;
157
    size_t _bf_num = 0;
158
    uint64_t _bf_bytes = 0;
159
};
160
} // namespace doris