be/src/storage/index/primary_key_index.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/index/primary_key_index.h" |
19 | | |
20 | | #include <butil/time.h> |
21 | | #include <gen_cpp/segment_v2.pb.h> |
22 | | |
23 | | #include <utility> |
24 | | |
25 | | #include "common/compiler_util.h" // IWYU pragma: keep |
26 | | #include "common/config.h" |
27 | | #include "io/fs/file_writer.h" |
28 | | #include "storage/index/bloom_filter/bloom_filter_index_reader.h" |
29 | | #include "storage/index/bloom_filter/bloom_filter_index_writer.h" |
30 | | #include "storage/olap_common.h" |
31 | | #include "storage/segment/encoding_info.h" |
32 | | #include "storage/types.h" |
33 | | |
34 | | namespace doris { |
35 | | |
36 | | static bvar::Adder<size_t> g_primary_key_index_memory_bytes("doris_primary_key_index_memory_bytes"); |
37 | | |
38 | 75 | Status PrimaryKeyIndexBuilder::init() { |
39 | | // TODO(liaoxin) using the column type directly if there's only one column in unique key columns |
40 | 75 | const auto* type_info = get_scalar_type_info<FieldType::OLAP_FIELD_TYPE_VARCHAR>(); |
41 | 75 | segment_v2::IndexedColumnWriterOptions options; |
42 | 75 | options.write_ordinal_index = true; |
43 | 75 | options.write_value_index = true; |
44 | 75 | options.data_page_size = config::primary_key_data_page_size; |
45 | 75 | options.encoding = segment_v2::EncodingInfo::get_default_encoding(type_info->type(), {}, true); |
46 | 75 | options.compression = segment_v2::ZSTD; |
47 | 75 | _primary_key_index_builder.reset( |
48 | 75 | new segment_v2::IndexedColumnWriter(options, type_info, _file_writer)); |
49 | 75 | RETURN_IF_ERROR(_primary_key_index_builder->init()); |
50 | | |
51 | 75 | auto opt = segment_v2::BloomFilterOptions(); |
52 | 75 | opt.fpp = 0.01; |
53 | 75 | RETURN_IF_ERROR(segment_v2::PrimaryKeyBloomFilterIndexWriterImpl::create( |
54 | 75 | opt, type_info, &_bloom_filter_index_builder)); |
55 | 75 | return Status::OK(); |
56 | 75 | } |
57 | | |
58 | 134k | Status PrimaryKeyIndexBuilder::add_item(const Slice& key) { |
59 | 134k | RETURN_IF_ERROR(_primary_key_index_builder->add(&key)); |
60 | 134k | Slice key_without_seq = Slice(key.get_data(), key.get_size() - _seq_col_length - _rowid_length); |
61 | 134k | RETURN_IF_ERROR(_bloom_filter_index_builder->add_values(&key_without_seq, 1)); |
62 | | // the key is already sorted, so the first key is min_key, and |
63 | | // the last key is max_key. |
64 | 134k | if (UNLIKELY(_num_rows == 0)) { |
65 | 75 | _min_key.append(key.get_data(), key.get_size()); |
66 | 75 | } |
67 | 134k | DCHECK(key.compare(_max_key) > 0) |
68 | 0 | << "found duplicate key or key is not sorted! current key: " << key |
69 | 0 | << ", last max key: " << _max_key; |
70 | 134k | _max_key.clear(); |
71 | 134k | _max_key.append(key.get_data(), key.get_size()); |
72 | 134k | _num_rows++; |
73 | 134k | _size += key.get_size(); |
74 | 134k | return Status::OK(); |
75 | 134k | } |
76 | | |
77 | 75 | Status PrimaryKeyIndexBuilder::finalize(segment_v2::PrimaryKeyIndexMetaPB* meta) { |
78 | | // finish primary key index |
79 | 75 | RETURN_IF_ERROR(_primary_key_index_builder->finish(meta->mutable_primary_key_index())); |
80 | 75 | _disk_size += _primary_key_index_builder->disk_size(); |
81 | | |
82 | | // set min_max key, the sequence column should be removed |
83 | 75 | meta->set_min_key(min_key().to_string()); |
84 | 75 | meta->set_max_key(max_key().to_string()); |
85 | | |
86 | | // finish bloom filter index |
87 | 75 | RETURN_IF_ERROR(_bloom_filter_index_builder->flush()); |
88 | 75 | uint64_t start_size = _file_writer->bytes_appended(); |
89 | 75 | RETURN_IF_ERROR( |
90 | 75 | _bloom_filter_index_builder->finish(_file_writer, meta->mutable_bloom_filter_index())); |
91 | 75 | _disk_size += _file_writer->bytes_appended() - start_size; |
92 | 75 | _primary_key_index_builder.reset(nullptr); |
93 | 75 | _bloom_filter_index_builder.reset(nullptr); |
94 | 75 | return Status::OK(); |
95 | 75 | } |
96 | | |
97 | | Status PrimaryKeyIndexReader::parse_index(io::FileReaderSPtr file_reader, |
98 | | const segment_v2::PrimaryKeyIndexMetaPB& meta, |
99 | 60 | OlapReaderStatistics* pk_index_load_stats) { |
100 | | // parse primary key index |
101 | 60 | _index_reader.reset(new segment_v2::IndexedColumnReader(file_reader, meta.primary_key_index())); |
102 | 60 | _index_reader->set_is_pk_index(true); |
103 | 60 | RETURN_IF_ERROR(_index_reader->load(!config::disable_pk_storage_page_cache, false, |
104 | 60 | pk_index_load_stats)); |
105 | | |
106 | 60 | _index_parsed = true; |
107 | 60 | return Status::OK(); |
108 | 60 | } |
109 | | |
110 | | Status PrimaryKeyIndexReader::parse_bf(io::FileReaderSPtr file_reader, |
111 | | const segment_v2::PrimaryKeyIndexMetaPB& meta, |
112 | 5 | OlapReaderStatistics* pk_index_load_stats) { |
113 | | // parse bloom filter |
114 | 5 | segment_v2::ColumnIndexMetaPB column_index_meta = meta.bloom_filter_index(); |
115 | 5 | segment_v2::BloomFilterIndexReader bf_index_reader(std::move(file_reader), |
116 | 5 | column_index_meta.bloom_filter_index()); |
117 | 5 | RETURN_IF_ERROR(bf_index_reader.load(!config::disable_pk_storage_page_cache, false, |
118 | 5 | pk_index_load_stats)); |
119 | 5 | std::unique_ptr<segment_v2::BloomFilterIndexIterator> bf_iter; |
120 | 5 | RETURN_IF_ERROR(bf_index_reader.new_iterator(&bf_iter, pk_index_load_stats)); |
121 | 5 | RETURN_IF_ERROR(bf_iter->read_bloom_filter(0, &_bf)); |
122 | 5 | segment_v2::g_pk_total_bloom_filter_num << 1; |
123 | 5 | segment_v2::g_pk_total_bloom_filter_total_bytes << _bf->size(); |
124 | 5 | segment_v2::g_pk_read_bloom_filter_num << 1; |
125 | 5 | segment_v2::g_pk_read_bloom_filter_total_bytes << _bf->size(); |
126 | 5 | _bf_num += 1; |
127 | 5 | _bf_bytes += _bf->size(); |
128 | | |
129 | 5 | _bf_parsed = true; |
130 | | |
131 | 5 | return Status::OK(); |
132 | 5 | } |
133 | | |
134 | | } // namespace doris |