be/src/storage/key/row_key_encoder.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/key/row_key_encoder.h" |
19 | | |
20 | | #include <cassert> |
21 | | |
22 | | #include "common/cast_set.h" |
23 | | #include "common/compiler_util.h" // IWYU pragma: keep |
24 | | #include "common/consts.h" |
25 | | #include "common/logging.h" |
26 | | #include "storage/iterator/olap_data_convertor.h" |
27 | | #include "storage/key_coder.h" |
28 | | #include "storage/tablet/tablet_schema.h" |
29 | | |
30 | | namespace doris { |
31 | | |
32 | 2.49k | RowKeyEncoder::RowKeyEncoder(const TabletSchema& schema, bool mow) { |
33 | | // The schema-key view always encodes the schema key columns; the primary |
34 | | // key index is built over these even when the segment sorts by cluster |
35 | | // keys. |
36 | 4.63k | for (size_t cid = 0; cid < schema.num_key_columns(); ++cid) { |
37 | 2.13k | _schema_key_coders.push_back(get_key_coder(schema.column(cid).type())); |
38 | 2.13k | } |
39 | 2.49k | _num_short_key_columns = schema.num_short_key_columns(); |
40 | | // encode the sequence id into the primary key index |
41 | 2.49k | if (schema.has_sequence_col()) { |
42 | 107 | const auto& column = schema.column(schema.sequence_col_idx()); |
43 | 107 | _seq_coder = get_key_coder(column.type()); |
44 | 107 | _seq_col_length = column.length(); |
45 | 107 | } |
46 | | // The sort-key view is whatever the segment sorts by: mow tables with |
47 | | // cluster keys sort by the cluster key columns (made unique by a rowid |
48 | | // suffix), every other table sorts by the schema key columns. |
49 | 2.49k | if (mow && !schema.cluster_key_uids().empty()) { |
50 | 5 | _rowid_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT); |
51 | 8 | for (auto uid : schema.cluster_key_uids()) { |
52 | 8 | _add_sort_key_column(schema.column_by_uid(uid)); |
53 | 8 | } |
54 | 2.49k | } else { |
55 | 4.61k | for (size_t cid = 0; cid < schema.num_key_columns(); ++cid) { |
56 | 2.12k | _add_sort_key_column(schema.column(cid)); |
57 | 2.12k | } |
58 | 2.49k | } |
59 | 2.49k | } |
60 | | |
61 | 2.13k | void RowKeyEncoder::_add_sort_key_column(const TabletColumn& column) { |
62 | 2.13k | _sort_key_coders.push_back(get_key_coder(column.type())); |
63 | 2.13k | _sort_key_index_size.push_back(cast_set<uint16_t>(column.index_length())); |
64 | 2.13k | } |
65 | | |
66 | | std::string RowKeyEncoder::full_encode(const std::vector<IOlapColumnDataAccessor*>& key_columns, |
67 | 6.78k | size_t pos) const { |
68 | 6.78k | assert(_sort_key_index_size.size() == _sort_key_coders.size()); |
69 | 6.78k | assert(key_columns.size() == _sort_key_coders.size()); |
70 | 6.78k | return _full_encode(_sort_key_coders, key_columns, pos); |
71 | 6.78k | } |
72 | | |
73 | | std::string RowKeyEncoder::full_encode_primary_keys( |
74 | 16 | const std::vector<IOlapColumnDataAccessor*>& key_columns, size_t pos) const { |
75 | 16 | return _full_encode(_schema_key_coders, key_columns, pos); |
76 | 16 | } |
77 | | |
78 | | std::string RowKeyEncoder::_full_encode(const std::vector<const KeyCoder*>& key_coders, |
79 | | const std::vector<IOlapColumnDataAccessor*>& key_columns, |
80 | 6.80k | size_t pos) { |
81 | 6.80k | assert(key_columns.size() == key_coders.size()); |
82 | | |
83 | 6.80k | std::string encoded_keys; |
84 | 6.80k | size_t cid = 0; |
85 | 6.80k | for (const auto& column : key_columns) { |
86 | 6.10k | const auto* field = column->get_data_at(pos); |
87 | 6.10k | if (UNLIKELY(!field)) { |
88 | 6 | encoded_keys.push_back(KeyConsts::KEY_NULL_FIRST_MARKER); |
89 | 6 | ++cid; |
90 | 6 | continue; |
91 | 6 | } |
92 | 6.09k | encoded_keys.push_back(KeyConsts::KEY_NORMAL_MARKER); |
93 | 6.09k | DCHECK(key_coders[cid] != nullptr); |
94 | 6.09k | key_coders[cid]->full_encode_ascending(field, &encoded_keys); |
95 | 6.09k | ++cid; |
96 | 6.09k | } |
97 | 6.80k | return encoded_keys; |
98 | 6.80k | } |
99 | | |
100 | | std::string RowKeyEncoder::encode_short_keys( |
101 | 13.0k | const std::vector<IOlapColumnDataAccessor*>& key_columns, size_t pos) const { |
102 | 13.0k | assert(key_columns.size() == _num_short_key_columns); |
103 | 13.0k | assert(key_columns.size() <= _sort_key_coders.size()); |
104 | | |
105 | 13.0k | std::string encoded_keys; |
106 | 13.0k | size_t cid = 0; |
107 | 13.0k | for (const auto& column : key_columns) { |
108 | 10.1k | const auto* field = column->get_data_at(pos); |
109 | 10.1k | if (UNLIKELY(!field)) { |
110 | 1 | encoded_keys.push_back(KeyConsts::KEY_NULL_FIRST_MARKER); |
111 | 1 | ++cid; |
112 | 1 | continue; |
113 | 1 | } |
114 | 10.1k | encoded_keys.push_back(KeyConsts::KEY_NORMAL_MARKER); |
115 | 10.1k | _sort_key_coders[cid]->encode_ascending(field, _sort_key_index_size[cid], &encoded_keys); |
116 | 10.1k | ++cid; |
117 | 10.1k | } |
118 | 13.0k | return encoded_keys; |
119 | 13.0k | } |
120 | | |
121 | | void RowKeyEncoder::append_seq_suffix(std::string* encoded_keys, |
122 | 20 | const IOlapColumnDataAccessor* seq_column, size_t pos) const { |
123 | 20 | const auto* field = seq_column->get_data_at(pos); |
124 | | // To facilitate the use of the primary key index, encode the seq column |
125 | | // to the minimum value of the corresponding length when the seq column |
126 | | // is null |
127 | 20 | if (UNLIKELY(!field)) { |
128 | 3 | encoded_keys->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); |
129 | 3 | encoded_keys->append(_seq_col_length, KeyConsts::KEY_MINIMAL_MARKER); |
130 | 3 | return; |
131 | 3 | } |
132 | 17 | encoded_keys->push_back(KeyConsts::KEY_NORMAL_MARKER); |
133 | 17 | _seq_coder->full_encode_ascending(field, encoded_keys); |
134 | 17 | } |
135 | | |
136 | 10 | void RowKeyEncoder::append_rowid_suffix(std::string* encoded_keys, uint32_t rowid) const { |
137 | 10 | encoded_keys->push_back(KeyConsts::KEY_NORMAL_MARKER); |
138 | 10 | _rowid_coder->full_encode_ascending(&rowid, encoded_keys); |
139 | 10 | } |
140 | | |
141 | | } // namespace doris |