be/src/storage/row_cursor.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/row_cursor.h" |
19 | | |
20 | | #include <glog/logging.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <numeric> |
24 | | #include <ostream> |
25 | | |
26 | | #include "common/cast_set.h" |
27 | | #include "common/consts.h" |
28 | | #include "core/data_type/primitive_type.h" |
29 | | #include "core/field.h" |
30 | | #include "storage/key_coder.h" |
31 | | #include "storage/olap_common.h" |
32 | | #include "storage/olap_define.h" |
33 | | #include "storage/tablet/tablet_schema.h" |
34 | | #include "storage/types.h" |
35 | | #include "util/slice.h" |
36 | | |
37 | | namespace doris { |
38 | | using namespace ErrorCode; |
39 | | |
40 | 2.88M | RowCursor::RowCursor() = default; |
41 | 2.90M | RowCursor::~RowCursor() = default; |
42 | 0 | RowCursor::RowCursor(RowCursor&&) noexcept = default; |
43 | 0 | RowCursor& RowCursor::operator=(RowCursor&&) noexcept = default; |
44 | | |
45 | 2.89M | void RowCursor::_init_schema(TabletSchemaSPtr schema, uint32_t column_count) { |
46 | 2.89M | std::vector<uint32_t> columns(column_count); |
47 | 2.89M | std::iota(columns.begin(), columns.end(), 0); |
48 | 2.89M | _schema.reset(new Schema(schema->columns(), columns)); |
49 | 2.89M | } |
50 | | |
51 | 2.90M | Status RowCursor::init(TabletSchemaSPtr schema, const OlapTuple& tuple) { |
52 | 2.90M | size_t key_size = tuple.size(); |
53 | 2.90M | if (key_size > schema->num_columns()) { |
54 | 0 | return Status::Error<INVALID_ARGUMENT>( |
55 | 0 | "Input param are invalid. Column count is bigger than num_columns of schema. " |
56 | 0 | "column_count={}, schema.num_columns={}", |
57 | 0 | key_size, schema->num_columns()); |
58 | 0 | } |
59 | 2.90M | _init_schema(schema, cast_set<uint32_t>(key_size)); |
60 | 2.90M | return _from_tuple(tuple); |
61 | 2.90M | } |
62 | | |
63 | 288 | Status RowCursor::init_scan_key(TabletSchemaSPtr schema, std::vector<Field> fields) { |
64 | 288 | size_t key_size = fields.size(); |
65 | 288 | if (key_size > schema->num_columns()) { |
66 | 0 | return Status::Error<INVALID_ARGUMENT>( |
67 | 0 | "Input param are invalid. Column count is bigger than num_columns of schema. " |
68 | 0 | "column_count={}, schema.num_columns={}", |
69 | 0 | key_size, schema->num_columns()); |
70 | 0 | } |
71 | 288 | _init_schema(schema, cast_set<uint32_t>(key_size)); |
72 | 288 | _fields = std::move(fields); |
73 | 288 | return Status::OK(); |
74 | 288 | } |
75 | | |
76 | 2.89M | Status RowCursor::_from_tuple(const OlapTuple& tuple) { |
77 | 2.89M | if (tuple.size() != _schema->num_column_ids()) { |
78 | 0 | return Status::Error<INVALID_ARGUMENT>( |
79 | 0 | "column count does not match. tuple_size={}, field_count={}", tuple.size(), |
80 | 0 | _schema->num_column_ids()); |
81 | 0 | } |
82 | 2.89M | _fields.resize(tuple.size()); |
83 | 9.11M | for (size_t i = 0; i < tuple.size(); ++i) { |
84 | 6.21M | _fields[i] = tuple.get_field(i); |
85 | 6.21M | } |
86 | 2.89M | return Status::OK(); |
87 | 2.89M | } |
88 | | |
89 | 0 | RowCursor RowCursor::clone() const { |
90 | 0 | RowCursor result; |
91 | 0 | result._schema = std::make_unique<Schema>(*_schema); |
92 | 0 | result._fields = _fields; |
93 | 0 | return result; |
94 | 0 | } |
95 | | |
96 | 8.02M | std::string RowCursor::to_string() const { |
97 | 8.02M | std::string result; |
98 | 29.3M | for (size_t i = 0; i < _fields.size(); ++i) { |
99 | 21.3M | if (i > 0) { |
100 | 13.3M | result.append("|"); |
101 | 13.3M | } |
102 | 21.3M | if (_fields[i].is_null()) { |
103 | 330k | result.append("1&NULL"); |
104 | 21.0M | } else { |
105 | 21.0M | result.append("0&"); |
106 | 21.0M | result.append( |
107 | 21.0M | _fields[i].to_debug_string(_schema->column(cast_set<uint32_t>(i))->frac())); |
108 | 21.0M | } |
109 | 21.3M | } |
110 | 8.02M | return result; |
111 | 8.02M | } |
112 | | |
113 | | void RowCursor::_encode_column_value(const TabletColumn* column, const Field& value, |
114 | 11.0M | bool full_encode, std::string* buf) const { |
115 | 11.0M | FieldType ft = column->type(); |
116 | 11.0M | const KeyCoder* coder = get_key_coder(ft); |
117 | | |
118 | 11.0M | if (field_is_slice_type(ft)) { |
119 | | // String types: CHAR, VARCHAR, STRING — all stored as String in Field. |
120 | 9.85M | const String& str = value.get<TYPE_STRING>(); |
121 | | |
122 | 9.85M | if (ft == FieldType::OLAP_FIELD_TYPE_CHAR) { |
123 | | // CHAR type: must pad with \0 to the declared column length |
124 | 610 | size_t col_len = column->length(); |
125 | 610 | String padded(col_len, '\0'); |
126 | 610 | memcpy(padded.data(), str.data(), std::min(str.size(), col_len)); |
127 | | |
128 | 610 | Slice slice(padded.data(), col_len); |
129 | 610 | if (full_encode) { |
130 | 32 | coder->full_encode_ascending(&slice, buf); |
131 | 578 | } else { |
132 | 578 | coder->encode_ascending(&slice, column->index_length(), buf); |
133 | 578 | } |
134 | 9.85M | } else { |
135 | | // VARCHAR / STRING: use actual length |
136 | 9.85M | Slice slice(str.data(), str.size()); |
137 | 9.85M | if (full_encode) { |
138 | 9.80M | coder->full_encode_ascending(&slice, buf); |
139 | 9.80M | } else { |
140 | 48.0k | coder->encode_ascending(&slice, column->index_length(), buf); |
141 | 48.0k | } |
142 | 9.85M | } |
143 | 9.85M | return; |
144 | 9.85M | } |
145 | | |
146 | | // Non-string scalar keys are fixed-width; their KeyCoder::encode_ascending |
147 | | // ignores `index_size` and delegates to full_encode_ascending, so the |
148 | | // `full_encode` flag here is a no-op and we always call the full helper. |
149 | 1.15M | switch (ft) { |
150 | 0 | #define CASE(FT, PT) \ |
151 | 1.15M | case FieldType::FT: \ |
152 | 1.15M | full_encode_field_as_key<PrimitiveType::PT>(value, coder, buf); \ |
153 | 1.15M | break; |
154 | 1.15M | DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) |
155 | 0 | #undef CASE |
156 | 0 | default: |
157 | 0 | LOG(FATAL) << "unsupported field type for encoding: " << int(ft); |
158 | 0 | break; |
159 | 1.15M | } |
160 | 1.15M | } |
161 | | |
162 | | // Encodes the first `num_keys` key columns as a memcomparable byte string. |
163 | | // Each slot is [marker][value bytes]. The marker sits at a position that |
164 | | // real entries fill with KEY_NORMAL_MARKER (0x02), so any byte > 0x02 there |
165 | | // sorts strictly after every real entry — independent of the value bytes. |
166 | | // |
167 | | // Examples — PK (a STRING, b STRING), stored entry (foo, bar) encodes as |
168 | | // `02 foo | 02 bar`. Calls with num_keys=2 and only partial key "foo": |
169 | | // |
170 | | // padding_minimal=true -> 02 foo | 00 (MINIMAL) |
171 | | // padding_minimal=false, is_mow=false -> 02 foo | FF (MAXIMAL) |
172 | | // padding_minimal=false, is_mow=true -> 02 foo | 03 (NORMAL_NEXT) |
173 | | template <bool is_mow> |
174 | | void RowCursor::encode_key_with_padding(std::string* buf, size_t num_keys, |
175 | 3.39M | bool padding_minimal) const { |
176 | 14.1M | for (uint32_t cid = 0; cid < num_keys; cid++) { |
177 | 13.9M | auto* column = _schema->column(cid); |
178 | 13.9M | if (column == nullptr) { |
179 | 3.15M | if (padding_minimal) { |
180 | 1.45M | buf->push_back(KeyConsts::KEY_MINIMAL_MARKER); |
181 | 1.69M | } else { |
182 | 1.69M | if (is_mow) { |
183 | 1.37M | buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER); |
184 | 1.37M | } else { |
185 | 321k | buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER); |
186 | 321k | } |
187 | 1.69M | } |
188 | 3.15M | break; |
189 | 3.15M | } |
190 | | |
191 | 10.7M | if (cid >= _fields.size() || _fields[cid].is_null()) { |
192 | 109k | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); |
193 | 109k | continue; |
194 | 109k | } |
195 | | |
196 | 10.6M | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); |
197 | 10.6M | _encode_column_value(column, _fields[cid], is_mow, buf); |
198 | 10.6M | } |
199 | 3.39M | } _ZNK5doris9RowCursor23encode_key_with_paddingILb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEmb Line | Count | Source | 175 | 761k | bool padding_minimal) const { | 176 | 1.53M | for (uint32_t cid = 0; cid < num_keys; cid++) { | 177 | 1.34M | auto* column = _schema->column(cid); | 178 | 1.34M | if (column == nullptr) { | 179 | 572k | if (padding_minimal) { | 180 | 249k | buf->push_back(KeyConsts::KEY_MINIMAL_MARKER); | 181 | 322k | } else { | 182 | 322k | if (is_mow) { | 183 | 0 | buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER); | 184 | 322k | } else { | 185 | 322k | buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER); | 186 | 322k | } | 187 | 322k | } | 188 | 572k | break; | 189 | 572k | } | 190 | | | 191 | 774k | if (cid >= _fields.size() || _fields[cid].is_null()) { | 192 | 93.1k | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); | 193 | 93.1k | continue; | 194 | 93.1k | } | 195 | | | 196 | 677k | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); | 197 | 677k | _encode_column_value(column, _fields[cid], is_mow, buf); | 198 | 677k | } | 199 | 761k | } |
_ZNK5doris9RowCursor23encode_key_with_paddingILb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEmb Line | Count | Source | 175 | 2.63M | bool padding_minimal) const { | 176 | 12.6M | for (uint32_t cid = 0; cid < num_keys; cid++) { | 177 | 12.5M | auto* column = _schema->column(cid); | 178 | 12.5M | if (column == nullptr) { | 179 | 2.58M | if (padding_minimal) { | 180 | 1.20M | buf->push_back(KeyConsts::KEY_MINIMAL_MARKER); | 181 | 1.37M | } else { | 182 | 1.37M | if (is_mow) { | 183 | 1.37M | buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER); | 184 | 18.4E | } else { | 185 | 18.4E | buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER); | 186 | 18.4E | } | 187 | 1.37M | } | 188 | 2.58M | break; | 189 | 2.58M | } | 190 | | | 191 | 10.0M | if (cid >= _fields.size() || _fields[cid].is_null()) { | 192 | 16.3k | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); | 193 | 16.3k | continue; | 194 | 16.3k | } | 195 | | | 196 | 9.97M | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); | 197 | 9.97M | _encode_column_value(column, _fields[cid], is_mow, buf); | 198 | 9.97M | } | 199 | 2.63M | } |
|
200 | | |
201 | | // Explicit template instantiations |
202 | | template void RowCursor::encode_key_with_padding<false>(std::string*, size_t, bool) const; |
203 | | template void RowCursor::encode_key_with_padding<true>(std::string*, size_t, bool) const; |
204 | | |
205 | | template <bool full_encode> |
206 | 129k | void RowCursor::encode_key(std::string* buf, size_t num_keys) const { |
207 | 387k | for (uint32_t cid = 0; cid < num_keys; cid++) { |
208 | 258k | if (cid >= _fields.size() || _fields[cid].is_null()) { |
209 | 6 | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); |
210 | 6 | continue; |
211 | 6 | } |
212 | 258k | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); |
213 | 258k | _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf); |
214 | 258k | } |
215 | 129k | } _ZNK5doris9RowCursor10encode_keyILb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Line | Count | Source | 206 | 34 | void RowCursor::encode_key(std::string* buf, size_t num_keys) const { | 207 | 83 | for (uint32_t cid = 0; cid < num_keys; cid++) { | 208 | 49 | if (cid >= _fields.size() || _fields[cid].is_null()) { | 209 | 6 | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); | 210 | 6 | continue; | 211 | 6 | } | 212 | 43 | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); | 213 | 43 | _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf); | 214 | 43 | } | 215 | 34 | } |
_ZNK5doris9RowCursor10encode_keyILb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Line | Count | Source | 206 | 129k | void RowCursor::encode_key(std::string* buf, size_t num_keys) const { | 207 | 387k | for (uint32_t cid = 0; cid < num_keys; cid++) { | 208 | 258k | if (cid >= _fields.size() || _fields[cid].is_null()) { | 209 | 0 | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); | 210 | 0 | continue; | 211 | 0 | } | 212 | 258k | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); | 213 | 258k | _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf); | 214 | 258k | } | 215 | 129k | } |
|
216 | | |
217 | | template void RowCursor::encode_key<false>(std::string*, size_t) const; |
218 | | template void RowCursor::encode_key<true>(std::string*, size_t) const; |
219 | | |
220 | | } // namespace doris |