be/src/storage/row_cursor.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/row_cursor.h" |
19 | | |
20 | | #include <glog/logging.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <numeric> |
24 | | #include <ostream> |
25 | | |
26 | | #include "common/cast_set.h" |
27 | | #include "common/consts.h" |
28 | | #include "core/data_type/primitive_type.h" |
29 | | #include "core/field.h" |
30 | | #include "storage/key_coder.h" |
31 | | #include "storage/olap_common.h" |
32 | | #include "storage/olap_define.h" |
33 | | #include "storage/tablet/tablet_schema.h" |
34 | | #include "storage/types.h" |
35 | | #include "util/slice.h" |
36 | | |
37 | | namespace doris { |
38 | | using namespace ErrorCode; |
39 | | |
40 | 96 | RowCursor::RowCursor() = default; |
41 | 96 | RowCursor::~RowCursor() = default; |
42 | 0 | RowCursor::RowCursor(RowCursor&&) noexcept = default; |
43 | 0 | RowCursor& RowCursor::operator=(RowCursor&&) noexcept = default; |
44 | | |
45 | 96 | void RowCursor::_init_schema(TabletSchemaSPtr schema, uint32_t column_count) { |
46 | 96 | std::vector<uint32_t> columns(column_count); |
47 | 96 | std::iota(columns.begin(), columns.end(), 0); |
48 | 96 | _schema.reset(new Schema(schema->columns(), columns)); |
49 | 96 | } |
50 | | |
51 | 33 | Status RowCursor::init(TabletSchemaSPtr schema, const OlapTuple& tuple) { |
52 | 33 | size_t key_size = tuple.size(); |
53 | 33 | if (key_size > schema->num_columns()) { |
54 | 0 | return Status::Error<INVALID_ARGUMENT>( |
55 | 0 | "Input param are invalid. Column count is bigger than num_columns of schema. " |
56 | 0 | "column_count={}, schema.num_columns={}", |
57 | 0 | key_size, schema->num_columns()); |
58 | 0 | } |
59 | 33 | _init_schema(schema, cast_set<uint32_t>(key_size)); |
60 | 33 | return _from_tuple(tuple); |
61 | 33 | } |
62 | | |
63 | 63 | Status RowCursor::init_scan_key(TabletSchemaSPtr schema, std::vector<Field> fields) { |
64 | 63 | size_t key_size = fields.size(); |
65 | 63 | if (key_size > schema->num_columns()) { |
66 | 0 | return Status::Error<INVALID_ARGUMENT>( |
67 | 0 | "Input param are invalid. Column count is bigger than num_columns of schema. " |
68 | 0 | "column_count={}, schema.num_columns={}", |
69 | 0 | key_size, schema->num_columns()); |
70 | 0 | } |
71 | 63 | _init_schema(schema, cast_set<uint32_t>(key_size)); |
72 | 63 | _fields = std::move(fields); |
73 | 63 | return Status::OK(); |
74 | 63 | } |
75 | | |
76 | 33 | Status RowCursor::_from_tuple(const OlapTuple& tuple) { |
77 | 33 | if (tuple.size() != _schema->num_column_ids()) { |
78 | 0 | return Status::Error<INVALID_ARGUMENT>( |
79 | 0 | "column count does not match. tuple_size={}, field_count={}", tuple.size(), |
80 | 0 | _schema->num_column_ids()); |
81 | 0 | } |
82 | 33 | _fields.resize(tuple.size()); |
83 | 92 | for (size_t i = 0; i < tuple.size(); ++i) { |
84 | 59 | _fields[i] = tuple.get_field(i); |
85 | 59 | } |
86 | 33 | return Status::OK(); |
87 | 33 | } |
88 | | |
89 | 0 | RowCursor RowCursor::clone() const { |
90 | 0 | RowCursor result; |
91 | 0 | result._schema = std::make_unique<Schema>(*_schema); |
92 | 0 | result._fields = _fields; |
93 | 0 | return result; |
94 | 0 | } |
95 | | |
96 | 0 | std::string RowCursor::to_string() const { |
97 | 0 | std::string result; |
98 | 0 | for (size_t i = 0; i < _fields.size(); ++i) { |
99 | 0 | if (i > 0) { |
100 | 0 | result.append("|"); |
101 | 0 | } |
102 | 0 | if (_fields[i].is_null()) { |
103 | 0 | result.append("1&NULL"); |
104 | 0 | } else { |
105 | 0 | result.append("0&"); |
106 | 0 | result.append( |
107 | 0 | _fields[i].to_debug_string(_schema->column(cast_set<uint32_t>(i))->frac())); |
108 | 0 | } |
109 | 0 | } |
110 | 0 | return result; |
111 | 0 | } |
112 | | |
113 | | void RowCursor::_encode_column_value(const TabletColumn* column, const Field& value, |
114 | 322k | bool full_encode, std::string* buf) const { |
115 | 322k | FieldType ft = column->type(); |
116 | 322k | const KeyCoder* coder = get_key_coder(ft); |
117 | | |
118 | 322k | if (field_is_slice_type(ft)) { |
119 | | // String types: CHAR, VARCHAR, STRING — all stored as String in Field. |
120 | 11 | const String& str = value.get<TYPE_STRING>(); |
121 | | |
122 | 11 | if (ft == FieldType::OLAP_FIELD_TYPE_CHAR) { |
123 | | // CHAR type: must pad with \0 to the declared column length |
124 | 5 | size_t col_len = column->length(); |
125 | 5 | String padded(col_len, '\0'); |
126 | 5 | memcpy(padded.data(), str.data(), std::min(str.size(), col_len)); |
127 | | |
128 | 5 | Slice slice(padded.data(), col_len); |
129 | 5 | if (full_encode) { |
130 | 2 | coder->full_encode_ascending(&slice, buf); |
131 | 3 | } else { |
132 | 3 | coder->encode_ascending(&slice, column->index_length(), buf); |
133 | 3 | } |
134 | 6 | } else { |
135 | | // VARCHAR / STRING: use actual length |
136 | 6 | Slice slice(str.data(), str.size()); |
137 | 6 | if (full_encode) { |
138 | 3 | coder->full_encode_ascending(&slice, buf); |
139 | 3 | } else { |
140 | 3 | coder->encode_ascending(&slice, column->index_length(), buf); |
141 | 3 | } |
142 | 6 | } |
143 | 11 | return; |
144 | 11 | } |
145 | | |
146 | | // Non-string scalar keys are fixed-width; their KeyCoder::encode_ascending |
147 | | // ignores `index_size` and delegates to full_encode_ascending, so the |
148 | | // `full_encode` flag here is a no-op and we always call the full helper. |
149 | 322k | switch (ft) { |
150 | 0 | #define CASE(FT, PT) \ |
151 | 322k | case FieldType::FT: \ |
152 | 322k | full_encode_field_as_key<PrimitiveType::PT>(value, coder, buf); \ |
153 | 322k | break; |
154 | 322k | DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) |
155 | 0 | #undef CASE |
156 | 0 | default: |
157 | 0 | LOG(FATAL) << "unsupported field type for encoding: " << int(ft); |
158 | 0 | break; |
159 | 322k | } |
160 | 322k | } |
161 | | |
162 | | // Encodes the first `num_keys` key columns as a memcomparable byte string. |
163 | | // Each slot is [marker][value bytes]. The marker sits at a position that |
164 | | // real entries fill with KEY_NORMAL_MARKER (0x02), so any byte > 0x02 there |
165 | | // sorts strictly after every real entry — independent of the value bytes. |
166 | | // |
167 | | // Examples — PK (a STRING, b STRING), stored entry (foo, bar) encodes as |
168 | | // `02 foo | 02 bar`. Calls with num_keys=2 and only partial key "foo": |
169 | | // |
170 | | // padding_minimal=true -> 02 foo | 00 (MINIMAL) |
171 | | // padding_minimal=false, is_mow=false -> 02 foo | FF (MAXIMAL) |
172 | | // padding_minimal=false, is_mow=true -> 02 foo | 03 (NORMAL_NEXT) |
173 | | template <bool is_mow> |
174 | | void RowCursor::encode_key_with_padding(std::string* buf, size_t num_keys, |
175 | 6 | bool padding_minimal) const { |
176 | 18 | for (uint32_t cid = 0; cid < num_keys; cid++) { |
177 | 18 | auto* column = _schema->column(cid); |
178 | 18 | if (column == nullptr) { |
179 | 6 | if (padding_minimal) { |
180 | 3 | buf->push_back(KeyConsts::KEY_MINIMAL_MARKER); |
181 | 3 | } else { |
182 | 3 | if (is_mow) { |
183 | 1 | buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER); |
184 | 2 | } else { |
185 | 2 | buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER); |
186 | 2 | } |
187 | 3 | } |
188 | 6 | break; |
189 | 6 | } |
190 | | |
191 | 12 | if (cid >= _fields.size() || _fields[cid].is_null()) { |
192 | 1 | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); |
193 | 1 | continue; |
194 | 1 | } |
195 | | |
196 | 11 | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); |
197 | 11 | _encode_column_value(column, _fields[cid], is_mow, buf); |
198 | 11 | } |
199 | 6 | } _ZNK5doris9RowCursor23encode_key_with_paddingILb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEmb Line | Count | Source | 175 | 4 | bool padding_minimal) const { | 176 | 12 | for (uint32_t cid = 0; cid < num_keys; cid++) { | 177 | 12 | auto* column = _schema->column(cid); | 178 | 12 | if (column == nullptr) { | 179 | 4 | if (padding_minimal) { | 180 | 2 | buf->push_back(KeyConsts::KEY_MINIMAL_MARKER); | 181 | 2 | } else { | 182 | 2 | if (is_mow) { | 183 | 0 | buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER); | 184 | 2 | } else { | 185 | 2 | buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER); | 186 | 2 | } | 187 | 2 | } | 188 | 4 | break; | 189 | 4 | } | 190 | | | 191 | 8 | if (cid >= _fields.size() || _fields[cid].is_null()) { | 192 | 1 | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); | 193 | 1 | continue; | 194 | 1 | } | 195 | | | 196 | 7 | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); | 197 | 7 | _encode_column_value(column, _fields[cid], is_mow, buf); | 198 | 7 | } | 199 | 4 | } |
_ZNK5doris9RowCursor23encode_key_with_paddingILb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEmb Line | Count | Source | 175 | 2 | bool padding_minimal) const { | 176 | 6 | for (uint32_t cid = 0; cid < num_keys; cid++) { | 177 | 6 | auto* column = _schema->column(cid); | 178 | 6 | if (column == nullptr) { | 179 | 2 | if (padding_minimal) { | 180 | 1 | buf->push_back(KeyConsts::KEY_MINIMAL_MARKER); | 181 | 1 | } else { | 182 | 1 | if (is_mow) { | 183 | 1 | buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER); | 184 | 1 | } else { | 185 | 0 | buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER); | 186 | 0 | } | 187 | 1 | } | 188 | 2 | break; | 189 | 2 | } | 190 | | | 191 | 4 | if (cid >= _fields.size() || _fields[cid].is_null()) { | 192 | 0 | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); | 193 | 0 | continue; | 194 | 0 | } | 195 | | | 196 | 4 | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); | 197 | 4 | _encode_column_value(column, _fields[cid], is_mow, buf); | 198 | 4 | } | 199 | 2 | } |
|
200 | | |
201 | | // Explicit template instantiations |
202 | | template void RowCursor::encode_key_with_padding<false>(std::string*, size_t, bool) const; |
203 | | template void RowCursor::encode_key_with_padding<true>(std::string*, size_t, bool) const; |
204 | | |
205 | | template <bool full_encode> |
206 | 129k | void RowCursor::encode_key(std::string* buf, size_t num_keys) const { |
207 | 387k | for (uint32_t cid = 0; cid < num_keys; cid++) { |
208 | 258k | if (cid >= _fields.size() || _fields[cid].is_null()) { |
209 | 6 | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); |
210 | 6 | continue; |
211 | 6 | } |
212 | 258k | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); |
213 | 258k | _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf); |
214 | 258k | } |
215 | 129k | } _ZNK5doris9RowCursor10encode_keyILb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Line | Count | Source | 206 | 34 | void RowCursor::encode_key(std::string* buf, size_t num_keys) const { | 207 | 83 | for (uint32_t cid = 0; cid < num_keys; cid++) { | 208 | 49 | if (cid >= _fields.size() || _fields[cid].is_null()) { | 209 | 6 | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); | 210 | 6 | continue; | 211 | 6 | } | 212 | 43 | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); | 213 | 43 | _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf); | 214 | 43 | } | 215 | 34 | } |
_ZNK5doris9RowCursor10encode_keyILb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Line | Count | Source | 206 | 129k | void RowCursor::encode_key(std::string* buf, size_t num_keys) const { | 207 | 387k | for (uint32_t cid = 0; cid < num_keys; cid++) { | 208 | 258k | if (cid >= _fields.size() || _fields[cid].is_null()) { | 209 | 0 | buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); | 210 | 0 | continue; | 211 | 0 | } | 212 | 258k | buf->push_back(KeyConsts::KEY_NORMAL_MARKER); | 213 | 258k | _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf); | 214 | 258k | } | 215 | 129k | } |
|
216 | | |
217 | | template void RowCursor::encode_key<false>(std::string*, size_t) const; |
218 | | template void RowCursor::encode_key<true>(std::string*, size_t) const; |
219 | | |
220 | | } // namespace doris |