Coverage Report

Created: 2026-05-27 05:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/row_cursor.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/row_cursor.h"
19
20
#include <glog/logging.h>
21
22
#include <algorithm>
23
#include <numeric>
24
#include <ostream>
25
26
#include "common/cast_set.h"
27
#include "common/consts.h"
28
#include "core/data_type/primitive_type.h"
29
#include "core/field.h"
30
#include "storage/key_coder.h"
31
#include "storage/olap_common.h"
32
#include "storage/olap_define.h"
33
#include "storage/tablet/tablet_schema.h"
34
#include "storage/types.h"
35
#include "util/slice.h"
36
37
namespace doris {
38
using namespace ErrorCode;
39
40
4.19M
// Defaulted default constructor: every member starts in its default-initialized state.
RowCursor::RowCursor() = default;
41
4.18M
// Defaulted destructor; members release whatever they own.
// NOTE(review): presumably defined out of line so the header can hold a
// unique_ptr to a forward-declared Schema — confirm against row_cursor.h.
RowCursor::~RowCursor() = default;
42
0
// Defaulted (memberwise) move constructor, declared noexcept.
RowCursor::RowCursor(RowCursor&&) noexcept = default;
43
0
// Defaulted (memberwise) move assignment, declared noexcept.
RowCursor& RowCursor::operator=(RowCursor&&) noexcept = default;
44
45
312
// Builds this cursor's private Schema from the leading `column_count` columns
// of the tablet schema, i.e. column ids 0, 1, ..., column_count - 1.
//
// No bounds check is done here; the init*() entry points in this file compare
// the requested count against schema->num_columns() before calling.
void RowCursor::_init_schema(TabletSchemaSPtr schema, uint32_t column_count) {
    std::vector<uint32_t> column_ids(column_count);
    std::iota(column_ids.begin(), column_ids.end(), 0);
    // make_unique rather than reset(new ...): no naked new, and the same
    // construction style clone() uses for _schema.
    _schema = std::make_unique<Schema>(schema->columns(), column_ids);
}
50
51
3.40M
void RowCursor::_init_schema(const std::shared_ptr<Schema>& shared_schema, uint32_t column_count) {
52
3.40M
    _schema.reset(new Schema(*shared_schema));
53
3.40M
}
54
55
95
// Prepares a cursor over the first `num_columns` columns of `schema` with
// `num_columns` default-constructed field slots.
//
// Returns INVALID_ARGUMENT when `num_columns` exceeds schema->num_columns();
// OK otherwise.
Status RowCursor::init(TabletSchemaSPtr schema, size_t num_columns) {
    const auto schema_column_count = schema->num_columns();
    if (num_columns <= schema_column_count) {
        _init_schema(schema, cast_set<uint32_t>(num_columns));
        // Initialize all fields as null (TYPE_NULL).
        _fields.resize(num_columns);
        return Status::OK();
    }
    return Status::Error<INVALID_ARGUMENT>(
            "Input param are invalid. Column count is bigger than num_columns of schema. "
            "column_count={}, schema.num_columns={}",
            num_columns, schema_column_count);
}
67
68
0
// Initializes the cursor from `tuple`, using the first tuple.size() columns of
// `schema` as the cursor schema.
//
// Returns INVALID_ARGUMENT when the tuple has more entries than the schema has
// columns; otherwise returns whatever from_tuple() reports.
Status RowCursor::init(TabletSchemaSPtr schema, const OlapTuple& tuple) {
    const size_t tuple_columns = tuple.size();
    const auto schema_columns = schema->num_columns();
    if (tuple_columns > schema_columns) {
        return Status::Error<INVALID_ARGUMENT>(
                "Input param are invalid. Column count is bigger than num_columns of schema. "
                "column_count={}, schema.num_columns={}",
                tuple_columns, schema_columns);
    }

    _init_schema(schema, cast_set<uint32_t>(tuple_columns));
    return from_tuple(tuple);
}
79
80
// Initializes the cursor from `tuple`, copying `shared_schema` as the cursor
// schema instead of deriving one from `schema`.
//
// `schema` is only used to validate the tuple size: INVALID_ARGUMENT is
// returned when the tuple has more entries than schema->num_columns().
// Otherwise the result of from_tuple() is returned.
Status RowCursor::init(TabletSchemaSPtr schema, const OlapTuple& tuple,
                       const std::shared_ptr<Schema>& shared_schema) {
    const size_t tuple_columns = tuple.size();
    const auto schema_columns = schema->num_columns();
    const bool fits_in_schema = !(tuple_columns > schema_columns);
    if (fits_in_schema) {
        _init_schema(shared_schema, cast_set<uint32_t>(tuple_columns));
        return from_tuple(tuple);
    }
    return Status::Error<INVALID_ARGUMENT>(
            "Input param are invalid. Column count is bigger than num_columns of schema. "
            "column_count={}, schema.num_columns={}",
            tuple_columns, schema_columns);
}
92
93
217
// Initializes the cursor as a scan key: the cursor schema covers the first
// fields.size() columns of `schema`, and `fields` is moved in as the values.
//
// Returns INVALID_ARGUMENT when more fields are supplied than the schema has
// columns; OK otherwise.
Status RowCursor::init_scan_key(TabletSchemaSPtr schema, std::vector<Field> fields) {
    // Capture the count up front; `fields` is moved from below.
    const size_t field_count = fields.size();
    const auto schema_columns = schema->num_columns();
    if (field_count > schema_columns) {
        return Status::Error<INVALID_ARGUMENT>(
                "Input param are invalid. Column count is bigger than num_columns of schema. "
                "column_count={}, schema.num_columns={}",
                field_count, schema_columns);
    }

    _init_schema(schema, cast_set<uint32_t>(field_count));
    _fields = std::move(fields);
    return Status::OK();
}
105
106
3.41M
// Replaces the cursor's fields with the fields of `tuple`.
//
// The tuple must have exactly as many entries as the cursor schema has column
// ids; otherwise INVALID_ARGUMENT is returned and _fields is left untouched.
Status RowCursor::from_tuple(const OlapTuple& tuple) {
    const size_t tuple_columns = tuple.size();
    const auto schema_columns = _schema->num_column_ids();
    if (tuple_columns != schema_columns) {
        return Status::Error<INVALID_ARGUMENT>(
                "column count does not match. tuple_size={}, field_count={}", tuple_columns,
                schema_columns);
    }

    _fields.resize(tuple_columns);
    size_t idx = 0;
    for (auto& slot : _fields) {
        slot = tuple.get_field(idx);
        ++idx;
    }
    return Status::OK();
}
118
119
774k
// Returns a deep copy of this cursor: the Schema is copy-constructed into a
// fresh unique_ptr and the field vector is copied.
//
// A cursor that has not been initialized yet (null _schema, e.g. straight
// after default construction) clones to another schema-less cursor instead of
// dereferencing a null pointer.
RowCursor RowCursor::clone() const {
    RowCursor copy;
    if (_schema != nullptr) {
        copy._schema = std::make_unique<Schema>(*_schema);
    }
    copy._fields = _fields;
    return copy;
}
125
126
775k
void RowCursor::pad_char_fields() {
127
1.57M
    for (size_t i = 0; i < _fields.size(); ++i) {
128
795k
        const TabletColumn* col = _schema->column(cast_set<uint32_t>(i));
129
795k
        if (col->type() == FieldType::OLAP_FIELD_TYPE_CHAR && !_fields[i].is_null()) {
130
719
            String padded = _fields[i].get<TYPE_CHAR>();
131
719
            padded.resize(col->length(), '\0');
132
719
            _fields[i] = Field::create_field<TYPE_CHAR>(std::move(padded));
133
719
        }
134
795k
    }
135
775k
}
136
137
9.18M
std::string RowCursor::to_string() const {
138
9.18M
    std::string result;
139
33.3M
    for (size_t i = 0; i < _fields.size(); ++i) {
140
24.2M
        if (i > 0) {
141
15.0M
            result.append("|");
142
15.0M
        }
143
24.2M
        if (_fields[i].is_null()) {
144
448k
            result.append("1&NULL");
145
23.7M
        } else {
146
23.7M
            result.append("0&");
147
23.7M
            result.append(
148
23.7M
                    _fields[i].to_debug_string(_schema->column(cast_set<uint32_t>(i))->frac()));
149
23.7M
        }
150
24.2M
    }
151
9.18M
    return result;
152
9.18M
}
153
154
void RowCursor::_encode_column_value(const TabletColumn* column, const Field& value,
155
12.3M
                                     bool full_encode, std::string* buf) const {
156
12.3M
    FieldType ft = column->type();
157
12.3M
    const KeyCoder* coder = get_key_coder(ft);
158
159
12.3M
    if (field_is_slice_type(ft)) {
160
        // String types: CHAR, VARCHAR, STRING — all stored as String in Field.
161
11.2M
        const String& str = value.get<TYPE_STRING>();
162
163
11.2M
        if (ft == FieldType::OLAP_FIELD_TYPE_CHAR) {
164
            // CHAR type: must pad with \0 to the declared column length
165
753
            size_t col_len = column->length();
166
753
            String padded(col_len, '\0');
167
753
            memcpy(padded.data(), str.data(), std::min(str.size(), col_len));
168
169
753
            Slice slice(padded.data(), col_len);
170
753
            if (full_encode) {
171
32
                coder->full_encode_ascending(&slice, buf);
172
721
            } else {
173
721
                coder->encode_ascending(&slice, column->index_length(), buf);
174
721
            }
175
11.2M
        } else {
176
            // VARCHAR / STRING: use actual length
177
11.2M
            Slice slice(str.data(), str.size());
178
11.2M
            if (full_encode) {
179
11.1M
                coder->full_encode_ascending(&slice, buf);
180
11.1M
            } else {
181
22.8k
                coder->encode_ascending(&slice, column->index_length(), buf);
182
22.8k
            }
183
11.2M
        }
184
11.2M
        return;
185
11.2M
    }
186
187
    // Non-string scalar keys are fixed-width; their KeyCoder::encode_ascending
188
    // ignores `index_size` and delegates to full_encode_ascending, so the
189
    // `full_encode` flag here is a no-op and we always call the full helper.
190
1.17M
    switch (ft) {
191
0
#define CASE(FT, PT)                                                    \
192
1.17M
    case FieldType::FT:                                                 \
193
1.17M
        full_encode_field_as_key<PrimitiveType::PT>(value, coder, buf); \
194
1.17M
        break;
195
1.17M
        DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE)
196
0
#undef CASE
197
0
    default:
198
0
        LOG(FATAL) << "unsupported field type for encoding: " << int(ft);
199
0
        break;
200
1.17M
    }
201
1.17M
}
202
203
// Encodes the first `num_keys` key columns as a memcomparable byte string.
204
// Each slot is [marker][value bytes]. The marker sits at a position that
205
// real entries fill with KEY_NORMAL_MARKER (0x02), so any byte > 0x02 there
206
// sorts strictly after every real entry — independent of the value bytes.
207
//
208
// Examples — PK (a STRING, b STRING), stored entry (foo, bar) encodes as
209
// `02 foo | 02 bar`. Calls with num_keys=2 and only partial key "foo":
210
//
211
//   padding_minimal=true                  -> 02 foo | 00          (MINIMAL)
212
//   padding_minimal=false, is_mow=false   -> 02 foo | FF          (MAXIMAL)
213
//   padding_minimal=false, is_mow=true    -> 02 foo | 03      (NORMAL_NEXT)
214
template <bool is_mow>
215
void RowCursor::encode_key_with_padding(std::string* buf, size_t num_keys,
216
3.76M
                                        bool padding_minimal) const {
217
15.9M
    for (uint32_t cid = 0; cid < num_keys; cid++) {
218
15.6M
        auto* column = _schema->column(cid);
219
15.6M
        if (column == nullptr) {
220
3.48M
            if (padding_minimal) {
221
1.58M
                buf->push_back(KeyConsts::KEY_MINIMAL_MARKER);
222
1.89M
            } else {
223
1.89M
                if (is_mow) {
224
1.57M
                    buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER);
225
1.57M
                } else {
226
319k
                    buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER);
227
319k
                }
228
1.89M
            }
229
3.48M
            break;
230
3.48M
        }
231
232
12.1M
        if (cid >= _fields.size() || _fields[cid].is_null()) {
233
114k
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
234
114k
            continue;
235
114k
        }
236
237
12.0M
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
238
12.0M
        _encode_column_value(column, _fields[cid], is_mow, buf);
239
12.0M
    }
240
3.76M
}
_ZNK5doris9RowCursor23encode_key_with_paddingILb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEmb
Line
Count
Source
216
775k
                                        bool padding_minimal) const {
217
1.56M
    for (uint32_t cid = 0; cid < num_keys; cid++) {
218
1.35M
        auto* column = _schema->column(cid);
219
1.35M
        if (column == nullptr) {
220
563k
            if (padding_minimal) {
221
243k
                buf->push_back(KeyConsts::KEY_MINIMAL_MARKER);
222
320k
            } else {
223
320k
                if (is_mow) {
224
0
                    buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER);
225
320k
                } else {
226
320k
                    buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER);
227
320k
                }
228
320k
            }
229
563k
            break;
230
563k
        }
231
232
795k
        if (cid >= _fields.size() || _fields[cid].is_null()) {
233
98.0k
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
234
98.0k
            continue;
235
98.0k
        }
236
237
695k
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
238
695k
        _encode_column_value(column, _fields[cid], is_mow, buf);
239
695k
    }
240
775k
}
_ZNK5doris9RowCursor23encode_key_with_paddingILb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEmb
Line
Count
Source
216
2.99M
                                        bool padding_minimal) const {
217
14.3M
    for (uint32_t cid = 0; cid < num_keys; cid++) {
218
14.2M
        auto* column = _schema->column(cid);
219
14.2M
        if (column == nullptr) {
220
2.91M
            if (padding_minimal) {
221
1.34M
                buf->push_back(KeyConsts::KEY_MINIMAL_MARKER);
222
1.57M
            } else {
223
1.57M
                if (is_mow) {
224
1.57M
                    buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER);
225
18.4E
                } else {
226
18.4E
                    buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER);
227
18.4E
                }
228
1.57M
            }
229
2.91M
            break;
230
2.91M
        }
231
232
11.3M
        if (cid >= _fields.size() || _fields[cid].is_null()) {
233
16.1k
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
234
16.1k
            continue;
235
16.1k
        }
236
237
11.3M
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
238
11.3M
        _encode_column_value(column, _fields[cid], is_mow, buf);
239
11.3M
    }
240
2.99M
}
241
242
// Explicit template instantiations
243
// is_mow = false: upper-bound padding uses KEY_MAXIMAL_MARKER; slice values use index-prefix encoding.
template void RowCursor::encode_key_with_padding<false>(std::string*, size_t, bool) const;
244
// is_mow = true: upper-bound padding uses KEY_NORMAL_NEXT_MARKER; slice values are fully encoded.
template void RowCursor::encode_key_with_padding<true>(std::string*, size_t, bool) const;
245
246
template <bool full_encode>
247
129k
void RowCursor::encode_key(std::string* buf, size_t num_keys) const {
248
387k
    for (uint32_t cid = 0; cid < num_keys; cid++) {
249
258k
        if (cid >= _fields.size() || _fields[cid].is_null()) {
250
6
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
251
6
            continue;
252
6
        }
253
258k
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
254
258k
        _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf);
255
258k
    }
256
129k
}
_ZNK5doris9RowCursor10encode_keyILb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm
Line
Count
Source
247
34
void RowCursor::encode_key(std::string* buf, size_t num_keys) const {
248
83
    for (uint32_t cid = 0; cid < num_keys; cid++) {
249
49
        if (cid >= _fields.size() || _fields[cid].is_null()) {
250
6
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
251
6
            continue;
252
6
        }
253
43
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
254
43
        _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf);
255
43
    }
256
34
}
_ZNK5doris9RowCursor10encode_keyILb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm
Line
Count
Source
247
129k
void RowCursor::encode_key(std::string* buf, size_t num_keys) const {
248
387k
    for (uint32_t cid = 0; cid < num_keys; cid++) {
249
258k
        if (cid >= _fields.size() || _fields[cid].is_null()) {
250
0
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
251
0
            continue;
252
0
        }
253
258k
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
254
258k
        _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf);
255
258k
    }
256
129k
}
257
258
// full_encode = false: slice-typed columns are encoded via encode_ascending with the column's index_length().
template void RowCursor::encode_key<false>(std::string*, size_t) const;
259
// full_encode = true: slice-typed columns are encoded via full_encode_ascending.
template void RowCursor::encode_key<true>(std::string*, size_t) const;
260
261
} // namespace doris