Coverage Report

Created: 2026-06-04 03:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/row_cursor.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/row_cursor.h"
19
20
#include <glog/logging.h>
21
22
#include <algorithm>
23
#include <numeric>
24
#include <ostream>
25
26
#include "common/cast_set.h"
27
#include "common/consts.h"
28
#include "core/data_type/primitive_type.h"
29
#include "core/field.h"
30
#include "storage/key_coder.h"
31
#include "storage/olap_common.h"
32
#include "storage/olap_define.h"
33
#include "storage/tablet/tablet_schema.h"
34
#include "storage/types.h"
35
#include "util/slice.h"
36
37
namespace doris {
38
using namespace ErrorCode;
39
40
8.95k
// Special member functions: construction and destruction are compiler-generated,
// and both move operations are defaulted and declared noexcept.
RowCursor::RowCursor() = default;
41
8.95k
RowCursor::~RowCursor() = default;
42
0
RowCursor::RowCursor(RowCursor&&) noexcept = default;
43
0
RowCursor& RowCursor::operator=(RowCursor&&) noexcept = default;
44
45
8.94k
// Rebuilds `_schema` from the columns of `schema`, selecting the column ids
// 0, 1, ..., column_count - 1.
void RowCursor::_init_schema(TabletSchemaSPtr schema, uint32_t column_count) {
    // Consecutive ids starting at zero, one per requested column.
    std::vector<uint32_t> leading_ids(column_count);
    std::iota(leading_ids.begin(), leading_ids.end(), 0);
    _schema = std::make_unique<Schema>(schema->columns(), leading_ids);
}
50
51
8.89k
// Initializes the cursor from `tuple`, using the first `tuple.size()` columns
// of `schema`.
//
// Returns INVALID_ARGUMENT when the tuple holds more values than the schema has
// columns; otherwise returns whatever `_from_tuple` reports.
Status RowCursor::init(TabletSchemaSPtr schema, const OlapTuple& tuple) {
    const size_t key_size = tuple.size();
    if (key_size > schema->num_columns()) {
        return Status::Error<INVALID_ARGUMENT>(
                "Input param are invalid. Column count is bigger than num_columns of schema. "
                "column_count={}, schema.num_columns={}",
                key_size, schema->num_columns());
    }
    // `schema` is not used again below, so move it rather than paying for a
    // second shared-pointer copy.
    _init_schema(std::move(schema), cast_set<uint32_t>(key_size));
    return _from_tuple(tuple);
}
62
63
63
// Initializes the cursor as a scan key: the schema is restricted to the first
// `fields.size()` columns of `schema` and `fields` is adopted as the cursor's
// values.
//
// Returns INVALID_ARGUMENT when more fields are supplied than the schema has
// columns, OK otherwise.
Status RowCursor::init_scan_key(TabletSchemaSPtr schema, std::vector<Field> fields) {
    const size_t key_size = fields.size();
    if (key_size > schema->num_columns()) {
        return Status::Error<INVALID_ARGUMENT>(
                "Input param are invalid. Column count is bigger than num_columns of schema. "
                "column_count={}, schema.num_columns={}",
                key_size, schema->num_columns());
    }
    // Both by-value parameters are consumed here; move them instead of copying.
    _init_schema(std::move(schema), cast_set<uint32_t>(key_size));
    _fields = std::move(fields);
    return Status::OK();
}
75
76
8.89k
// Copies every value of `tuple` into `_fields`, in tuple order.
//
// Returns INVALID_ARGUMENT when the tuple size differs from the number of
// column ids in `_schema`, OK otherwise.
Status RowCursor::_from_tuple(const OlapTuple& tuple) {
    const size_t tuple_size = tuple.size();
    if (tuple_size != _schema->num_column_ids()) {
        return Status::Error<INVALID_ARGUMENT>(
                "column count does not match. tuple_size={}, field_count={}", tuple_size,
                _schema->num_column_ids());
    }

    _fields.resize(tuple_size);
    size_t position = 0;
    for (auto& slot : _fields) {
        slot = tuple.get_field(position);
        ++position;
    }
    return Status::OK();
}
88
89
0
// Returns a copy of this cursor: the schema object is duplicated (not shared)
// and the field values are copied.
//
// A cursor that was never initialized has no schema; in that case the copy is
// left without one as well instead of dereferencing a null pointer.
RowCursor RowCursor::clone() const {
    RowCursor copy;
    if (_schema != nullptr) {
        copy._schema = std::make_unique<Schema>(*_schema);
    }
    copy._fields = _fields;
    return copy;
}
95
96
22.8k
std::string RowCursor::to_string() const {
97
22.8k
    std::string result;
98
83.2k
    for (size_t i = 0; i < _fields.size(); ++i) {
99
60.4k
        if (i > 0) {
100
37.5k
            result.append("|");
101
37.5k
        }
102
60.4k
        if (_fields[i].is_null()) {
103
362
            result.append("1&NULL");
104
60.0k
        } else {
105
60.0k
            result.append("0&");
106
60.0k
            result.append(
107
60.0k
                    _fields[i].to_debug_string(_schema->column(cast_set<uint32_t>(i))->frac()));
108
60.0k
        }
109
60.4k
    }
110
22.8k
    return result;
111
22.8k
}
112
113
void RowCursor::_encode_column_value(const TabletColumn* column, const Field& value,
114
359k
                                     bool full_encode, std::string* buf) const {
115
359k
    FieldType ft = column->type();
116
359k
    const KeyCoder* coder = get_key_coder(ft);
117
118
359k
    if (field_is_slice_type(ft)) {
119
        // String types: CHAR, VARCHAR, STRING — all stored as String in Field.
120
30.1k
        const String& str = value.get<TYPE_STRING>();
121
122
30.1k
        if (ft == FieldType::OLAP_FIELD_TYPE_CHAR) {
123
            // CHAR type: must pad with \0 to the declared column length
124
5
            size_t col_len = column->length();
125
5
            String padded(col_len, '\0');
126
5
            memcpy(padded.data(), str.data(), std::min(str.size(), col_len));
127
128
5
            Slice slice(padded.data(), col_len);
129
5
            if (full_encode) {
130
2
                coder->full_encode_ascending(&slice, buf);
131
3
            } else {
132
3
                coder->encode_ascending(&slice, column->index_length(), buf);
133
3
            }
134
30.1k
        } else {
135
            // VARCHAR / STRING: use actual length
136
30.1k
            Slice slice(str.data(), str.size());
137
30.1k
            if (full_encode) {
138
30.0k
                coder->full_encode_ascending(&slice, buf);
139
30.0k
            } else {
140
83
                coder->encode_ascending(&slice, column->index_length(), buf);
141
83
            }
142
30.1k
        }
143
30.1k
        return;
144
30.1k
    }
145
146
    // Non-string scalar keys are fixed-width; their KeyCoder::encode_ascending
147
    // ignores `index_size` and delegates to full_encode_ascending, so the
148
    // `full_encode` flag here is a no-op and we always call the full helper.
149
329k
    switch (ft) {
150
0
#define CASE(FT, PT)                                                    \
151
329k
    case FieldType::FT:                                                 \
152
329k
        full_encode_field_as_key<PrimitiveType::PT>(value, coder, buf); \
153
329k
        break;
154
329k
        DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE)
155
0
#undef CASE
156
0
    default:
157
0
        LOG(FATAL) << "unsupported field type for encoding: " << int(ft);
158
0
        break;
159
329k
    }
160
329k
}
161
162
// Encodes the first `num_keys` key columns as a memcomparable byte string.
163
// Each slot is [marker][value bytes]. The marker sits at a position that
164
// real entries fill with KEY_NORMAL_MARKER (0x02), so any byte > 0x02 there
165
// sorts strictly after every real entry — independent of the value bytes.
166
//
167
// Examples — PK (a STRING, b STRING), stored entry (foo, bar) encodes as
168
// `02 foo | 02 bar`. Calls with num_keys=2 and only partial key "foo":
169
//
170
//   padding_minimal=true                  -> 02 foo | 00          (MINIMAL)
171
//   padding_minimal=false, is_mow=false   -> 02 foo | FF          (MAXIMAL)
172
//   padding_minimal=false, is_mow=true    -> 02 foo | 03      (NORMAL_NEXT)
173
template <bool is_mow>
174
void RowCursor::encode_key_with_padding(std::string* buf, size_t num_keys,
175
14.5k
                                        bool padding_minimal) const {
176
51.4k
    for (uint32_t cid = 0; cid < num_keys; cid++) {
177
51.0k
        auto* column = _schema->column(cid);
178
51.0k
        if (column == nullptr) {
179
14.1k
            if (padding_minimal) {
180
6.71k
                buf->push_back(KeyConsts::KEY_MINIMAL_MARKER);
181
7.38k
            } else {
182
7.38k
                if (is_mow) {
183
6.13k
                    buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER);
184
6.13k
                } else {
185
1.24k
                    buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER);
186
1.24k
                }
187
7.38k
            }
188
14.1k
            break;
189
14.1k
        }
190
191
36.9k
        if (cid >= _fields.size() || _fields[cid].is_null()) {
192
173
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
193
173
            continue;
194
173
        }
195
196
36.7k
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
197
36.7k
        _encode_column_value(column, _fields[cid], is_mow, buf);
198
36.7k
    }
199
14.5k
}
_ZNK5doris9RowCursor23encode_key_with_paddingILb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEmb
Line
Count
Source
175
2.82k
                                        bool padding_minimal) const {
176
5.65k
    for (uint32_t cid = 0; cid < num_keys; cid++) {
177
5.21k
        auto* column = _schema->column(cid);
178
5.21k
        if (column == nullptr) {
179
2.38k
            if (padding_minimal) {
180
1.13k
                buf->push_back(KeyConsts::KEY_MINIMAL_MARKER);
181
1.25k
            } else {
182
1.25k
                if (is_mow) {
183
0
                    buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER);
184
1.25k
                } else {
185
1.25k
                    buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER);
186
1.25k
                }
187
1.25k
            }
188
2.38k
            break;
189
2.38k
        }
190
191
2.83k
        if (cid >= _fields.size() || _fields[cid].is_null()) {
192
113
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
193
113
            continue;
194
113
        }
195
196
2.71k
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
197
2.71k
        _encode_column_value(column, _fields[cid], is_mow, buf);
198
2.71k
    }
199
2.82k
}
_ZNK5doris9RowCursor23encode_key_with_paddingILb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEmb
Line
Count
Source
175
11.7k
                                        bool padding_minimal) const {
176
45.7k
    for (uint32_t cid = 0; cid < num_keys; cid++) {
177
45.7k
        auto* column = _schema->column(cid);
178
45.7k
        if (column == nullptr) {
179
11.7k
            if (padding_minimal) {
180
5.58k
                buf->push_back(KeyConsts::KEY_MINIMAL_MARKER);
181
6.13k
            } else {
182
6.13k
                if (is_mow) {
183
6.13k
                    buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER);
184
18.4E
                } else {
185
18.4E
                    buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER);
186
18.4E
                }
187
6.13k
            }
188
11.7k
            break;
189
11.7k
        }
190
191
34.0k
        if (cid >= _fields.size() || _fields[cid].is_null()) {
192
60
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
193
60
            continue;
194
60
        }
195
196
34.0k
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
197
34.0k
        _encode_column_value(column, _fields[cid], is_mow, buf);
198
34.0k
    }
199
11.7k
}
200
201
// Explicit template instantiations
202
template void RowCursor::encode_key_with_padding<false>(std::string*, size_t, bool) const;
203
template void RowCursor::encode_key_with_padding<true>(std::string*, size_t, bool) const;
204
205
template <bool full_encode>
206
129k
void RowCursor::encode_key(std::string* buf, size_t num_keys) const {
207
387k
    for (uint32_t cid = 0; cid < num_keys; cid++) {
208
258k
        if (cid >= _fields.size() || _fields[cid].is_null()) {
209
6
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
210
6
            continue;
211
6
        }
212
258k
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
213
258k
        _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf);
214
258k
    }
215
129k
}
_ZNK5doris9RowCursor10encode_keyILb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm
Line
Count
Source
206
34
void RowCursor::encode_key(std::string* buf, size_t num_keys) const {
207
83
    for (uint32_t cid = 0; cid < num_keys; cid++) {
208
49
        if (cid >= _fields.size() || _fields[cid].is_null()) {
209
6
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
210
6
            continue;
211
6
        }
212
43
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
213
43
        _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf);
214
43
    }
215
34
}
_ZNK5doris9RowCursor10encode_keyILb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm
Line
Count
Source
206
129k
void RowCursor::encode_key(std::string* buf, size_t num_keys) const {
207
387k
    for (uint32_t cid = 0; cid < num_keys; cid++) {
208
258k
        if (cid >= _fields.size() || _fields[cid].is_null()) {
209
0
            buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
210
0
            continue;
211
0
        }
212
258k
        buf->push_back(KeyConsts::KEY_NORMAL_MARKER);
213
258k
        _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf);
214
258k
    }
215
129k
}
216
217
template void RowCursor::encode_key<false>(std::string*, size_t) const;
218
template void RowCursor::encode_key<true>(std::string*, size_t) const;
219
220
} // namespace doris