Coverage Report

Created: 2026-06-22 19:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/key/row_key_encoder.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/key/row_key_encoder.h"
19
20
#include <cassert>
21
22
#include "common/cast_set.h"
23
#include "common/compiler_util.h" // IWYU pragma: keep
24
#include "common/consts.h"
25
#include "common/logging.h"
26
#include "storage/iterator/olap_data_convertor.h"
27
#include "storage/key_coder.h"
28
#include "storage/tablet/tablet_schema.h"
29
30
namespace doris {
31
32
2.49k
// Builds the coder tables used to encode row keys for `schema`.
//
// Two views are prepared:
//   * schema-key view: one coder per schema key column;
//   * sort-key view: one coder (plus index length) per column the segment
//     sorts by.
// `mow` selects the cluster-key sort view when the schema also declares
// cluster key uids.
RowKeyEncoder::RowKeyEncoder(const TabletSchema& schema, bool mow) {
    const auto key_column_count = schema.num_key_columns();

    // Schema-key view: always the schema key columns. The primary key index
    // is built over these even when the segment sorts by cluster keys.
    for (size_t cid = 0; cid < key_column_count; ++cid) {
        _schema_key_coders.push_back(get_key_coder(schema.column(cid).type()));
    }
    _num_short_key_columns = schema.num_short_key_columns();

    // The sequence id is encoded into the primary key index, so remember its
    // coder and declared length when the schema has a sequence column.
    if (schema.has_sequence_col()) {
        const auto& seq_column = schema.column(schema.sequence_col_idx());
        _seq_coder = get_key_coder(seq_column.type());
        _seq_col_length = seq_column.length();
    }

    // Sort-key view: mow tables with cluster keys sort by the cluster key
    // columns (made unique by a rowid suffix); every other table sorts by the
    // schema key columns.
    const bool sorts_by_cluster_keys = mow && !schema.cluster_key_uids().empty();
    if (!sorts_by_cluster_keys) {
        for (size_t cid = 0; cid < key_column_count; ++cid) {
            _add_sort_key_column(schema.column(cid));
        }
        return;
    }

    _rowid_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT);
    for (const auto cluster_uid : schema.cluster_key_uids()) {
        _add_sort_key_column(schema.column_by_uid(cluster_uid));
    }
}
60
61
2.13k
void RowKeyEncoder::_add_sort_key_column(const TabletColumn& column) {
62
2.13k
    _sort_key_coders.push_back(get_key_coder(column.type()));
63
2.13k
    _sort_key_index_size.push_back(cast_set<uint16_t>(column.index_length()));
64
2.13k
}
65
66
std::string RowKeyEncoder::full_encode(const std::vector<IOlapColumnDataAccessor*>& key_columns,
67
6.78k
                                       size_t pos) const {
68
6.78k
    assert(_sort_key_index_size.size() == _sort_key_coders.size());
69
6.78k
    assert(key_columns.size() == _sort_key_coders.size());
70
6.78k
    return _full_encode(_sort_key_coders, key_columns, pos);
71
6.78k
}
72
73
std::string RowKeyEncoder::full_encode_primary_keys(
74
16
        const std::vector<IOlapColumnDataAccessor*>& key_columns, size_t pos) const {
75
16
    return _full_encode(_schema_key_coders, key_columns, pos);
76
16
}
77
78
std::string RowKeyEncoder::_full_encode(const std::vector<const KeyCoder*>& key_coders,
79
                                        const std::vector<IOlapColumnDataAccessor*>& key_columns,
80
6.80k
                                        size_t pos) {
81
6.80k
    assert(key_columns.size() == key_coders.size());
82
83
6.80k
    std::string encoded_keys;
84
6.80k
    size_t cid = 0;
85
6.80k
    for (const auto& column : key_columns) {
86
6.10k
        const auto* field = column->get_data_at(pos);
87
6.10k
        if (UNLIKELY(!field)) {
88
6
            encoded_keys.push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
89
6
            ++cid;
90
6
            continue;
91
6
        }
92
6.09k
        encoded_keys.push_back(KeyConsts::KEY_NORMAL_MARKER);
93
6.09k
        DCHECK(key_coders[cid] != nullptr);
94
6.09k
        key_coders[cid]->full_encode_ascending(field, &encoded_keys);
95
6.09k
        ++cid;
96
6.09k
    }
97
6.80k
    return encoded_keys;
98
6.80k
}
99
100
std::string RowKeyEncoder::encode_short_keys(
101
13.0k
        const std::vector<IOlapColumnDataAccessor*>& key_columns, size_t pos) const {
102
13.0k
    assert(key_columns.size() == _num_short_key_columns);
103
13.0k
    assert(key_columns.size() <= _sort_key_coders.size());
104
105
13.0k
    std::string encoded_keys;
106
13.0k
    size_t cid = 0;
107
13.0k
    for (const auto& column : key_columns) {
108
10.1k
        const auto* field = column->get_data_at(pos);
109
10.1k
        if (UNLIKELY(!field)) {
110
1
            encoded_keys.push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
111
1
            ++cid;
112
1
            continue;
113
1
        }
114
10.1k
        encoded_keys.push_back(KeyConsts::KEY_NORMAL_MARKER);
115
10.1k
        _sort_key_coders[cid]->encode_ascending(field, _sort_key_index_size[cid], &encoded_keys);
116
10.1k
        ++cid;
117
10.1k
    }
118
13.0k
    return encoded_keys;
119
13.0k
}
120
121
void RowKeyEncoder::append_seq_suffix(std::string* encoded_keys,
122
20
                                      const IOlapColumnDataAccessor* seq_column, size_t pos) const {
123
20
    const auto* field = seq_column->get_data_at(pos);
124
    // To facilitate the use of the primary key index, encode the seq column
125
    // to the minimum value of the corresponding length when the seq column
126
    // is null
127
20
    if (UNLIKELY(!field)) {
128
3
        encoded_keys->push_back(KeyConsts::KEY_NULL_FIRST_MARKER);
129
3
        encoded_keys->append(_seq_col_length, KeyConsts::KEY_MINIMAL_MARKER);
130
3
        return;
131
3
    }
132
17
    encoded_keys->push_back(KeyConsts::KEY_NORMAL_MARKER);
133
17
    _seq_coder->full_encode_ascending(field, encoded_keys);
134
17
}
135
136
10
void RowKeyEncoder::append_rowid_suffix(std::string* encoded_keys, uint32_t rowid) const {
137
10
    encoded_keys->push_back(KeyConsts::KEY_NORMAL_MARKER);
138
10
    _rowid_coder->full_encode_ascending(&rowid, encoded_keys);
139
10
}
140
141
} // namespace doris