Coverage Report

Created: 2025-06-13 23:07

/root/doris/be/src/util/key_util.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/olap_file.pb.h>
21
#include <gen_cpp/segment_v2.pb.h>
22
23
#include <cstdint>
24
#include <string>
25
26
#include "util/slice.h"
27
28
namespace doris {
29
30
// In our system, we have a more complicated situation.
31
// First, our keys can be null.
32
// Second, when key columns are not complete we want to distinguish GT and GE. For example,
33
// there are two key columns a and b, we have only one condition a > 1. We can only encode
34
// a prefix key 1, which is less than 1|2. This will make us read more data than
35
// we actually need. So we want to add more markers.
36
// a > 1: will be encoded into 1|\xFF
37
// a >= 1: will be encoded into 1|\x00
38
// a = 1 and b > 1: will be encoded into 1|\x02|1
39
// a = 1 and b is null: will be encoded into 1|\x01
40
41
// Used to represent minimal value for that field
42
constexpr uint8_t KEY_MINIMAL_MARKER = 0x00;
43
// Used to represent a null field, whose value is treated as smaller than any other value
44
constexpr uint8_t KEY_NULL_FIRST_MARKER = 0x01;
45
// Used to represent a normal field, which content is encoded after this marker
46
constexpr uint8_t KEY_NORMAL_MARKER = 0x02;
47
// Used to represent maximal value for that field
48
constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF;
49
// Used to represent a value greater than the normal marker by 1; used by MoW
50
constexpr uint8_t KEY_NORMAL_NEXT_MARKER = 0x03;
51
52
// Encode one row into binary according given num_keys.
53
// A cell will be encoded in the format of a marker and encoded content.
54
// When function encoding row, if any cell isn't found in row, this function will
55
// fill a marker and return. If padding_minimal is true, KEY_MINIMAL_MARKER will
56
// be added, if padding_minimal is false, KEY_MAXIMAL_MARKER will be added.
57
// If all num_keys are found in row, no marker will be added.
58
template <typename RowType, bool is_mow = false>
59
void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_keys,
60
2
                             bool padding_minimal) {
61
6
    for (auto cid = 0; cid < num_keys; cid++) {
62
6
        auto field = row.schema()->column(cid);
63
6
        if (field == nullptr) {
64
2
            if (padding_minimal) {
65
1
                buf->push_back(KEY_MINIMAL_MARKER);
66
1
            } else {
67
1
                if (is_mow) {
68
0
                    buf->push_back(KEY_NORMAL_NEXT_MARKER);
69
1
                } else {
70
1
                    buf->push_back(KEY_MAXIMAL_MARKER);
71
1
                }
72
1
            }
73
2
            break;
74
2
        }
75
76
4
        auto cell = row.cell(cid);
77
4
        if (cell.is_null()) {
78
1
            buf->push_back(KEY_NULL_FIRST_MARKER);
79
1
            continue;
80
1
        }
81
3
        buf->push_back(KEY_NORMAL_MARKER);
82
3
        if (is_mow) {
83
0
            field->full_encode_ascending(cell.cell_ptr(), buf);
84
3
        } else {
85
3
            field->encode_ascending(cell.cell_ptr(), buf);
86
3
        }
87
3
    }
88
2
}
_ZN5doris23encode_key_with_paddingINS_9RowCursorELb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_mb
Line
Count
Source
60
2
                             bool padding_minimal) {
61
6
    for (auto cid = 0; cid < num_keys; cid++) {
62
6
        auto field = row.schema()->column(cid);
63
6
        if (field == nullptr) {
64
2
            if (padding_minimal) {
65
1
                buf->push_back(KEY_MINIMAL_MARKER);
66
1
            } else {
67
1
                if (is_mow) {
68
0
                    buf->push_back(KEY_NORMAL_NEXT_MARKER);
69
1
                } else {
70
1
                    buf->push_back(KEY_MAXIMAL_MARKER);
71
1
                }
72
1
            }
73
2
            break;
74
2
        }
75
76
4
        auto cell = row.cell(cid);
77
4
        if (cell.is_null()) {
78
1
            buf->push_back(KEY_NULL_FIRST_MARKER);
79
1
            continue;
80
1
        }
81
3
        buf->push_back(KEY_NORMAL_MARKER);
82
3
        if (is_mow) {
83
0
            field->full_encode_ascending(cell.cell_ptr(), buf);
84
3
        } else {
85
3
            field->encode_ascending(cell.cell_ptr(), buf);
86
3
        }
87
3
    }
88
2
}
Unexecuted instantiation: _ZN5doris23encode_key_with_paddingINS_9RowCursorELb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_mb
89
90
// Encode one row into binary according given num_keys.
91
// Client call this function must assure that row contains the first
92
// num_keys columns.
93
template <typename RowType, bool full_encode = false>
94
128k
void encode_key(std::string* buf, const RowType& row, size_t num_keys) {
95
386k
    for (auto cid = 0; cid < num_keys; cid++) {
96
257k
        auto cell = row.cell(cid);
97
257k
        if (cell.is_null()) {
98
1
            buf->push_back(KEY_NULL_FIRST_MARKER);
99
1
            continue;
100
1
        }
101
257k
        buf->push_back(KEY_NORMAL_MARKER);
102
257k
        if (full_encode) {
103
257k
            row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), buf);
104
257k
        } else {
105
1
            row.schema()->column(cid)->encode_ascending(cell.cell_ptr(), buf);
106
1
        }
107
257k
    }
108
128k
}
_ZN5doris10encode_keyINS_9RowCursorELb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_m
Line
Count
Source
94
1
void encode_key(std::string* buf, const RowType& row, size_t num_keys) {
95
3
    for (auto cid = 0; cid < num_keys; cid++) {
96
2
        auto cell = row.cell(cid);
97
2
        if (cell.is_null()) {
98
1
            buf->push_back(KEY_NULL_FIRST_MARKER);
99
1
            continue;
100
1
        }
101
1
        buf->push_back(KEY_NORMAL_MARKER);
102
1
        if (full_encode) {
103
0
            row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), buf);
104
1
        } else {
105
1
            row.schema()->column(cid)->encode_ascending(cell.cell_ptr(), buf);
106
1
        }
107
1
    }
108
1
}
_ZN5doris10encode_keyINS_9RowCursorELb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_m
Line
Count
Source
94
128k
void encode_key(std::string* buf, const RowType& row, size_t num_keys) {
95
386k
    for (auto cid = 0; cid < num_keys; cid++) {
96
257k
        auto cell = row.cell(cid);
97
257k
        if (cell.is_null()) {
98
0
            buf->push_back(KEY_NULL_FIRST_MARKER);
99
0
            continue;
100
0
        }
101
257k
        buf->push_back(KEY_NORMAL_MARKER);
102
257k
        if (full_encode) {
103
257k
            row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), buf);
104
257k
        } else {
105
0
            row.schema()->column(cid)->encode_ascending(cell.cell_ptr(), buf);
106
0
        }
107
257k
    }
108
128k
}
109
110
// We can only determine that a key is excluded from the segment
111
// based on a strict ordering comparison against the segment's key bounds
112
bool key_is_not_in_segment(Slice key, const KeyBoundsPB& segment_key_bounds,
113
                           bool is_segments_key_bounds_truncated);
114
115
} // namespace doris