/root/doris/be/src/util/key_util.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/olap_file.pb.h> |
21 | | #include <gen_cpp/segment_v2.pb.h> |
22 | | |
23 | | #include <cstdint> |
24 | | #include <string> |
25 | | |
26 | | #include "util/slice.h" |
27 | | |
28 | | namespace doris { |
29 | | |
30 | | // In our system, key encoding has to handle a more complicated situation. |
31 | | // First, our keys can be null. |
32 | | // Second, when key columns are not complete we want to distinguish GT and GE. For example, |
33 | | // there are two key columns a and b, and we have only one condition a > 1. We can only encode |
34 | | // a prefix key 1, which is less than 1|2. This would make us read more data than |
35 | | // we actually need, so we add extra markers. |
36 | | // a > 1: will be encoded into 1|\xFF |
37 | | // a >= 1: will be encoded into 1|\x00 |
38 | | // a = 1 and b > 1: will be encoded into 1|\x02|1 |
39 | | // a = 1 and b is null: will be encoded into 1|\x01 |
40 | | |
41 | | // Marker that represents the minimal value for a field |
42 | | constexpr uint8_t KEY_MINIMAL_MARKER = 0x00; |
43 | | // Marker that represents a null field; null is treated as smaller than any other value |
44 | | constexpr uint8_t KEY_NULL_FIRST_MARKER = 0x01; |
45 | | // Marker that represents a normal field; the encoded content follows this marker |
46 | | constexpr uint8_t KEY_NORMAL_MARKER = 0x02; |
47 | | // Marker that represents the maximal value for a field |
48 | | constexpr uint8_t KEY_MAXIMAL_MARKER = 0xFF; |
49 | | // Marker whose value is the normal marker plus 1; used by MoW |
50 | | constexpr uint8_t KEY_NORMAL_NEXT_MARKER = 0x03; |
51 | | |
52 | | // Encode one row into binary according given num_keys. |
53 | | // A cell will be encoded in the format of a marker and encoded content. |
54 | | // When function encoding row, if any cell isn't found in row, this function will |
55 | | // fill a marker and return. If padding_minimal is true, KEY_MINIMAL_MARKER will |
56 | | // be added, if padding_minimal is false, KEY_MAXIMAL_MARKER will be added. |
57 | | // If all num_keys are found in row, no marker will be added. |
58 | | template <typename RowType, bool is_mow = false> |
59 | | void encode_key_with_padding(std::string* buf, const RowType& row, size_t num_keys, |
60 | 2 | bool padding_minimal) { |
61 | 6 | for (auto cid = 0; cid < num_keys; cid++) { |
62 | 6 | auto field = row.schema()->column(cid); |
63 | 6 | if (field == nullptr) { |
64 | 2 | if (padding_minimal) { |
65 | 1 | buf->push_back(KEY_MINIMAL_MARKER); |
66 | 1 | } else { |
67 | 1 | if (is_mow) { |
68 | 0 | buf->push_back(KEY_NORMAL_NEXT_MARKER); |
69 | 1 | } else { |
70 | 1 | buf->push_back(KEY_MAXIMAL_MARKER); |
71 | 1 | } |
72 | 1 | } |
73 | 2 | break; |
74 | 2 | } |
75 | | |
76 | 4 | auto cell = row.cell(cid); |
77 | 4 | if (cell.is_null()) { |
78 | 1 | buf->push_back(KEY_NULL_FIRST_MARKER); |
79 | 1 | continue; |
80 | 1 | } |
81 | 3 | buf->push_back(KEY_NORMAL_MARKER); |
82 | 3 | if (is_mow) { |
83 | 0 | field->full_encode_ascending(cell.cell_ptr(), buf); |
84 | 3 | } else { |
85 | 3 | field->encode_ascending(cell.cell_ptr(), buf); |
86 | 3 | } |
87 | 3 | } |
88 | 2 | } _ZN5doris23encode_key_with_paddingINS_9RowCursorELb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_mb Line | Count | Source | 60 | 2 | bool padding_minimal) { | 61 | 6 | for (auto cid = 0; cid < num_keys; cid++) { | 62 | 6 | auto field = row.schema()->column(cid); | 63 | 6 | if (field == nullptr) { | 64 | 2 | if (padding_minimal) { | 65 | 1 | buf->push_back(KEY_MINIMAL_MARKER); | 66 | 1 | } else { | 67 | 1 | if (is_mow) { | 68 | 0 | buf->push_back(KEY_NORMAL_NEXT_MARKER); | 69 | 1 | } else { | 70 | 1 | buf->push_back(KEY_MAXIMAL_MARKER); | 71 | 1 | } | 72 | 1 | } | 73 | 2 | break; | 74 | 2 | } | 75 | | | 76 | 4 | auto cell = row.cell(cid); | 77 | 4 | if (cell.is_null()) { | 78 | 1 | buf->push_back(KEY_NULL_FIRST_MARKER); | 79 | 1 | continue; | 80 | 1 | } | 81 | 3 | buf->push_back(KEY_NORMAL_MARKER); | 82 | 3 | if (is_mow) { | 83 | 0 | field->full_encode_ascending(cell.cell_ptr(), buf); | 84 | 3 | } else { | 85 | 3 | field->encode_ascending(cell.cell_ptr(), buf); | 86 | 3 | } | 87 | 3 | } | 88 | 2 | } |
Unexecuted instantiation: _ZN5doris23encode_key_with_paddingINS_9RowCursorELb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_mb |
89 | | |
90 | | // Encode one row into binary according given num_keys. |
91 | | // Client call this function must assure that row contains the first |
92 | | // num_keys columns. |
93 | | template <typename RowType, bool full_encode = false> |
94 | 128k | void encode_key(std::string* buf, const RowType& row, size_t num_keys) { |
95 | 386k | for (auto cid = 0; cid < num_keys; cid++) { |
96 | 257k | auto cell = row.cell(cid); |
97 | 257k | if (cell.is_null()) { |
98 | 1 | buf->push_back(KEY_NULL_FIRST_MARKER); |
99 | 1 | continue; |
100 | 1 | } |
101 | 257k | buf->push_back(KEY_NORMAL_MARKER); |
102 | 257k | if (full_encode) { |
103 | 257k | row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), buf); |
104 | 257k | } else { |
105 | 1 | row.schema()->column(cid)->encode_ascending(cell.cell_ptr(), buf); |
106 | 1 | } |
107 | 257k | } |
108 | 128k | } _ZN5doris10encode_keyINS_9RowCursorELb0EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_m Line | Count | Source | 94 | 1 | void encode_key(std::string* buf, const RowType& row, size_t num_keys) { | 95 | 3 | for (auto cid = 0; cid < num_keys; cid++) { | 96 | 2 | auto cell = row.cell(cid); | 97 | 2 | if (cell.is_null()) { | 98 | 1 | buf->push_back(KEY_NULL_FIRST_MARKER); | 99 | 1 | continue; | 100 | 1 | } | 101 | 1 | buf->push_back(KEY_NORMAL_MARKER); | 102 | 1 | if (full_encode) { | 103 | 0 | row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), buf); | 104 | 1 | } else { | 105 | 1 | row.schema()->column(cid)->encode_ascending(cell.cell_ptr(), buf); | 106 | 1 | } | 107 | 1 | } | 108 | 1 | } |
_ZN5doris10encode_keyINS_9RowCursorELb1EEEvPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKT_m Line | Count | Source | 94 | 128k | void encode_key(std::string* buf, const RowType& row, size_t num_keys) { | 95 | 386k | for (auto cid = 0; cid < num_keys; cid++) { | 96 | 257k | auto cell = row.cell(cid); | 97 | 257k | if (cell.is_null()) { | 98 | 0 | buf->push_back(KEY_NULL_FIRST_MARKER); | 99 | 0 | continue; | 100 | 0 | } | 101 | 257k | buf->push_back(KEY_NORMAL_MARKER); | 102 | 257k | if (full_encode) { | 103 | 257k | row.schema()->column(cid)->full_encode_ascending(cell.cell_ptr(), buf); | 104 | 257k | } else { | 105 | 0 | row.schema()->column(cid)->encode_ascending(cell.cell_ptr(), buf); | 106 | 0 | } | 107 | 257k | } | 108 | 128k | } |
|
109 | | |
110 | | // Reports whether `key` is known to be outside the segment. A key can only be shown to be |
111 | | // excluded from the segment by a strict ordering comparison against the segment's key bounds. |
112 | | bool key_is_not_in_segment(Slice key, const KeyBoundsPB& segment_key_bounds, |
113 | | bool is_segments_key_bounds_truncated); |
114 | | |
115 | | } // namespace doris |