be/src/storage/olap_utils.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/Opcodes_types.h> |
21 | | #include <glog/logging.h> |
22 | | #include <math.h> |
23 | | |
24 | | #include <sstream> |
25 | | |
26 | | #include "common/logging.h" |
27 | | #include "core/data_type/primitive_type.h" |
28 | | #include "storage/olap_tuple.h" |
29 | | |
30 | | namespace doris { |
31 | | |
/// Pointer to a function that takes two untyped, read-only operands and returns a bool.
/// NOTE(review): the name suggests a "left is larger than right" comparison, but no
/// user of this alias is visible in this header — confirm against the call sites.
using CompareLargeFunc = bool (*)(const void*, const void*);
33 | | |
34 | | /// OlapScanRange represents a single key-range interval used to scan an OLAP tablet. |
35 | | /// |
36 | | /// It is the final product of the scan-key generation pipeline: |
37 | | /// |
38 | | /// SQL WHERE conjuncts |
39 | | /// -> ColumnValueRange (per-column value constraints, see olap_scan_common.h) |
40 | | /// -> OlapScanKeys::extend_scan_key() (combine columns into multi-column prefix keys) |
41 | | /// -> OlapScanKeys::get_key_range() (emit one OlapScanRange per key pair) |
42 | | /// -> OlapScanner / tablet reader (use ranges for short-key index lookup) |
43 | | /// |
44 | | /// Example – table t(k1 INT, k2 INT, v INT) with key columns (k1, k2): |
45 | | /// |
46 | | /// WHERE k1 IN (1, 2) AND k2 = 10 |
47 | | /// => two OlapScanRange objects: |
48 | | /// range0: begin=(1, 10) end=(1, 10) include=[true, true] -- point lookup |
49 | | /// range1: begin=(2, 10) end=(2, 10) include=[true, true] -- point lookup |
50 | | /// |
51 | | /// WHERE k1 >= 5 AND k1 < 10 |
52 | | /// => one OlapScanRange: |
53 | | /// begin=(5) end=(10) begin_include=true end_include=false |
54 | | /// |
55 | | /// No key predicates at all (full table scan): |
56 | | /// => one default-constructed OlapScanRange with has_lower_bound=false, has_upper_bound=false. |
57 | | /// Consumers detect this and skip pushing key range to the reader (fall back to full scan). |
58 | | /// |
59 | | struct OlapScanRange { |
60 | | public: |
61 | | OlapScanRange() |
62 | 520k | : begin_include(true), |
63 | 520k | end_include(true), |
64 | 520k | has_lower_bound(false), |
65 | 520k | has_upper_bound(false) {} |
66 | | |
67 | | bool begin_include; |
68 | | bool end_include; |
69 | | |
70 | | /// Whether this range carries real begin/end bounds. |
71 | | /// false only for the default-constructed "full scan" placeholder |
72 | | /// (created when no key predicates exist at all). |
73 | | bool has_lower_bound; |
74 | | bool has_upper_bound; |
75 | | |
76 | | OlapTuple begin_scan_range; |
77 | | OlapTuple end_scan_range; |
78 | | |
79 | 0 | std::string debug_string() const { |
80 | 0 | std::ostringstream buf; |
81 | 0 | buf << "begin=(" << begin_scan_range.debug_string() << "), end=(" |
82 | 0 | << end_scan_range.debug_string() << ")"; |
83 | 0 | return buf.str(); |
84 | 0 | } |
85 | | }; |
86 | | |
/// Comparison / membership operators produced by to_olap_filter_type().
/// The numeric values are spelled out explicitly and must stay as they are.
enum SQLFilterOp {
    FILTER_LARGER = 0,          // "gt"
    FILTER_LARGER_OR_EQUAL = 1, // "ge"
    FILTER_LESS = 2,            // "lt"
    FILTER_LESS_OR_EQUAL = 3,   // "le"
    FILTER_IN = 4,              // "in"
    FILTER_NOT_IN = 5,          // "not_in"
    FILTER_EQ = 6,              // "eq"
    FILTER_NE = 7,              // "ne"
};
97 | | |
/// Variable template that evaluates to `false` for every PrimitiveType argument.
/// NOTE(review): presumably meant as a type-dependent `false` for static_assert in
/// otherwise-unreachable template branches — no use is visible in this header; confirm.
template <PrimitiveType>
constexpr bool always_false_v = false;
100 | | |
101 | 761k | inline SQLFilterOp to_olap_filter_type(const std::string& function_name) { |
102 | 761k | if (function_name == "lt") { |
103 | 10.9k | return FILTER_LESS; |
104 | 750k | } else if (function_name == "gt") { |
105 | 60.9k | return FILTER_LARGER; |
106 | 689k | } else if (function_name == "le") { |
107 | 52.7k | return FILTER_LESS_OR_EQUAL; |
108 | 636k | } else if (function_name == "ge") { |
109 | 70.9k | return FILTER_LARGER_OR_EQUAL; |
110 | 565k | } else if (function_name == "eq") { |
111 | 514k | return FILTER_EQ; |
112 | 514k | } else if (function_name == "ne") { |
113 | 3.39k | return FILTER_NE; |
114 | 53.4k | } else if (function_name == "in") { |
115 | 53.4k | return FILTER_IN; |
116 | 18.4E | } else if (function_name == "not_in") { |
117 | 806 | return FILTER_NOT_IN; |
118 | 18.4E | } else { |
119 | 18.4E | DCHECK(false) << "Function Name: " << function_name; |
120 | 18.4E | return FILTER_IN; |
121 | 18.4E | } |
122 | 761k | } |
123 | | |
/// Kinds of MATCH predicate. UNKNOWN (-1) is what the string overload of
/// to_match_type() returns for a name it does not recognize.
/// The numeric values are explicit and non-contiguous; keep them as they are.
enum class MatchType {
    UNKNOWN = -1,
    MATCH_ANY = 0,           // "match_any"
    MATCH_ALL = 1,           // "match_all"
    MATCH_PHRASE = 2,        // "match_phrase"
    MATCH_PHRASE_PREFIX = 8, // "match_phrase_prefix"
    MATCH_REGEXP = 9,        // "match_regexp"
    MATCH_PHRASE_EDGE = 10,  // "match_phrase_edge"
};
133 | | |
134 | 0 | inline MatchType to_match_type(TExprOpcode::type type) { |
135 | 0 | switch (type) { |
136 | 0 | case TExprOpcode::type::MATCH_ANY: |
137 | 0 | return MatchType::MATCH_ANY; |
138 | 0 | break; |
139 | 0 | case TExprOpcode::type::MATCH_ALL: |
140 | 0 | return MatchType::MATCH_ALL; |
141 | 0 | break; |
142 | 0 | case TExprOpcode::type::MATCH_PHRASE: |
143 | 0 | return MatchType::MATCH_PHRASE; |
144 | 0 | break; |
145 | 0 | case TExprOpcode::type::MATCH_PHRASE_PREFIX: |
146 | 0 | return MatchType::MATCH_PHRASE_PREFIX; |
147 | 0 | break; |
148 | 0 | case TExprOpcode::type::MATCH_REGEXP: |
149 | 0 | return MatchType::MATCH_REGEXP; |
150 | 0 | break; |
151 | 0 | case TExprOpcode::type::MATCH_PHRASE_EDGE: |
152 | 0 | return MatchType::MATCH_PHRASE_EDGE; |
153 | 0 | break; |
154 | 0 | default: |
155 | 0 | VLOG_CRITICAL << "TExprOpcode: " << type; |
156 | 0 | DCHECK(false); |
157 | 0 | } |
158 | 0 | return MatchType::MATCH_ANY; |
159 | 0 | } |
160 | | |
161 | 0 | inline MatchType to_match_type(const std::string& condition_op) { |
162 | 0 | if (condition_op.compare("match_any") == 0) { |
163 | 0 | return MatchType::MATCH_ANY; |
164 | 0 | } else if (condition_op.compare("match_all") == 0) { |
165 | 0 | return MatchType::MATCH_ALL; |
166 | 0 | } else if (condition_op.compare("match_phrase") == 0) { |
167 | 0 | return MatchType::MATCH_PHRASE; |
168 | 0 | } else if (condition_op.compare("match_phrase_prefix") == 0) { |
169 | 0 | return MatchType::MATCH_PHRASE_PREFIX; |
170 | 0 | } else if (condition_op.compare("match_regexp") == 0) { |
171 | 0 | return MatchType::MATCH_REGEXP; |
172 | 0 | } else if (condition_op.compare("match_phrase_edge") == 0) { |
173 | 0 | return MatchType::MATCH_PHRASE_EDGE; |
174 | 0 | } |
175 | 0 | return MatchType::UNKNOWN; |
176 | 0 | } |
177 | | |
/// Returns true when `op` names one of the match operators:
/// match_any, match_all, match_phrase, match_phrase_prefix, match_regexp,
/// match_phrase_edge.
///
/// The comparison goes through strcasecmp on op.c_str(), so it ignores case and only
/// looks at the characters before the first NUL byte of `op`.
inline bool is_match_condition(const std::string& op) {
    static constexpr const char* kMatchOperatorNames[] = {
            "match_any",    "match_all",         "match_phrase", "match_phrase_prefix",
            "match_regexp", "match_phrase_edge",
    };

    const char* candidate = op.c_str();
    for (const char* known_name : kMatchOperatorNames) {
        if (strcasecmp(candidate, known_name) == 0) {
            return true;
        }
    }
    return false;
}
188 | | |
189 | 0 | inline bool is_match_operator(const TExprOpcode::type& op_type) { |
190 | 0 | return TExprOpcode::MATCH_ANY == op_type || TExprOpcode::MATCH_ALL == op_type || |
191 | 0 | TExprOpcode::MATCH_PHRASE == op_type || TExprOpcode::MATCH_PHRASE_PREFIX == op_type || |
192 | 0 | TExprOpcode::MATCH_REGEXP == op_type || TExprOpcode::MATCH_PHRASE_EDGE == op_type; |
193 | 0 | } |
194 | | |
195 | | } // namespace doris |