be/src/exec/scan/predicate_lm_utils.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <algorithm> |
21 | | #include <string> |
22 | | #include <string_view> |
23 | | #include <vector> |
24 | | |
25 | | #include "common/status.h" |
26 | | #include "storage/tablet/tablet_schema.h" |
27 | | #include "util/string_util.h" |
28 | | |
29 | | namespace doris { |
30 | | |
31 | | namespace predicate_lm_utils_detail { |
32 | | |
33 | 67 | inline std::string to_lower_trimmed(std::string_view sv) { |
34 | 67 | sv = doris::trim(sv); |
35 | 67 | return doris::to_lower(std::string(sv)); |
36 | 67 | } |
37 | | |
38 | 19 | inline std::string normalize_table_name_for_match(std::string_view table_name_sv) { |
39 | 19 | table_name_sv = doris::trim(table_name_sv); |
40 | 19 | std::string s(table_name_sv); |
41 | | |
42 | | // Strip rollup suffix: "tbl(rollup)" => "tbl". |
43 | 19 | if (!s.empty() && s.back() == ')') { |
44 | 13 | auto pos = s.find_last_of('('); |
45 | 13 | if (pos != std::string::npos) { |
46 | 13 | s.resize(pos); |
47 | 13 | } |
48 | 13 | } |
49 | | |
50 | 19 | return to_lower_trimmed(std::string_view(s)); |
51 | 19 | } |
52 | | |
// Returns true when `suffix` is no longer than `s` and the last suffix.size()
// characters of `s` equal `suffix`. An empty suffix always matches.
inline bool ends_with(std::string_view s, std::string_view suffix) {
    // The size check must come first: it keeps the offset below from underflowing.
    return suffix.size() <= s.size() &&
           s.compare(s.size() - suffix.size(), std::string_view::npos, suffix) == 0;
}
59 | | |
60 | 44 | inline std::string normalize_identifier_piece(std::string_view sv) { |
61 | 44 | sv = doris::trim(sv); |
62 | 44 | if (sv.size() >= 2 && sv.front() == '`' && sv.back() == '`') { |
63 | 2 | sv = sv.substr(1, sv.size() - 2); |
64 | 2 | sv = doris::trim(sv); |
65 | 2 | } |
66 | 44 | return std::string(sv); |
67 | 44 | } |
68 | | |
69 | | inline bool qualifier_matches_current_table(const std::string& normalized_current_full_table_name, |
70 | | const std::string& normalized_current_table_only, |
71 | 29 | const std::string& qualifier_raw) { |
72 | 29 | std::string qualifier = to_lower_trimmed(std::string_view(qualifier_raw)); |
73 | 29 | if (qualifier.empty()) { |
74 | 19 | return true; |
75 | 19 | } |
76 | | |
77 | | // qualifier="tbl" |
78 | 10 | if (qualifier.find('.') == std::string::npos) { |
79 | 5 | return qualifier == normalized_current_table_only; |
80 | 5 | } |
81 | | |
82 | | // qualifier="db.tbl" (also allow suffix match for catalog.db.tbl) |
83 | 5 | if (qualifier == normalized_current_full_table_name) { |
84 | 3 | return true; |
85 | 3 | } |
86 | 2 | std::string with_dot = "." + qualifier; |
87 | 2 | return ends_with(normalized_current_full_table_name, with_dot); |
88 | 5 | } |
89 | | |
90 | | } // namespace predicate_lm_utils_detail |
91 | | |
92 | | inline Status parse_predicate_lm_stage1_cols_to_column_ids(const std::string& cols, |
93 | | const TabletSchemaSPtr& tablet_schema, |
94 | | std::string_view current_db_name, |
95 | | std::string_view current_table_name, |
96 | 20 | std::vector<ColumnId>* column_ids) { |
97 | 20 | column_ids->clear(); |
98 | 20 | if (cols.empty()) { |
99 | 1 | return Status::OK(); |
100 | 1 | } |
101 | | |
102 | 19 | const std::string normalized_db = predicate_lm_utils_detail::to_lower_trimmed(current_db_name); |
103 | 19 | const std::string normalized_tbl = |
104 | 19 | predicate_lm_utils_detail::normalize_table_name_for_match(current_table_name); |
105 | 19 | const std::string normalized_current_table_only = normalized_tbl; |
106 | | |
107 | 19 | std::string normalized_current_full_table_name; |
108 | 19 | if (!normalized_db.empty()) { |
109 | 19 | normalized_current_full_table_name.reserve(normalized_db.size() + 1 + normalized_tbl.size()); |
110 | 19 | normalized_current_full_table_name.append(normalized_db); |
111 | 19 | normalized_current_full_table_name.push_back('.'); |
112 | 19 | normalized_current_full_table_name.append(normalized_tbl); |
113 | 19 | } else { |
114 | 0 | normalized_current_full_table_name = normalized_tbl; |
115 | 0 | } |
116 | | |
117 | 19 | std::vector<std::string> parts = doris::split(cols, ","); |
118 | | |
119 | 29 | for (const auto& part : parts) { |
120 | 29 | std::string_view token_sv = doris::trim(std::string_view(part)); |
121 | 29 | if (token_sv.empty()) { |
122 | 0 | continue; |
123 | 0 | } |
124 | | |
125 | | // Support qualified identifiers: tbl.col / db.tbl.col |
126 | | // (Backticks are supported on each identifier piece, e.g. `db`.`tbl`.`col`) |
127 | 29 | std::vector<std::string> dot_parts = doris::split(std::string(token_sv), "."); |
128 | 29 | std::vector<std::string> ident_parts; |
129 | 29 | ident_parts.reserve(dot_parts.size()); |
130 | 44 | for (const auto& dot_part : dot_parts) { |
131 | 44 | auto piece = predicate_lm_utils_detail::normalize_identifier_piece(std::string_view(dot_part)); |
132 | 44 | if (!piece.empty()) { |
133 | 44 | ident_parts.emplace_back(std::move(piece)); |
134 | 44 | } |
135 | 44 | } |
136 | 29 | if (ident_parts.empty()) { |
137 | 0 | continue; |
138 | 0 | } |
139 | | |
140 | 29 | std::string col_name = std::move(ident_parts.back()); |
141 | 29 | ident_parts.pop_back(); |
142 | | |
143 | 29 | std::string qualifier; |
144 | 29 | if (!ident_parts.empty()) { |
145 | 10 | qualifier.reserve(64); |
146 | 25 | for (size_t i = 0; i < ident_parts.size(); ++i) { |
147 | 15 | if (i > 0) { |
148 | 5 | qualifier.push_back('.'); |
149 | 5 | } |
150 | 15 | qualifier.append(ident_parts[i]); |
151 | 15 | } |
152 | 10 | } |
153 | | |
154 | 29 | if (!predicate_lm_utils_detail::qualifier_matches_current_table( |
155 | 29 | normalized_current_full_table_name, normalized_current_table_only, qualifier)) { |
156 | 3 | continue; |
157 | 3 | } |
158 | | |
159 | 26 | int32_t cid = tablet_schema->field_index(col_name); |
160 | 26 | if (cid < 0) { |
161 | 3 | cid = tablet_schema->field_index(doris::to_lower(col_name)); |
162 | 3 | } |
163 | | |
164 | | // Ignore unknown columns (do not fail the query). |
165 | 26 | if (cid < 0) { |
166 | 2 | continue; |
167 | 2 | } |
168 | | |
169 | 24 | column_ids->push_back(static_cast<ColumnId>(cid)); |
170 | 24 | } |
171 | | |
172 | 19 | std::sort(column_ids->begin(), column_ids->end()); |
173 | 19 | column_ids->erase(std::unique(column_ids->begin(), column_ids->end()), column_ids->end()); |
174 | 19 | return Status::OK(); |
175 | 20 | } |
176 | | |
177 | | } // namespace doris |