Coverage Report

Created: 2026-06-26 13:27

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/scan/predicate_lm_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <algorithm>
21
#include <string>
22
#include <string_view>
23
#include <vector>
24
25
#include "common/status.h"
26
#include "storage/tablet/tablet_schema.h"
27
#include "util/string_util.h"
28
29
namespace doris {
30
31
namespace predicate_lm_utils_detail {
32
33
67
inline std::string to_lower_trimmed(std::string_view sv) {
34
67
    sv = doris::trim(sv);
35
67
    return doris::to_lower(std::string(sv));
36
67
}
37
38
19
inline std::string normalize_table_name_for_match(std::string_view table_name_sv) {
39
19
    table_name_sv = doris::trim(table_name_sv);
40
19
    std::string s(table_name_sv);
41
42
    // Strip rollup suffix: "tbl(rollup)" => "tbl".
43
19
    if (!s.empty() && s.back() == ')') {
44
13
        auto pos = s.find_last_of('(');
45
13
        if (pos != std::string::npos) {
46
13
            s.resize(pos);
47
13
        }
48
13
    }
49
50
19
    return to_lower_trimmed(std::string_view(s));
51
19
}
52
53
2
// Returns true when `s` ends with `suffix`.
//
// An empty `suffix` matches every `s` (including an empty one); a `suffix`
// longer than `s` never matches. The comparison is an exact, case-sensitive
// character comparison.
//
// The function is constexpr (and therefore implicitly inline) and noexcept:
// the size guard below guarantees the offset passed to compare() is in range,
// so the out-of-range exception path of std::string_view::compare cannot be hit.
constexpr bool ends_with(std::string_view s, std::string_view suffix) noexcept {
    return s.size() >= suffix.size() &&
           s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}
59
60
44
inline std::string normalize_identifier_piece(std::string_view sv) {
61
44
    sv = doris::trim(sv);
62
44
    if (sv.size() >= 2 && sv.front() == '`' && sv.back() == '`') {
63
2
        sv = sv.substr(1, sv.size() - 2);
64
2
        sv = doris::trim(sv);
65
2
    }
66
44
    return std::string(sv);
67
44
}
68
69
inline bool qualifier_matches_current_table(const std::string& normalized_current_full_table_name,
70
                                           const std::string& normalized_current_table_only,
71
29
                                           const std::string& qualifier_raw) {
72
29
    std::string qualifier = to_lower_trimmed(std::string_view(qualifier_raw));
73
29
    if (qualifier.empty()) {
74
19
        return true;
75
19
    }
76
77
    // qualifier="tbl"
78
10
    if (qualifier.find('.') == std::string::npos) {
79
5
        return qualifier == normalized_current_table_only;
80
5
    }
81
82
    // qualifier="db.tbl" (also allow suffix match for catalog.db.tbl)
83
5
    if (qualifier == normalized_current_full_table_name) {
84
3
        return true;
85
3
    }
86
2
    std::string with_dot = "." + qualifier;
87
2
    return ends_with(normalized_current_full_table_name, with_dot);
88
5
}
89
90
} // namespace predicate_lm_utils_detail
91
92
// Translates a comma-separated list of column references into column ids of
// `tablet_schema`.
//
// Each entry of `cols` may be a bare column ("col") or a dotted reference
// ("tbl.col", "db.tbl.col"); every dot-separated piece may be wrapped in
// backticks. The last piece is the column name, the preceding pieces form the
// qualifier.
//
// An entry is silently skipped when:
//   - it is empty after trimming, or has no non-empty piece;
//   - its qualifier does not match the current table (as built from
//     `current_db_name` / `current_table_name`);
//   - the column is not found by tablet_schema->field_index(), neither with
//     the name as written nor with its lower-cased form (a negative return
//     value is treated as "not found").
//
// On return `*column_ids` holds the resolved ids sorted ascending with
// duplicates removed; any previous content is discarded. The function always
// returns Status::OK().
inline Status parse_predicate_lm_stage1_cols_to_column_ids(const std::string& cols,
                                                          const TabletSchemaSPtr& tablet_schema,
                                                          std::string_view current_db_name,
                                                          std::string_view current_table_name,
                                                          std::vector<ColumnId>* column_ids) {
    column_ids->clear();
    if (cols.empty()) {
        return Status::OK();
    }

    // Normalised names of the table being scanned, used to filter qualifiers.
    const std::string db_name = predicate_lm_utils_detail::to_lower_trimmed(current_db_name);
    const std::string table_only =
            predicate_lm_utils_detail::normalize_table_name_for_match(current_table_name);
    // "db.tbl" when a database name is available, otherwise just "tbl".
    const std::string full_table_name = db_name.empty() ? table_only : db_name + "." + table_only;

    const std::vector<std::string> entries = doris::split(cols, ",");
    for (const std::string& entry : entries) {
        const std::string_view token = doris::trim(std::string_view(entry));
        if (token.empty()) {
            continue;
        }

        // Support qualified identifiers: tbl.col / db.tbl.col
        // (Backticks are supported on each identifier piece, e.g. `db`.`tbl`.`col`)
        // Pieces that normalise to an empty string are dropped.
        const std::vector<std::string> raw_pieces = doris::split(std::string(token), ".");
        std::vector<std::string> pieces;
        pieces.reserve(raw_pieces.size());
        for (const std::string& raw_piece : raw_pieces) {
            std::string cleaned = predicate_lm_utils_detail::normalize_identifier_piece(
                    std::string_view(raw_piece));
            if (!cleaned.empty()) {
                pieces.emplace_back(std::move(cleaned));
            }
        }
        if (pieces.empty()) {
            continue;
        }

        // The trailing piece names the column; whatever is left is the qualifier.
        std::string column_name = std::move(pieces.back());
        pieces.pop_back();

        // Re-join the qualifier pieces with '.'. Every piece is non-empty, so
        // "qualifier already has content" is the same as "not the first piece".
        std::string qualifier;
        for (const std::string& piece : pieces) {
            if (!qualifier.empty()) {
                qualifier += '.';
            }
            qualifier += piece;
        }

        if (!predicate_lm_utils_detail::qualifier_matches_current_table(full_table_name,
                                                                        table_only, qualifier)) {
            continue;
        }

        // Look the column up as written first, then retry with the lower-cased name.
        int32_t cid = tablet_schema->field_index(column_name);
        if (cid < 0) {
            cid = tablet_schema->field_index(doris::to_lower(column_name));
        }

        // Ignore unknown columns (do not fail the query).
        if (cid >= 0) {
            column_ids->push_back(static_cast<ColumnId>(cid));
        }
    }

    // Sorted, duplicate-free output.
    std::sort(column_ids->begin(), column_ids->end());
    const auto first_duplicate = std::unique(column_ids->begin(), column_ids->end());
    column_ids->erase(first_duplicate, column_ids->end());
    return Status::OK();
}
176
177
} // namespace doris