Coverage Report

Created: 2026-07-03 18:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/scan/predicate_lm_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <algorithm>
21
#include <string>
22
#include <string_view>
23
#include <vector>
24
25
#include "common/status.h"
26
#include "storage/tablet/tablet_schema.h"
27
#include "util/string_util.h"
28
29
namespace doris {
30
31
namespace predicate_lm_utils_detail {
32
33
29
inline std::string to_lower_trimmed(std::string_view sv) {
34
29
    sv = doris::trim(sv);
35
29
    return doris::to_lower(std::string(sv));
36
29
}
37
38
7
inline std::string normalize_table_name_for_match(std::string_view table_name_sv) {
39
7
    table_name_sv = doris::trim(table_name_sv);
40
7
    std::string s(table_name_sv);
41
42
    // Strip rollup suffix: "tbl(rollup)" => "tbl".
43
7
    if (!s.empty() && s.back() == ')') {
44
1
        auto pos = s.find_last_of('(');
45
1
        if (pos != std::string::npos) {
46
1
            s.resize(pos);
47
1
        }
48
1
    }
49
50
7
    return to_lower_trimmed(std::string_view(s));
51
7
}
52
53
2
// Returns true when `s` terminates with `suffix`. An empty suffix always
// matches; a suffix longer than `s` never does.
inline bool ends_with(std::string_view s, std::string_view suffix) {
    // The size check must come first so the offset below cannot underflow.
    return s.size() >= suffix.size() &&
           s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}
59
60
26
inline std::string normalize_identifier_piece(std::string_view sv) {
61
26
    sv = doris::trim(sv);
62
26
    if (sv.size() >= 2 && sv.front() == '`' && sv.back() == '`') {
63
1
        sv = sv.substr(1, sv.size() - 2);
64
1
        sv = doris::trim(sv);
65
1
    }
66
26
    return std::string(sv);
67
26
}
68
69
inline bool qualifier_matches_current_table(const std::string& normalized_current_full_table_name,
70
                                            const std::string& normalized_current_table_only,
71
15
                                            const std::string& qualifier_raw) {
72
15
    std::string qualifier = to_lower_trimmed(std::string_view(qualifier_raw));
73
15
    if (qualifier.empty()) {
74
8
        return true;
75
8
    }
76
77
    // qualifier="tbl"
78
7
    if (qualifier.find('.') == std::string::npos) {
79
3
        return qualifier == normalized_current_table_only;
80
3
    }
81
82
    // qualifier="db.tbl" (also allow suffix match for catalog.db.tbl)
83
4
    if (qualifier == normalized_current_full_table_name) {
84
2
        return true;
85
2
    }
86
2
    std::string with_dot = "." + qualifier;
87
2
    return ends_with(normalized_current_full_table_name, with_dot);
88
4
}
89
90
} // namespace predicate_lm_utils_detail
91
92
// Translates a comma-separated list of column references into column ids of
// `tablet_schema`.
//
// Each entry may be a bare column ("col") or a qualified one ("tbl.col",
// "db.tbl.col"); every dot-separated piece may be wrapped in backticks.
// Entries are silently skipped when they are empty, when their qualifier does
// not refer to the current table, or when the column is not found in the
// schema (looked up as written first, then lower-cased).
//
// `column_ids` is cleared on entry and, on return, holds the resolved ids
// sorted ascending with duplicates removed. The function always returns
// Status::OK().
inline Status parse_predicate_lm_stage1_cols_to_column_ids(const std::string& cols,
                                                           const TabletSchemaSPtr& tablet_schema,
                                                           std::string_view current_db_name,
                                                           std::string_view current_table_name,
                                                           std::vector<ColumnId>* column_ids) {
    column_ids->clear();
    if (cols.empty()) {
        return Status::OK();
    }

    const std::string db_lower = predicate_lm_utils_detail::to_lower_trimmed(current_db_name);
    const std::string table_lower =
            predicate_lm_utils_detail::normalize_table_name_for_match(current_table_name);

    // "db.tbl" when a database name is available, otherwise just "tbl".
    const std::string full_table_lower =
            db_lower.empty() ? table_lower : db_lower + "." + table_lower;

    const std::vector<std::string> entries = doris::split(cols, ",");
    for (const std::string& entry : entries) {
        const std::string_view token = doris::trim(std::string_view(entry));
        if (token.empty()) {
            continue;
        }

        // Break the token on '.' and clean each piece (trim + strip
        // backticks). Pieces that end up empty are discarded.
        const std::vector<std::string> raw_pieces = doris::split(std::string(token), ".");
        std::vector<std::string> pieces;
        pieces.reserve(raw_pieces.size());
        for (const std::string& raw_piece : raw_pieces) {
            std::string cleaned = predicate_lm_utils_detail::normalize_identifier_piece(
                    std::string_view(raw_piece));
            if (cleaned.empty()) {
                continue;
            }
            pieces.emplace_back(std::move(cleaned));
        }
        if (pieces.empty()) {
            continue;
        }

        // The last piece is the column name; whatever precedes it is the
        // qualifier, re-joined with '.'.
        std::string column = std::move(pieces.back());
        pieces.pop_back();

        std::string qualifier;
        for (const std::string& piece : pieces) {
            // Pieces are never empty here, so a non-empty qualifier means at
            // least one piece has already been appended.
            if (!qualifier.empty()) {
                qualifier.push_back('.');
            }
            qualifier.append(piece);
        }

        // Columns qualified with some other table are not ours to resolve.
        const bool refers_to_current_table =
                predicate_lm_utils_detail::qualifier_matches_current_table(
                        full_table_lower, table_lower, qualifier);
        if (!refers_to_current_table) {
            continue;
        }

        // Look the name up as written, then retry lower-cased.
        int32_t column_index = tablet_schema->field_index(column);
        if (column_index < 0) {
            column_index = tablet_schema->field_index(doris::to_lower(column));
        }

        // Unknown columns are ignored rather than reported as an error.
        if (column_index >= 0) {
            column_ids->push_back(static_cast<ColumnId>(column_index));
        }
    }

    // Canonical output: ascending order, no duplicates.
    std::sort(column_ids->begin(), column_ids->end());
    const auto first_duplicate = std::unique(column_ids->begin(), column_ids->end());
    column_ids->erase(first_duplicate, column_ids->end());
    return Status::OK();
}
178
179
} // namespace doris