Coverage Report

Created: 2026-03-13 03:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/udf/python/python_env.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/BackendService_types.h>
21
22
#include <filesystem>
23
#include <utility>
24
25
#include "common/exception.h"
26
#include "common/status.h"
27
28
namespace doris {
29
30
namespace fs = std::filesystem;
31
32
// Kind of Python environment layout a scanner reports through env_type().
enum class PythonEnvType {
    CONDA, // reported by CondaEnvScanner
    VENV   // reported by VenvEnvScanner
};
33
34
// Describes one Python interpreter: its version string, the directory of the
// environment it belongs to, and the path of the interpreter binary.
struct PythonVersion {
    std::string full_version;    // e.g. "3.9.16"
    std::string base_path;       // e.g. "/root/anaconda3/envs/python3.9"
    std::string executable_path; // e.g. "{base_path}/bin/python3"

    PythonVersion() = default;

    // Takes the strings by value and moves them into place, so callers can
    // hand over temporaries without an extra copy.
    explicit PythonVersion(std::string full_version, std::string base_path,
                           std::string executable_path)
            : full_version(std::move(full_version)),
              base_path(std::move(base_path)),
              executable_path(std::move(executable_path)) {}

    // Two versions are equal only when all three fields match.
    bool operator==(const PythonVersion& other) const {
        return full_version == other.full_version && base_path == other.base_path &&
               executable_path == other.executable_path;
    }

    const std::string& get_base_path() const { return base_path; }

    const std::string& get_executable_path() const { return executable_path; }

    // Returns true when every field is non-empty and both paths exist on disk.
    //
    // Uses the std::error_code overloads of std::filesystem::exists so that an
    // OS-level failure (over-long path, unsearchable parent directory, ...) is
    // reported as "not valid" instead of escaping this bool predicate as a
    // std::filesystem::filesystem_error.
    bool is_valid() const {
        if (full_version.empty() || base_path.empty() || executable_path.empty()) {
            return false;
        }
        std::error_code ec;
        return std::filesystem::exists(base_path, ec) &&
               std::filesystem::exists(executable_path, ec);
    }

    // Renders "[full_version: <v>, base_path: <b>, executable_path: <e>]".
    // Built with plain std::string appends so this header does not rely on a
    // formatting library being pulled in transitively.
    std::string to_string() const {
        std::string out = "[full_version: ";
        out += full_version;
        out += ", base_path: ";
        out += base_path;
        out += ", executable_path: ";
        out += executable_path;
        out += "]";
        return out;
    }
};
66
67
// A named Python environment together with the interpreter it provides.
// All member functions are only declared here; their definitions live
// outside this header.
struct PythonEnvironment {
68
    std::string env_name; // e.g. "base" or "myenv"
69
    // Interpreter description (version string, base path, executable path).
    PythonVersion python_version;
70
71
    PythonEnvironment(const std::string& name, const PythonVersion& python_version);
72
73
    // Human-readable description of this environment.
    std::string to_string() const;
74
75
    // NOTE(review): presumably checks env_name and python_version.is_valid();
    // confirm against the definition.
    bool is_valid() const;
76
77
    // Scans a conda installation rooted at `conda_root_path`.
    // NOTE(review): `environments` looks like an out-parameter that receives
    // the discovered environments; confirm whether it is appended to or
    // overwritten.
    static Status scan_from_conda_root_path(const fs::path& conda_root_path,
78
                                            std::vector<PythonEnvironment>* environments);
79
80
    // Scans a venv layout rooted at `venv_root_path`, using the supplied
    // `interpreter_paths`.
    // NOTE(review): how `interpreter_paths` relates to `venv_root_path`
    // (absolute vs. relative entries) is not visible here; confirm against
    // the definition.
    static Status scan_from_venv_root_path(const fs::path& venv_root_path,
81
                                           const std::vector<std::string>& interpreter_paths,
82
                                           std::vector<PythonEnvironment>* environments);
83
};
84
85
class PythonEnvScanner {
86
public:
87
20
    PythonEnvScanner(const fs::path& env_root_path) : _env_root_path(env_root_path) {}
88
89
16
    virtual ~PythonEnvScanner() = default;
90
91
    virtual Status scan() = 0;
92
93
    Status get_versions(std::vector<PythonVersion>* versions) const;
94
95
    Status get_version(const std::string& runtime_version, PythonVersion* version) const;
96
97
2
    const std::vector<PythonEnvironment>& get_envs() const { return _envs; }
98
99
    std::string root_path() const { return _env_root_path.string(); }
100
101
    virtual PythonEnvType env_type() const = 0;
102
103
    virtual std::string to_string() const = 0;
104
105
protected:
106
    fs::path _env_root_path;
107
    std::vector<PythonEnvironment> _envs;
108
};
109
110
class CondaEnvScanner : public PythonEnvScanner {
111
public:
112
10
    CondaEnvScanner(const fs::path& python_root_path) : PythonEnvScanner(python_root_path) {}
113
114
    ~CondaEnvScanner() override = default;
115
116
    Status scan() override;
117
118
    std::string to_string() const override;
119
120
3
    PythonEnvType env_type() const override { return PythonEnvType::CONDA; }
121
};
122
123
// Scanner for venv-style environments; reports PythonEnvType::VENV.
// Besides the root path it keeps its own copy of the interpreter path list
// passed to the constructor.
class VenvEnvScanner : public PythonEnvScanner {
124
public:
125
    // `python_root_path` is forwarded to the base class; `interpreter_paths`
    // is copied into this object.
    VenvEnvScanner(const fs::path& python_root_path,
126
                   const std::vector<std::string>& interpreter_paths)
127
10
            : PythonEnvScanner(python_root_path), _interpreter_paths(interpreter_paths) {}
128
129
7
    ~VenvEnvScanner() override = default;
130
131
    // Defined out of line.
    Status scan() override;
132
133
    std::string to_string() const override;
134
135
1
    PythonEnvType env_type() const override { return PythonEnvType::VENV; }
136
137
private:
138
    // NOTE(review): presumably consumed by scan(); confirm against the
    // definition.
    std::vector<std::string> _interpreter_paths;
139
};
140
141
// Holds a PythonEnvScanner instance and centralizes the initialization check
142
// to avoid accessing the scanner when the Python UDF feature is disabled.
143
// This class is intended for internal use by PythonVersionManager only.
144
class PythonEnvScannerHolder {
145
public:
146
    Status init(PythonEnvType env_type, const fs::path& python_root_path,
147
                const std::string& python_venv_interpreter_paths);
148
149
3.22k
    const PythonEnvScanner& get() const {
150
3.22k
        if (!_env_scanner) {
151
0
            throw Exception(ErrorCode::NOT_INITIALIZED,
152
0
                            "Set 'enable_python_udf_support = true' in be.conf to enable PythonUDF "
153
0
                            "feature");
154
0
        }
155
3.22k
        return *_env_scanner;
156
3.22k
    }
157
158
private:
159
    std::unique_ptr<PythonEnvScanner> _env_scanner;
160
};
161
162
class PythonVersionManager {
163
public:
164
3.23k
    static PythonVersionManager& instance() {
165
3.23k
        static PythonVersionManager instance;
166
3.23k
        return instance;
167
3.23k
    }
168
169
    Status init(PythonEnvType env_type, const fs::path& python_root_path,
170
12
                const std::string& python_venv_interpreter_paths) {
171
12
        return _holder.init(env_type, python_root_path, python_venv_interpreter_paths);
172
12
    }
173
174
3.21k
    Status get_version(const std::string& runtime_version, PythonVersion* version) const {
175
3.21k
        return _holder.get().get_version(runtime_version, version);
176
3.21k
    }
177
178
2
    const std::vector<PythonEnvironment>& get_envs() const { return _holder.get().get_envs(); }
179
180
2
    PythonEnvType env_type() const { return _holder.get().env_type(); }
181
182
7
    std::string to_string() const { return _holder.get().to_string(); }
183
184
    std::vector<TPythonEnvInfo> env_infos_to_thrift() const;
185
186
    std::vector<TPythonPackageInfo> package_infos_to_thrift(
187
            const std::vector<std::pair<std::string, std::string>>& packages) const;
188
189
private:
190
    PythonEnvScannerHolder _holder;
191
};
192
193
// Lists the pip packages installed for the given Python version.
194
// The result is written through the `packages` out-parameter as
// (package_name, version) pairs.
// NOTE(review): whether `packages` is cleared first and which errors the
// returned Status can carry are defined out of line; confirm there.
195
Status list_installed_packages(const PythonVersion& version,
196
                               std::vector<std::pair<std::string, std::string>>* packages);
197
198
} // namespace doris
199
200
namespace std {
201
template <>
202
struct hash<doris::PythonVersion> {
203
12.7k
    size_t operator()(const doris::PythonVersion& v) const noexcept {
204
12.7k
        return hash<string> {}(v.full_version);
205
12.7k
    }
206
};
207
} // namespace std