Coverage Report

Created: 2026-03-14 13:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/udf/python/python_env.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "udf/python/python_env.h"
19
20
#include <fmt/core.h>
21
#include <gen_cpp/BackendService_types.h>
22
#include <rapidjson/document.h>
23
24
#include <filesystem>
25
#include <memory>
26
#include <regex>
27
#include <vector>
28
29
#include "common/status.h"
30
#include "udf/python/python_server.h"
31
#include "util/string_util.h"
32
33
namespace doris {
34
35
namespace fs = std::filesystem;
36
37
2
// Returns the lowercase label for a Python environment type:
// "conda" for CONDA, "venv" for VENV, and "unknown" for any other value.
static std::string _python_env_type_to_string(PythonEnvType env_type) {
    if (env_type == PythonEnvType::CONDA) {
        return "conda";
    }
    if (env_type == PythonEnvType::VENV) {
        return "venv";
    }
    return "unknown";
}
46
47
// extract python version by executing `python --version` and extract "3.9.16" from "Python 3.9.16"
48
// @param python_path: path to python executable, e.g. "/opt/miniconda3/envs/myenv/bin/python"
49
// @param version: extracted python version, e.g. "3.9.16"
50
36
static Status extract_python_version(const std::string& python_path, std::string* version) {
51
36
    static std::regex python_version_re(R"(^Python (\d+\.\d+\.\d+))");
52
53
36
    if (!fs::exists(python_path)) {
54
1
        return Status::NotFound("Python executable not found: {}", python_path);
55
1
    }
56
57
35
    std::string cmd = fmt::format("\"{}\" --version", python_path);
58
35
    FILE* pipe = popen(cmd.c_str(), "r");
59
35
    if (!pipe) {
60
0
        return Status::InternalError("Failed to run: {}", cmd);
61
0
    }
62
63
35
    std::string result;
64
35
    char buf[128];
65
70
    while (fgets(buf, sizeof(buf), pipe)) {
66
35
        result += buf;
67
35
    }
68
35
    pclose(pipe);
69
70
35
    std::smatch match;
71
35
    if (std::regex_search(result, match, python_version_re)) {
72
34
        *version = match[1].str();
73
34
        return Status::OK();
74
34
    }
75
76
1
    return Status::InternalError("Failed to extract Python version from path: {}, result: {}",
77
1
                                 python_path, result);
78
35
}
79
80
// Constructs an environment descriptor by copying the given name and interpreter version info.
PythonEnvironment::PythonEnvironment(const std::string& name, const PythonVersion& python_version)
        : env_name(name),
          python_version(python_version) {
}
82
83
8
std::string PythonEnvironment::to_string() const {
84
8
    return fmt::format(
85
8
            "[env_name: {}, env_base_path: {}, python_base_path: {}, python_full_version: {}]",
86
8
            env_name, python_version.base_path, python_version.executable_path,
87
8
            python_version.full_version);
88
8
}
89
90
21
bool PythonEnvironment::is_valid() const {
91
21
    if (!python_version.is_valid()) return false;
92
93
20
    auto perms = fs::status(python_version.executable_path).permissions();
94
20
    if ((perms & fs::perms::owner_exec) == fs::perms::none) {
95
1
        return false;
96
1
    }
97
98
19
    std::string version;
99
19
    if (!extract_python_version(python_version.executable_path, &version).ok()) {
100
1
        LOG(WARNING) << "Failed to extract python version from path: "
101
1
                     << python_version.executable_path;
102
1
        return false;
103
1
    }
104
105
18
    return python_version.full_version == version;
106
19
}
107
108
// Scan for environments under the /{conda_root_path}/envs directory from the conda root.
109
Status PythonEnvironment::scan_from_conda_root_path(const fs::path& conda_root_path,
110
13
                                                    std::vector<PythonEnvironment>* environments) {
111
13
    DCHECK(!conda_root_path.empty() && environments != nullptr);
112
113
13
    fs::path envs_dir = conda_root_path / "envs";
114
13
    if (!fs::exists(envs_dir) || !fs::is_directory(envs_dir)) {
115
3
        return Status::NotFound("Conda envs directory not found: {}", envs_dir.string());
116
3
    }
117
118
13
    for (const auto& entry : fs::directory_iterator(envs_dir)) {
119
13
        if (!entry.is_directory()) continue;
120
121
11
        std::string env_name = entry.path().filename(); // e.g. "myenv"
122
11
        std::string env_base_path = entry.path();       // e.g. "/opt/miniconda3/envs/myenv"
123
11
        std::string python_path =
124
11
                env_base_path + "/bin/python"; // e.g. "/{env_base_path}/bin/python"
125
11
        std::string python_full_version;       // e.g. "3.9.16"
126
11
        RETURN_IF_ERROR(extract_python_version(python_path, &python_full_version));
127
10
        size_t pos = python_full_version.find_last_of('.');
128
129
10
        if (UNLIKELY(pos == std::string::npos)) {
130
0
            return Status::InvalidArgument("Invalid python version: {}", python_full_version);
131
0
        }
132
133
10
        PythonVersion python_version(python_full_version, env_base_path, python_path);
134
10
        PythonEnvironment conda_env(env_name, python_version);
135
136
10
        if (UNLIKELY(!conda_env.is_valid())) {
137
0
            LOG(WARNING) << "Invalid conda environment: " << conda_env.to_string();
138
0
            continue;
139
0
        }
140
141
10
        environments->push_back(std::move(conda_env));
142
10
    }
143
144
9
    if (environments->empty()) {
145
1
        return Status::NotFound("No conda python environments found");
146
1
    }
147
148
8
    return Status::OK();
149
9
}
150
151
// Prepares one virtual environment per configured interpreter under `venv_root_path` and
// appends every environment that passes validation to `environments`.
//
// For each interpreter the full version (e.g. "3.9.16") is read, the environment directory
// `{venv_root_path}/python{full_version}` is derived from it, and the environment is created
// with `python -m venv --system-site-packages` when the directory or its `bin/python` is
// missing.
//
// @param venv_root_path: directory that holds the virtual environments, must not be empty
// @param interpreter_paths: interpreter executables to build environments from
// @param environments: receives the valid environments (appended)
// @return NotFound when an interpreter or the site-packages directory is missing, or when no
//         environment was collected; RuntimeError when creating a virtual environment fails;
//         any error returned while reading an interpreter's version.
Status PythonEnvironment::scan_from_venv_root_path(
        const fs::path& venv_root_path, const std::vector<std::string>& interpreter_paths,
        std::vector<PythonEnvironment>* environments) {
    DCHECK(!venv_root_path.empty() && environments != nullptr);

    for (const auto& interpreter_path : interpreter_paths) {
        if (!fs::exists(interpreter_path) || !fs::is_regular_file(interpreter_path)) {
            return Status::NotFound("Interpreter path not found: {}", interpreter_path);
        }
        std::string python_full_version;
        RETURN_IF_ERROR(extract_python_version(interpreter_path, &python_full_version));
        size_t pos = python_full_version.find_last_of('.');
        if (UNLIKELY(pos == std::string::npos)) {
            return Status::InvalidArgument("Invalid python version: {}", python_full_version);
        }
        // Extract major.minor version (e.g., "3.12" from "3.12.0")
        std::string python_major_minor_version = python_full_version.substr(0, pos);

        std::string env_name = fmt::format("python{}", python_full_version); // e.g. "python3.9.16"
        std::string env_base_path = fmt::format("{}/{}", venv_root_path.string(),
                                                env_name); // e.g. "/opt/venv/python3.9.16"
        std::string python_path =
                fmt::format("{}/bin/python", env_base_path); // e.g. "/{venv_base_path}/bin/python"

        if (!fs::exists(env_base_path) || !fs::exists(python_path)) {
            fs::create_directories(env_base_path);
            // Use --system-site-packages to inherit packages from system Python
            // This ensures pandas and pyarrow are available if installed in system
            //
            // Both paths are double-quoted, as in the other shell commands built in this
            // file, so that a path containing spaces is passed to the interpreter as a
            // single argument instead of being split by the shell.
            std::string create_venv_cmd =
                    fmt::format("\"{}\" -m venv --system-site-packages \"{}\"", interpreter_path,
                                env_base_path);

            if (system(create_venv_cmd.c_str()) != 0 || !fs::exists(python_path)) {
                return Status::RuntimeError("Failed to create python virtual environment, cmd: {}",
                                            create_venv_cmd);
            }
        }

        // Use major.minor version for site-packages path (e.g., "python3.12")
        std::string python_dependency_path = fmt::format("{}/lib/python{}/site-packages",
                                                         env_base_path, python_major_minor_version);

        if (!fs::exists(python_dependency_path)) {
            return Status::NotFound("Python dependency path not found: {}", python_dependency_path);
        }

        PythonVersion python_version(python_full_version, env_base_path, python_path);
        PythonEnvironment venv_env(env_name, python_version);

        // An environment that fails validation is reported and skipped, not treated as fatal.
        if (UNLIKELY(!venv_env.is_valid())) {
            LOG(WARNING) << "Invalid venv environment: " << venv_env.to_string();
            continue;
        }

        environments->push_back(std::move(venv_env));
    }

    if (environments->empty()) {
        return Status::NotFound("No venv python environments found");
    }

    return Status::OK();
}
213
214
10
// Appends the PythonVersion of every scanned environment to `versions`.
// @return InternalError when no environment is held by this scanner, OK otherwise.
Status PythonEnvScanner::get_versions(std::vector<PythonVersion>* versions) const {
    DCHECK(versions != nullptr);

    if (_envs.empty()) {
        return Status::InternalError("not found available version");
    }

    for (auto it = _envs.begin(); it != _envs.end(); ++it) {
        versions->push_back(it->python_version);
    }
    return Status::OK();
}
224
225
// Looks up the scanned environment whose full version equals `runtime_version`
// (compared after passing it through trim()) and copies its PythonVersion into `version`.
// @return InternalError when no environment is held by this scanner, NotFound when no
//         environment has the requested version, OK otherwise.
Status PythonEnvScanner::get_version(const std::string& runtime_version,
                                     PythonVersion* version) const {
    if (_envs.empty()) {
        return Status::InternalError("not found available version");
    }

    std::string_view wanted(runtime_version);
    wanted = trim(wanted);

    for (const auto& candidate : _envs) {
        if (candidate.python_version.full_version != wanted) {
            continue;
        }
        *version = candidate.python_version;
        return Status::OK();
    }

    return Status::NotFound("not found runtime version: {}", runtime_version);
}
240
241
6
// Populates this scanner's environment list from the conda root path and returns the
// resulting status unchanged.
Status CondaEnvScanner::scan() {
    Status scan_status = PythonEnvironment::scan_from_conda_root_path(_env_root_path, &_envs);
    if (!scan_status.ok()) {
        return scan_status;
    }
    return Status::OK();
}
245
246
2
std::string CondaEnvScanner::to_string() const {
247
2
    std::stringstream ss;
248
2
    ss << "Conda environments: ";
249
2
    for (const auto& conda_env : _envs) {
250
1
        ss << conda_env.to_string() << ", ";
251
1
    }
252
2
    return ss.str();
253
2
}
254
255
8
// Populates this scanner's environment list from the venv root path and the configured
// interpreter paths, returning the resulting status unchanged.
Status VenvEnvScanner::scan() {
    Status scan_status = PythonEnvironment::scan_from_venv_root_path(_env_root_path,
                                                                     _interpreter_paths, &_envs);
    if (!scan_status.ok()) {
        return scan_status;
    }
    return Status::OK();
}
260
261
7
std::string VenvEnvScanner::to_string() const {
262
7
    std::stringstream ss;
263
7
    ss << "Venv environments: ";
264
7
    for (const auto& venv_env : _envs) {
265
6
        ss << venv_env.to_string() << ", ";
266
6
    }
267
7
    return ss.str();
268
7
}
269
270
// Creates the scanner matching `env_type`, runs the scan, and confirms that at least one
// python version is available afterwards.
// @param env_type: kind of python environment to scan (CONDA or VENV)
// @param python_root_path: conda root or venv root; must be an existing directory
// @param python_venv_interpreter_paths: ':'-separated interpreter paths, used for VENV only
// @return InvalidArgument for a bad root path or an empty interpreter list, NotSupported for
//         any other environment type, otherwise the status of the scan / version lookup.
Status PythonEnvScannerHolder::init(PythonEnvType env_type, const fs::path& python_root_path,
                                    const std::string& python_venv_interpreter_paths) {
    if (env_type == PythonEnvType::CONDA) {
        if (!(fs::exists(python_root_path) && fs::is_directory(python_root_path))) {
            return Status::InvalidArgument("Invalid conda root path: {}",
                                           python_root_path.string());
        }
        _env_scanner = std::make_unique<CondaEnvScanner>(python_root_path);
    } else if (env_type == PythonEnvType::VENV) {
        if (!(fs::exists(python_root_path) && fs::is_directory(python_root_path))) {
            return Status::InvalidArgument("Invalid venv root path: {}", python_root_path.string());
        }
        std::vector<std::string> interpreter_paths = split(python_venv_interpreter_paths, ":");
        if (interpreter_paths.empty()) {
            return Status::InvalidArgument("Invalid python interpreter paths: {}",
                                           python_venv_interpreter_paths);
        }
        _env_scanner = std::make_unique<VenvEnvScanner>(python_root_path, interpreter_paths);
    } else {
        return Status::NotSupported("Unsupported python runtime type: {}",
                                    static_cast<int>(env_type));
    }

    RETURN_IF_ERROR(_env_scanner->scan());

    // The collected versions are not used here; the call only surfaces the error reported
    // when the scanner ended up without any environment.
    std::vector<PythonVersion> versions;
    RETURN_IF_ERROR(_env_scanner->get_versions(&versions));
    return Status::OK();
}
302
303
2
std::vector<TPythonEnvInfo> PythonVersionManager::env_infos_to_thrift() const {
304
2
    std::vector<TPythonEnvInfo> infos;
305
2
    const auto& envs = this->get_envs();
306
2
    infos.reserve(envs.size());
307
308
2
    const auto env_type_str = _python_env_type_to_string(this->env_type());
309
2
    for (const auto& env : envs) {
310
2
        TPythonEnvInfo info;
311
2
        info.__set_env_name(env.env_name);
312
2
        info.__set_full_version(env.python_version.full_version);
313
2
        info.__set_env_type(env_type_str);
314
2
        info.__set_base_path(env.python_version.base_path);
315
2
        info.__set_executable_path(env.python_version.executable_path);
316
2
        infos.emplace_back(std::move(info));
317
2
    }
318
319
2
    return infos;
320
2
}
321
322
std::vector<TPythonPackageInfo> PythonVersionManager::package_infos_to_thrift(
323
1
        const std::vector<std::pair<std::string, std::string>>& packages) const {
324
1
    std::vector<TPythonPackageInfo> infos;
325
1
    infos.reserve(packages.size());
326
11
    for (const auto& [name, ver] : packages) {
327
11
        TPythonPackageInfo info;
328
11
        info.__set_package_name(name);
329
11
        info.__set_version(ver);
330
11
        infos.emplace_back(std::move(info));
331
11
    }
332
1
    return infos;
333
1
}
334
335
// Lists the packages installed for the given interpreter by running
// `"<executable>" -m pip list --format=json` and parsing the JSON array it prints.
//
// @param version: interpreter to query; must report itself valid
// @param packages: receives (name, version) pairs, appended after any existing content.
//                  It is only modified when the whole pip output was parsed successfully.
// @return InvalidArgument for an invalid `version`; InternalError when the command cannot be
//         started, finishes with a non-zero status, or prints output that is not a JSON
//         array of objects carrying string "name" and "version" members; OK otherwise.
Status list_installed_packages(const PythonVersion& version,
                               std::vector<std::pair<std::string, std::string>>* packages) {
    DCHECK(packages != nullptr);
    if (!version.is_valid()) {
        return Status::InvalidArgument("Invalid python version: {}", version.to_string());
    }

    // Run pip list --format=json to get installed packages (stderr is discarded).
    std::string cmd =
            fmt::format("\"{}\" -m pip list --format=json 2>/dev/null", version.executable_path);
    FILE* pipe = popen(cmd.c_str(), "r");
    if (!pipe) {
        return Status::InternalError("Failed to run pip list for python version: {}",
                                     version.full_version);
    }

    std::string result;
    char buf[4096];
    while (fgets(buf, sizeof(buf), pipe)) {
        result += buf;
    }
    // NOTE: `ret` is the value returned by pclose(), reported as-is in the message below.
    int ret = pclose(pipe);
    if (ret != 0) {
        return Status::InternalError(
                "pip list failed for python version: {}, exit code: {}, output: {}",
                version.full_version, ret, result);
    }

    // Parse JSON output with RapidJSON: [{"name": "pkg", "version": "1.0"}, ...]
    // Each entry looks like: {"name": "package_name", "version": "1.2.3"}
    rapidjson::Document doc;
    if (doc.Parse(result.data(), result.size()).HasParseError() || !doc.IsArray()) [[unlikely]] {
        return Status::InternalError("Failed to parse pip list json output for python version: {}",
                                     version.full_version);
    }

    // Collect into a local buffer first so that a malformed entry in the middle of the
    // array does not leave partially appended results in the caller's vector.
    std::vector<std::pair<std::string, std::string>> parsed;
    parsed.reserve(doc.Size());
    for (const auto& item : doc.GetArray()) {
        // FindMember() may only be called on JSON objects, so reject anything else
        // (numbers, strings, nested arrays, null) before looking up members.
        if (!item.IsObject()) [[unlikely]] {
            return Status::InternalError("Invalid pip list json format for python version: {}",
                                         version.full_version);
        }
        auto name_it = item.FindMember("name");
        auto version_it = item.FindMember("version");
        if (name_it == item.MemberEnd() || version_it == item.MemberEnd() ||
            !name_it->value.IsString() || !version_it->value.IsString()) [[unlikely]] {
            return Status::InternalError("Invalid pip list json format for python version: {}",
                                         version.full_version);
        }
        parsed.emplace_back(name_it->value.GetString(), version_it->value.GetString());
    }

    packages->reserve(packages->size() + parsed.size());
    for (auto& entry : parsed) {
        packages->push_back(std::move(entry));
    }

    return Status::OK();
}
386
387
} // namespace doris