Coverage Report

Created: 2026-03-12 14:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/udf/python/python_udtf_client.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <arrow/status.h>
21
22
#include "udf/python/python_client.h"
23
24
namespace doris {
25
26
// Forward declaration so the alias below can name the class before its full definition.
class PythonUDTFClient;
27
28
// Shared-ownership handle type for PythonUDTFClient; also the out-parameter type of create().
using PythonUDTFClientPtr = std::shared_ptr<PythonUDTFClient>;
29
30
/**
31
 * Python UDTF Client
32
 * 
33
 * Implements simplified UDTF (User-Defined Table Function):
34
 * 
35
 * Handler Function:
36
 * - evaluate_func(*args): Process input arguments and yield output rows
37
 * 
38
 * UDTF Characteristics:
39
 * - Takes scalar or table inputs
40
 * - Returns table (multiple rows)
41
 * - Simple yield pattern
42
 * 
43
 * Example:
44
 * ```python
45
 * def evaluate_func(text, delimiter):
46
 *     # Split string by delimiter and return multiple results
47
 *     for item in text.split(delimiter):
48
 *         # or yield (item, )
49
 *         yield item 
50
 * ```
51
 * 
52
 * Communication protocol with Python server:
53
 * 1. Send input row batch to Python
54
 * 2. Python calls evaluate_func() for each input row
55
 * 3. Python returns the output rows of all input rows in one batch (a ListArray with one list per input row)
56
 */
57
// Client type for Python UDTFs, derived from PythonClient.
// Only declarations appear here: create() and evaluate() have no body in this header.
class PythonUDTFClient : public PythonClient {
58
public:
59
0
    // Defaulted constructor.
    // NOTE(review): the coverage count for this line is 0, i.e. it was never executed in the measured run.
    PythonUDTFClient() = default;
60
    // Defaulted destructor; `override` means PythonClient declares a virtual destructor.
    ~PythonUDTFClient() override = default;
61
62
    // Static factory declared with the UDF metadata, a process handle and an out-parameter.
    // The definition is not visible in this header — presumably it stores the newly created
    // client in *client and reports failure through the returned Status; confirm in the .cpp.
    static Status create(const PythonUDFMeta& func_meta, ProcessPtr process,
63
                         PythonUDTFClientPtr* client);
64
65
    /**
66
     * Evaluate UDTF on input rows
67
     * 
68
     * Protocol (ListArray-based):
69
     * Python server returns a RecordBatch with 1 column:
70
     * - Column 0: ListArray where each list element corresponds to one input row's outputs
71
     * 
72
     * Example:
73
     *   Input: 3 rows
74
     *   Output ListArray:
75
     *     [0]: [val1, val2, val3]      (3 elements for input row 0)
76
     *     [1]: []                       (0 elements for input row 1)
77
     *     [2]: [val4, val5, val6, val7] (4 elements for input row 2)
78
     * 
79
     * @param input Input row batch (columns = UDTF function parameters)
80
     * @param list_array Out-parameter receiving the output ListArray (length = num_input_rows)
81
     * @return Status of the call (error conditions are defined by the implementation, not shown here)
82
     */
83
    Status evaluate(const arrow::RecordBatch& input, std::shared_ptr<arrow::ListArray>* list_array);
84
85
private:
86
    // Macro defined outside this header; by its name it is expected to disable copy construction
    // and copy assignment for this class — confirm against the macro's definition.
    DISALLOW_COPY_AND_ASSIGN(PythonUDTFClient);
87
};
88
89
} // namespace doris