be/src/udf/python/python_udf_client.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <arrow/status.h> |
21 | | |
22 | | #include "udf/python/python_client.h" |
23 | | |
24 | | namespace doris { |
25 | | |
26 | | class PythonUDFClient; |
27 | | |
28 | | using PythonUDFClientPtr = std::shared_ptr<PythonUDFClient>; |
29 | | |
30 | | /** |
31 | | * Python UDF Client |
32 | | * |
33 | | * Implements standard UDF (User-Defined Function) pattern with a single evaluation function: |
34 | | * - evaluate_func(*args): Process input arguments and return result |
35 | | * |
36 | | * UDF Characteristics: |
37 | | * - Takes scalar or column inputs |
38 | | * - Returns scalar or column outputs |
39 | | * - Stateless evaluation (each call is independent) |
40 | | * - Simple input-output transformation |
41 | | * |
42 | | * Example: |
43 | | * ```python |
44 | | * def evaluate_func(x, y): |
45 | | * # Add two numbers |
46 | | * return x + y |
47 | | * ``` |
48 | | * |
49 | | * Communication protocol with Python server: |
50 | | * 1. Send input batch (RecordBatch with N rows) |
51 | | * 2. Python calls evaluate_func() for each row (or vectorized) |
52 | | * 3. Receive output batch (RecordBatch with N rows) |
53 | | */ |
54 | | class PythonUDFClient : public PythonClient { |
55 | | public: |
56 | 1.25k | PythonUDFClient() = default; |
57 | | ~PythonUDFClient() override = default; |
58 | | |
59 | | static Status create(const PythonUDFMeta& func_meta, ProcessPtr process, |
60 | | PythonUDFClientPtr* client); |
61 | | |
62 | | /** |
63 | | * Evaluate UDF on input rows |
64 | | * |
65 | | * @param input Input row batch (columns = UDF function parameters) |
66 | | * @param output Output row batch (single column = UDF return value) |
67 | | * @return Status |
68 | | */ |
69 | | Status evaluate(const arrow::RecordBatch& input, std::shared_ptr<arrow::RecordBatch>* output); |
70 | | |
71 | | private: |
72 | | DISALLOW_COPY_AND_ASSIGN(PythonUDFClient); |
73 | | }; |
74 | | |
75 | | } // namespace doris |