Coverage Report

Created: 2025-04-30 17:40

/root/doris/be/src/udf/udf.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/apache/impala/blob/branch-2.9.0/be/src/udf/udf.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <gen_cpp/Types_types.h>
24
25
#include <cstdint>
26
#include <memory>
27
#include <optional>
28
#include <string>
29
#include <vector>
30
31
#include "runtime/types.h"
32
#include "util/runtime_profile.h"
33
#include "vec/common/arena.h"
34
35
namespace doris {
36
37
struct ColumnPtrWrapper;
38
struct StringRef;
39
class RuntimeState;
40
41
namespace vectorized {
42
class IDataType;
43
using DataTypePtr = std::shared_ptr<const IDataType>;
44
} // namespace vectorized
45
46
// The FunctionContext is passed to every UDF/UDA and is the interface for the UDF to the
47
// rest of the system. It contains APIs to examine the system state, report errors
48
// and manage memory.
49
class FunctionContext {
50
public:
51
    enum FunctionStateScope {
52
        /// Indicates that the function state for this FunctionContext's UDF is shared across
53
        /// the plan fragment (a query is divided into multiple plan fragments, each of which
54
        /// is responsible for a part of the query execution). Within the plan fragment, there
55
        /// may be multiple instances of the UDF executing concurrently with multiple
56
        /// FunctionContexts sharing this state, meaning that the state must be
57
        /// thread-safe. The Prepare() function for the UDF may be called with this scope
58
        /// concurrently on a single host if the UDF will be evaluated in multiple plan
59
        /// fragments on that host. In general, read-only state that doesn't need to be
60
        /// recomputed for every UDF call should be fragment-local.
61
        /// TODO: not yet implemented
62
        FRAGMENT_LOCAL,
63
64
        /// Indicates that the function state is local to the execution thread. This state
65
        /// does not need to be thread-safe. However, this state will be initialized (via the
66
        /// Prepare() function) once for every execution thread, so fragment-local state
67
        /// should be used when possible for better performance. In general, inexpensive
68
        /// shared state that is written to by the UDF (e.g. scratch space) should be
69
        /// thread-local.
70
        THREAD_LOCAL,
71
    };
72
73
    static std::unique_ptr<doris::FunctionContext> create_context(
74
            RuntimeState* state, const vectorized::DataTypePtr& return_type,
75
            const std::vector<vectorized::DataTypePtr>& arg_types);
76
77
    /// Returns a new FunctionContext with the same constant args, fragment-local state, and
78
    /// debug flag as this FunctionContext. Ownership of the new context is handed to the
79
    /// caller through the returned std::unique_ptr, so no manual delete is needed.
80
    std::unique_ptr<doris::FunctionContext> clone();
81
82
    void set_constant_cols(const std::vector<std::shared_ptr<doris::ColumnPtrWrapper>>& cols);
83
84
44.3M
    RuntimeState* state() { return _state; }
85
86
624
    void set_dict_function(const TDictFunction& dict_function) { _dict_function = dict_function; }
87
88
626
    // Returns a mutable reference to _dict_function, which holds no value unless one has been
    // assigned (e.g. via set_dict_function()).
    std::optional<TDictFunction>& dict_function() { return _dict_function; }
89
1.58M
    bool check_overflow_for_decimal() const { return _check_overflow_for_decimal; }
90
91
1.18M
    // Stores check_overflow_for_decimal in _check_overflow_for_decimal.
    // Returns the value that was just stored (the result of the assignment), not the
    // previous setting.
    bool set_check_overflow_for_decimal(bool check_overflow_for_decimal) {
92
1.18M
        return _check_overflow_for_decimal = check_overflow_for_decimal;
93
1.18M
    }
94
95
7.16k
    void set_string_as_jsonb_string(bool string_as_jsonb_string) {
96
7.16k
        _string_as_jsonb_string = string_as_jsonb_string;
97
7.16k
    }
98
99
78.6k
    void set_jsonb_string_as_string(bool jsonb_string_as_string) {
100
78.6k
        _jsonb_string_as_string = jsonb_string_as_string;
101
78.6k
    }
102
103
0
    void set_udf_execute_timer(RuntimeProfile::Counter* udf_execute_timer) {
104
0
        _udf_execute_timer = udf_execute_timer;
105
0
    }
106
107
14.0k
    RuntimeProfile::Counter* get_udf_execute_timer() { return _udf_execute_timer; }
108
109
    // Cast flag: when string_as_jsonb_string is enabled, casting a string to jsonb does not parse
110
    // the string; it is inserted as a string literal instead.
111
6.00k
    bool string_as_jsonb_string() const { return _string_as_jsonb_string; }
112
113
    // Cast flag: when jsonb_string_as_string is enabled, casting a jsonb string to string does not
114
    // parse the string; it is inserted as a string literal instead.
115
254k
    bool jsonb_string_as_string() const { return _jsonb_string_as_string; }
116
117
    // Sets an error for this UDF. If this is called, this will trigger the
118
    // query to fail.
119
    // Note: when you set error for the UDFs used in Data Load, you should
120
    // ensure the function return value is null.
121
    void set_error(const char* error_msg);
122
123
    // Adds a warning that is returned to the user. This can include things like
124
    // overflow or other recoverable error conditions.
125
    // Warnings are capped at a maximum number. Returns true if the warning was
126
    // added and false if it was ignored due to the cap.
127
    bool add_warning(const char* warning_msg);
128
129
    /// Methods for maintaining state across UDF/UDA function calls. set_function_state() can
130
    /// be used to store a pointer that can then be retrieved via get_function_state(). If
131
    /// get_function_state() is called when no pointer is set, it will return
132
    /// nullptr. 'ptr' is passed as a std::shared_ptr<void>, so the state's lifetime follows
133
    /// shared ownership rather than requiring manual cleanup by the UDF/UDA.
134
    void set_function_state(FunctionStateScope scope, std::shared_ptr<void> ptr);
135
136
    void* get_function_state(FunctionStateScope scope) const;
137
138
    // Returns the return type information of this function. For UDAs, this is the final
139
    // return type of the UDA (e.g., the type returned by the finalize function).
140
    const vectorized::DataTypePtr get_return_type() const;
141
142
    // Returns the number of arguments to this function (not including the FunctionContext*
143
    // argument).
144
    int get_num_args() const;
145
146
    // Returns the type information for the arg_idx-th argument (0-indexed, not including
147
    // the FunctionContext* argument). Returns nullptr if arg_idx is invalid.
148
    const vectorized::DataTypePtr get_arg_type(int arg_idx) const;
149
150
    // Returns true if the arg_idx-th input argument (0 indexed, not including the
151
    // FunctionContext* argument) is a constant (e.g. 5, "string", 1 + 1).
152
    bool is_col_constant(int arg_idx) const;
153
154
    // Returns a pointer to the value of the arg_idx-th input argument (0 indexed, not
155
    // including the FunctionContext* argument). Returns nullptr if the argument is not
156
    // constant. This function can be used to obtain user-specified constants in a UDF's
157
    // Init() or Close() functions.
158
    doris::ColumnPtrWrapper* get_constant_col(int arg_idx) const;
159
160
    // Creates a StringRef, which memory is available when this function context is used next time
161
    StringRef create_temp_string_val(int64_t len);
162
163
4.68M
    ~FunctionContext() = default;
164
165
2.92k
    vectorized::Arena& get_arena() { return arena; }
166
167
private:
168
4.68M
    FunctionContext() = default;
169
170
    // Disable copy ctor and assignment operator
171
    FunctionContext(const FunctionContext& other);
172
173
    // Explicitly deleted so that any attempt to copy-assign a context is rejected at
    // compile time instead of surfacing as a missing-symbol link error.
    FunctionContext& operator=(const FunctionContext& other) = delete;
174
175
    // We use the query's runtime state to report errors and warnings. nullptr for test
176
    // contexts.
177
    RuntimeState* _state = nullptr;
178
179
    // Empty if there's no error
180
    std::string _error_msg;
181
182
    // The number of warnings reported.
183
    // Initialized in-class because the default constructor is defaulted; without this the
    // counter would hold an indeterminate value until something assigns it.
    int64_t _num_warnings = 0;
184
185
    /// The function state accessed via FunctionContext::Get/SetFunctionState()
186
    std::shared_ptr<void> _thread_local_fn_state;
187
    std::shared_ptr<void> _fragment_local_fn_state;
188
189
    // Type descriptor for the return type of the function.
190
    vectorized::DataTypePtr _return_type;
191
192
    // Type descriptors for each argument of the function.
193
    std::vector<vectorized::DataTypePtr> _arg_types;
194
195
    std::vector<std::shared_ptr<doris::ColumnPtrWrapper>> _constant_cols;
196
197
    //udf execute timer
198
    RuntimeProfile::Counter* _udf_execute_timer = nullptr;
199
    bool _check_overflow_for_decimal = false;
200
201
    bool _string_as_jsonb_string = false;
202
    bool _jsonb_string_as_string = false;
203
204
    std::string _string_result;
205
206
    vectorized::Arena arena;
207
208
    std::optional<TDictFunction> _dict_function;
209
};
210
211
using doris::FunctionContext;
212
} // namespace doris