Coverage Report

Created: 2026-04-22 11:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/ai/ai_functions.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/FrontendService.h>
21
#include <gen_cpp/PaloInternalService_types.h>
22
#include <glog/logging.h>
23
24
#include <algorithm>
25
#include <cstdlib>
26
#include <memory>
27
#include <string>
28
#include <type_traits>
29
#include <utility>
30
#include <vector>
31
32
#include "common/config.h"
33
#include "common/status.h"
34
#include "core/column/column_array.h"
35
#include "core/column/column_const.h"
36
#include "core/column/column_nullable.h"
37
#include "core/cow.h"
38
#include "core/data_type/data_type_array.h"
39
#include "core/data_type/data_type_number.h"
40
#include "core/data_type/define_primitive_type.h"
41
#include "core/data_type/primitive_type.h"
42
#include "exprs/function/ai/ai_adapter.h"
43
#include "exprs/function/function.h"
44
#include "runtime/query_context.h"
45
#include "runtime/runtime_state.h"
46
#include "service/http/http_client.h"
47
#include "util/security.h"
48
#include "util/string_util.h"
49
#include "util/threadpool.h"
50
51
namespace doris {
52
53
// Base class for AI-based functions
54
template <typename Derived>
55
class AIFunction : public IFunction {
56
public:
57
16
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_13FunctionEmbedEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE8get_nameB5cxx11Ev
Line
Count
Source
57
6
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_17FunctionAIExtractEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_14FunctionAIMaskEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
_ZNK5doris10AIFunctionINS_19FunctionAITranslateEE8get_nameB5cxx11Ev
Line
Count
Source
57
1
    std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }
58
59
    // If the user doesn't provide the first arg, `resource_name`
60
    // FE will add the `resource_name` to the arguments list using the Session Variable.
61
    // So the value here should be the maximum number that the function can accept.
62
0
    size_t get_number_of_arguments() const override {
63
0
        return assert_cast<const Derived&>(*this).number_of_arguments;
64
0
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_13FunctionEmbedEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE23get_number_of_argumentsEv
65
66
0
    bool is_blockable() const override { return true; }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_13FunctionEmbedEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE12is_blockableEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE12is_blockableEv
67
68
    virtual Status build_prompt(const Block& block, const ColumnNumbers& arguments, size_t row_num,
69
77
                                std::string& prompt) const {
70
77
        const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]);
71
77
        StringRef text_ref = text_column.column->get_data_at(row_num);
72
77
        prompt = std::string(text_ref.data, text_ref.size);
73
74
77
        return Status::OK();
75
77
    }
_ZNK5doris10AIFunctionINS_13FunctionEmbedEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
69
7
                                std::string& prompt) const {
70
7
        const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]);
71
7
        StringRef text_ref = text_column.column->get_data_at(row_num);
72
7
        prompt = std::string(text_ref.data, text_ref.size);
73
74
7
        return Status::OK();
75
7
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
69
63
                                std::string& prompt) const {
70
63
        const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]);
71
63
        StringRef text_ref = text_column.column->get_data_at(row_num);
72
63
        prompt = std::string(text_ref.data, text_ref.size);
73
74
63
        return Status::OK();
75
63
    }
_ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
69
1
                                std::string& prompt) const {
70
1
        const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]);
71
1
        StringRef text_ref = text_column.column->get_data_at(row_num);
72
1
        prompt = std::string(text_ref.data, text_ref.size);
73
74
1
        return Status::OK();
75
1
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
69
5
                                std::string& prompt) const {
70
5
        const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]);
71
5
        StringRef text_ref = text_column.column->get_data_at(row_num);
72
5
        prompt = std::string(text_ref.data, text_ref.size);
73
74
5
        return Status::OK();
75
5
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
_ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
69
1
                                std::string& prompt) const {
70
1
        const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]);
71
1
        StringRef text_ref = text_column.column->get_data_at(row_num);
72
1
        prompt = std::string(text_ref.data, text_ref.size);
73
74
1
        return Status::OK();
75
1
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
76
77
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
78
79
                        uint32_t result, size_t input_rows_count) const override {
79
79
        TAIResource config;
80
79
        std::shared_ptr<AIAdapter> adapter;
81
79
        if (Status status = this->_init_from_resource(context, block, arguments, config, adapter);
82
79
            !status.ok()) {
83
0
            return status;
84
0
        }
85
86
79
        return assert_cast<const Derived&>(*this).execute_with_adapter(
87
79
                context, block, arguments, result, input_rows_count, config, adapter);
88
79
    }
_ZNK5doris10AIFunctionINS_13FunctionEmbedEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
78
10
                        uint32_t result, size_t input_rows_count) const override {
79
10
        TAIResource config;
80
10
        std::shared_ptr<AIAdapter> adapter;
81
10
        if (Status status = this->_init_from_resource(context, block, arguments, config, adapter);
82
10
            !status.ok()) {
83
0
            return status;
84
0
        }
85
86
10
        return assert_cast<const Derived&>(*this).execute_with_adapter(
87
10
                context, block, arguments, result, input_rows_count, config, adapter);
88
10
    }
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
78
46
                        uint32_t result, size_t input_rows_count) const override {
79
46
        TAIResource config;
80
46
        std::shared_ptr<AIAdapter> adapter;
81
46
        if (Status status = this->_init_from_resource(context, block, arguments, config, adapter);
82
46
            !status.ok()) {
83
0
            return status;
84
0
        }
85
86
46
        return assert_cast<const Derived&>(*this).execute_with_adapter(
87
46
                context, block, arguments, result, input_rows_count, config, adapter);
88
46
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
78
2
                        uint32_t result, size_t input_rows_count) const override {
79
2
        TAIResource config;
80
2
        std::shared_ptr<AIAdapter> adapter;
81
2
        if (Status status = this->_init_from_resource(context, block, arguments, config, adapter);
82
2
            !status.ok()) {
83
0
            return status;
84
0
        }
85
86
2
        return assert_cast<const Derived&>(*this).execute_with_adapter(
87
2
                context, block, arguments, result, input_rows_count, config, adapter);
88
2
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
_ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
78
21
                        uint32_t result, size_t input_rows_count) const override {
79
21
        TAIResource config;
80
21
        std::shared_ptr<AIAdapter> adapter;
81
21
        if (Status status = this->_init_from_resource(context, block, arguments, config, adapter);
82
21
            !status.ok()) {
83
0
            return status;
84
0
        }
85
86
21
        return assert_cast<const Derived&>(*this).execute_with_adapter(
87
21
                context, block, arguments, result, input_rows_count, config, adapter);
88
21
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
89
90
protected:
91
    // Reads the shared AI context window size from query options. String AI batch functions and
92
    // ai_agg both use the same byte-based session variable so batching behavior stays consistent.
93
72
    static int64_t get_ai_context_window_size(FunctionContext* context) {
94
72
        DORIS_CHECK(context != nullptr);
95
72
        QueryContext* query_ctx = context->state()->get_query_ctx();
96
72
        DORIS_CHECK(query_ctx != nullptr);
97
98
72
        return query_ctx->query_options().ai_context_window_size;
99
72
    }
_ZN5doris10AIFunctionINS_13FunctionEmbedEE26get_ai_context_window_sizeEPNS_15FunctionContextE
Line
Count
Source
93
3
    static int64_t get_ai_context_window_size(FunctionContext* context) {
94
3
        DORIS_CHECK(context != nullptr);
95
3
        QueryContext* query_ctx = context->state()->get_query_ctx();
96
3
        DORIS_CHECK(query_ctx != nullptr);
97
98
3
        return query_ctx->query_options().ai_context_window_size;
99
3
    }
_ZN5doris10AIFunctionINS_16FunctionAIFilterEE26get_ai_context_window_sizeEPNS_15FunctionContextE
Line
Count
Source
93
46
    static int64_t get_ai_context_window_size(FunctionContext* context) {
94
46
        DORIS_CHECK(context != nullptr);
95
46
        QueryContext* query_ctx = context->state()->get_query_ctx();
96
46
        DORIS_CHECK(query_ctx != nullptr);
97
98
46
        return query_ctx->query_options().ai_context_window_size;
99
46
    }
Unexecuted instantiation: _ZN5doris10AIFunctionINS_20FunctionAIFixGrammarEE26get_ai_context_window_sizeEPNS_15FunctionContextE
_ZN5doris10AIFunctionINS_19FunctionAISentimentEE26get_ai_context_window_sizeEPNS_15FunctionContextE
Line
Count
Source
93
2
    static int64_t get_ai_context_window_size(FunctionContext* context) {
94
2
        DORIS_CHECK(context != nullptr);
95
2
        QueryContext* query_ctx = context->state()->get_query_ctx();
96
2
        DORIS_CHECK(query_ctx != nullptr);
97
98
2
        return query_ctx->query_options().ai_context_window_size;
99
2
    }
Unexecuted instantiation: _ZN5doris10AIFunctionINS_19FunctionAISummarizeEE26get_ai_context_window_sizeEPNS_15FunctionContextE
Unexecuted instantiation: _ZN5doris10AIFunctionINS_18FunctionAIClassifyEE26get_ai_context_window_sizeEPNS_15FunctionContextE
Unexecuted instantiation: _ZN5doris10AIFunctionINS_17FunctionAIExtractEE26get_ai_context_window_sizeEPNS_15FunctionContextE
Unexecuted instantiation: _ZN5doris10AIFunctionINS_18FunctionAIGenerateEE26get_ai_context_window_sizeEPNS_15FunctionContextE
Unexecuted instantiation: _ZN5doris10AIFunctionINS_14FunctionAIMaskEE26get_ai_context_window_sizeEPNS_15FunctionContextE
_ZN5doris10AIFunctionINS_20FunctionAISimilarityEE26get_ai_context_window_sizeEPNS_15FunctionContextE
Line
Count
Source
93
21
    static int64_t get_ai_context_window_size(FunctionContext* context) {
94
21
        DORIS_CHECK(context != nullptr);
95
21
        QueryContext* query_ctx = context->state()->get_query_ctx();
96
21
        DORIS_CHECK(query_ctx != nullptr);
97
98
21
        return query_ctx->query_options().ai_context_window_size;
99
21
    }
Unexecuted instantiation: _ZN5doris10AIFunctionINS_19FunctionAITranslateEE26get_ai_context_window_sizeEPNS_15FunctionContextE
100
101
    // Derived classes can override this method for non-text/default behavior.
102
    // The base implementation handles all string-input/string-output batchable functions.
103
    Status execute_with_adapter(FunctionContext* context, Block& block,
104
                                const ColumnNumbers& arguments, uint32_t result,
105
                                size_t input_rows_count, const TAIResource& config,
106
69
                                std::shared_ptr<AIAdapter>& adapter) const {
107
69
        auto col_result = assert_cast<const Derived&>(*this).create_result_column();
108
69
        RETURN_IF_ERROR(execute_batched_prompts(context, block, arguments, input_rows_count, config,
109
69
                                                adapter, *col_result));
110
111
36
        block.replace_by_position(result, std::move(col_result));
112
36
        return Status::OK();
113
69
    }
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Line
Count
Source
106
46
                                std::shared_ptr<AIAdapter>& adapter) const {
107
46
        auto col_result = assert_cast<const Derived&>(*this).create_result_column();
108
46
        RETURN_IF_ERROR(execute_batched_prompts(context, block, arguments, input_rows_count, config,
109
46
                                                adapter, *col_result));
110
111
19
        block.replace_by_position(result, std::move(col_result));
112
19
        return Status::OK();
113
46
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Line
Count
Source
106
2
                                std::shared_ptr<AIAdapter>& adapter) const {
107
2
        auto col_result = assert_cast<const Derived&>(*this).create_result_column();
108
2
        RETURN_IF_ERROR(execute_batched_prompts(context, block, arguments, input_rows_count, config,
109
2
                                                adapter, *col_result));
110
111
2
        block.replace_by_position(result, std::move(col_result));
112
2
        return Status::OK();
113
2
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
_ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Line
Count
Source
106
21
                                std::shared_ptr<AIAdapter>& adapter) const {
107
21
        auto col_result = assert_cast<const Derived&>(*this).create_result_column();
108
21
        RETURN_IF_ERROR(execute_batched_prompts(context, block, arguments, input_rows_count, config,
109
21
                                                adapter, *col_result));
110
111
15
        block.replace_by_position(result, std::move(col_result));
112
15
        return Status::OK();
113
21
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
114
115
2
    MutableColumnPtr create_result_column() const { return ColumnString::create(); }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE20create_result_columnEv
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE20create_result_columnEv
Line
Count
Source
115
2
    MutableColumnPtr create_result_column() const { return ColumnString::create(); }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE20create_result_columnEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE20create_result_columnEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE20create_result_columnEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE20create_result_columnEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE20create_result_columnEv
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE20create_result_columnEv
116
117
    // Provider-reusable hook for AI functions(string) -> string.
118
    Status append_batch_results(const std::vector<std::string>& batch_results,
119
2
                                IColumn& col_result) const {
120
2
        auto& string_col = assert_cast<ColumnString&>(col_result);
121
4
        for (const auto& batch_result : batch_results) {
122
4
            string_col.insert_data(batch_result.data(), batch_result.size());
123
4
        }
124
2
        return Status::OK();
125
2
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE
Line
Count
Source
119
2
                                IColumn& col_result) const {
120
2
        auto& string_col = assert_cast<ColumnString&>(col_result);
121
4
        for (const auto& batch_result : batch_results) {
122
4
            string_col.insert_data(batch_result.data(), batch_result.size());
123
4
        }
124
2
        return Status::OK();
125
2
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE
126
127
86
    static void normalize_endpoint(TAIResource& config) {
128
        // 1. If users configure only the version root like `.../v1` or `.../v1beta`, append
129
        //    `models/<model>:batchEmbedContents` for `embed`, and `models/<model>:generateContent`
130
        //    for other AI scalar functions.
131
        // 2. `:embedContent` -> `:batchEmbedContents`
132
86
        if (iequal(config.provider_type, "GEMINI")) {
133
4
            if (iequal(Derived::name, "embed") && config.endpoint.ends_with(":embedContent")) {
134
1
                static constexpr std::string_view legacy_suffix = ":embedContent";
135
1
                config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
136
1
                                        legacy_suffix.size(), ":batchEmbedContents");
137
1
                return;
138
1
            }
139
140
3
            if (!config.endpoint.ends_with("v1") && !config.endpoint.ends_with("v1beta")) {
141
1
                return;
142
1
            }
143
144
2
            std::string model_name = config.model_name;
145
2
            if (!model_name.starts_with("models/")) {
146
2
                model_name = "models/" + model_name;
147
2
            }
148
149
2
            config.endpoint += "/";
150
2
            config.endpoint += model_name;
151
2
            config.endpoint +=
152
2
                    iequal(Derived::name, "embed") ? ":batchEmbedContents" : ":generateContent";
153
2
            return;
154
3
        }
155
156
        // The endpoint `v1/completions` does not support `system_prompt`.
157
        // To ensure a clear structure and stable AI results.
158
        // Convert from `v1/completions` to `v1/chat/completions`
159
82
        if (config.endpoint.ends_with("v1/completions")) {
160
1
            static constexpr std::string_view legacy_suffix = "v1/completions";
161
1
            config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
162
1
                                    legacy_suffix.size(), "v1/chat/completions");
163
1
        }
164
82
    }
_ZN5doris10AIFunctionINS_13FunctionEmbedEE18normalize_endpointERNS_11TAIResourceE
Line
Count
Source
127
12
    static void normalize_endpoint(TAIResource& config) {
128
        // 1. If users configure only the version root like `.../v1` or `.../v1beta`, append
129
        //    `models/<model>:batchEmbedContents` for `embed`, and `models/<model>:generateContent`
130
        //    for other AI scalar functions.
131
        // 2. `:embedContent` -> `:batchEmbedContents`
132
12
        if (iequal(config.provider_type, "GEMINI")) {
133
2
            if (iequal(Derived::name, "embed") && config.endpoint.ends_with(":embedContent")) {
134
1
                static constexpr std::string_view legacy_suffix = ":embedContent";
135
1
                config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
136
1
                                        legacy_suffix.size(), ":batchEmbedContents");
137
1
                return;
138
1
            }
139
140
1
            if (!config.endpoint.ends_with("v1") && !config.endpoint.ends_with("v1beta")) {
141
0
                return;
142
0
            }
143
144
1
            std::string model_name = config.model_name;
145
1
            if (!model_name.starts_with("models/")) {
146
1
                model_name = "models/" + model_name;
147
1
            }
148
149
1
            config.endpoint += "/";
150
1
            config.endpoint += model_name;
151
1
            config.endpoint +=
152
1
                    iequal(Derived::name, "embed") ? ":batchEmbedContents" : ":generateContent";
153
1
            return;
154
1
        }
155
156
        // The endpoint `v1/completions` does not support `system_prompt`.
157
        // To ensure a clear structure and stable AI results.
158
        // Convert from `v1/completions` to `v1/chat/completions`
159
10
        if (config.endpoint.ends_with("v1/completions")) {
160
0
            static constexpr std::string_view legacy_suffix = "v1/completions";
161
0
            config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
162
0
                                    legacy_suffix.size(), "v1/chat/completions");
163
0
        }
164
10
    }
_ZN5doris10AIFunctionINS_16FunctionAIFilterEE18normalize_endpointERNS_11TAIResourceE
Line
Count
Source
127
46
    static void normalize_endpoint(TAIResource& config) {
128
        // 1. If users configure only the version root like `.../v1` or `.../v1beta`, append
129
        //    `models/<model>:batchEmbedContents` for `embed`, and `models/<model>:generateContent`
130
        //    for other AI scalar functions.
131
        // 2. `:embedContent` -> `:batchEmbedContents`
132
46
        if (iequal(config.provider_type, "GEMINI")) {
133
0
            if (iequal(Derived::name, "embed") && config.endpoint.ends_with(":embedContent")) {
134
0
                static constexpr std::string_view legacy_suffix = ":embedContent";
135
0
                config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
136
0
                                        legacy_suffix.size(), ":batchEmbedContents");
137
0
                return;
138
0
            }
139
140
0
            if (!config.endpoint.ends_with("v1") && !config.endpoint.ends_with("v1beta")) {
141
0
                return;
142
0
            }
143
144
0
            std::string model_name = config.model_name;
145
0
            if (!model_name.starts_with("models/")) {
146
0
                model_name = "models/" + model_name;
147
0
            }
148
149
0
            config.endpoint += "/";
150
0
            config.endpoint += model_name;
151
0
            config.endpoint +=
152
0
                    iequal(Derived::name, "embed") ? ":batchEmbedContents" : ":generateContent";
153
0
            return;
154
0
        }
155
156
        // The endpoint `v1/completions` does not support `system_prompt`.
157
        // To ensure a clear structure and stable AI results.
158
        // Convert from `v1/completions` to `v1/chat/completions`
159
46
        if (config.endpoint.ends_with("v1/completions")) {
160
0
            static constexpr std::string_view legacy_suffix = "v1/completions";
161
0
            config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
162
0
                                    legacy_suffix.size(), "v1/chat/completions");
163
0
        }
164
46
    }
Unexecuted instantiation: _ZN5doris10AIFunctionINS_20FunctionAIFixGrammarEE18normalize_endpointERNS_11TAIResourceE
_ZN5doris10AIFunctionINS_19FunctionAISentimentEE18normalize_endpointERNS_11TAIResourceE
Line
Count
Source
127
7
    static void normalize_endpoint(TAIResource& config) {
128
        // 1. If users configure only the version root like `.../v1` or `.../v1beta`, append
129
        //    `models/<model>:batchEmbedContents` for `embed`, and `models/<model>:generateContent`
130
        //    for other AI scalar functions.
131
        // 2. `:embedContent` -> `:batchEmbedContents`
132
7
        if (iequal(config.provider_type, "GEMINI")) {
133
2
            if (iequal(Derived::name, "embed") && config.endpoint.ends_with(":embedContent")) {
134
0
                static constexpr std::string_view legacy_suffix = ":embedContent";
135
0
                config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
136
0
                                        legacy_suffix.size(), ":batchEmbedContents");
137
0
                return;
138
0
            }
139
140
2
            if (!config.endpoint.ends_with("v1") && !config.endpoint.ends_with("v1beta")) {
141
1
                return;
142
1
            }
143
144
1
            std::string model_name = config.model_name;
145
1
            if (!model_name.starts_with("models/")) {
146
1
                model_name = "models/" + model_name;
147
1
            }
148
149
1
            config.endpoint += "/";
150
1
            config.endpoint += model_name;
151
1
            config.endpoint +=
152
1
                    iequal(Derived::name, "embed") ? ":batchEmbedContents" : ":generateContent";
153
1
            return;
154
2
        }
155
156
        // The endpoint `v1/completions` does not support `system_prompt`.
157
        // To ensure a clear structure and stable AI results.
158
        // Convert from `v1/completions` to `v1/chat/completions`
159
5
        if (config.endpoint.ends_with("v1/completions")) {
160
1
            static constexpr std::string_view legacy_suffix = "v1/completions";
161
1
            config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
162
1
                                    legacy_suffix.size(), "v1/chat/completions");
163
1
        }
164
5
    }
Unexecuted instantiation: _ZN5doris10AIFunctionINS_19FunctionAISummarizeEE18normalize_endpointERNS_11TAIResourceE
Unexecuted instantiation: _ZN5doris10AIFunctionINS_18FunctionAIClassifyEE18normalize_endpointERNS_11TAIResourceE
Unexecuted instantiation: _ZN5doris10AIFunctionINS_17FunctionAIExtractEE18normalize_endpointERNS_11TAIResourceE
Unexecuted instantiation: _ZN5doris10AIFunctionINS_18FunctionAIGenerateEE18normalize_endpointERNS_11TAIResourceE
Unexecuted instantiation: _ZN5doris10AIFunctionINS_14FunctionAIMaskEE18normalize_endpointERNS_11TAIResourceE
_ZN5doris10AIFunctionINS_20FunctionAISimilarityEE18normalize_endpointERNS_11TAIResourceE
Line
Count
Source
127
21
    static void normalize_endpoint(TAIResource& config) {
128
        // 1. If users configure only the version root like `.../v1` or `.../v1beta`, append
129
        //    `models/<model>:batchEmbedContents` for `embed`, and `models/<model>:generateContent`
130
        //    for other AI scalar functions.
131
        // 2. `:embedContent` -> `:batchEmbedContents`
132
21
        if (iequal(config.provider_type, "GEMINI")) {
133
0
            if (iequal(Derived::name, "embed") && config.endpoint.ends_with(":embedContent")) {
134
0
                static constexpr std::string_view legacy_suffix = ":embedContent";
135
0
                config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
136
0
                                        legacy_suffix.size(), ":batchEmbedContents");
137
0
                return;
138
0
            }
139
140
0
            if (!config.endpoint.ends_with("v1") && !config.endpoint.ends_with("v1beta")) {
141
0
                return;
142
0
            }
143
144
0
            std::string model_name = config.model_name;
145
0
            if (!model_name.starts_with("models/")) {
146
0
                model_name = "models/" + model_name;
147
0
            }
148
149
0
            config.endpoint += "/";
150
0
            config.endpoint += model_name;
151
0
            config.endpoint +=
152
0
                    iequal(Derived::name, "embed") ? ":batchEmbedContents" : ":generateContent";
153
0
            return;
154
0
        }
155
156
        // The endpoint `v1/completions` does not support `system_prompt`.
157
        // To ensure a clear structure and stable AI results.
158
        // Convert from `v1/completions` to `v1/chat/completions`
159
21
        if (config.endpoint.ends_with("v1/completions")) {
160
0
            static constexpr std::string_view legacy_suffix = "v1/completions";
161
0
            config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(),
162
0
                                    legacy_suffix.size(), "v1/chat/completions");
163
0
        }
164
21
    }
Unexecuted instantiation: _ZN5doris10AIFunctionINS_19FunctionAITranslateEE18normalize_endpointERNS_11TAIResourceE
165
166
    // Executes one HTTP POST request and validates transport-level success.
167
    Status do_send_request(HttpClient* client, const std::string& request_body,
168
                           std::string& response, const TAIResource& config,
169
3
                           std::shared_ptr<AIAdapter>& adapter, FunctionContext* context) const {
170
3
        RETURN_IF_ERROR(client->init(config.endpoint, false));
171
172
3
        QueryContext* query_ctx = context->state()->get_query_ctx();
173
3
        int64_t remaining_query_time = query_ctx->get_remaining_query_time_seconds();
174
3
        if (remaining_query_time <= 0) {
175
0
            return Status::TimedOut("Query timeout exceeded before AI request");
176
0
        }
177
178
3
        client->set_timeout_ms(remaining_query_time * 1000);
179
180
3
        if (!config.api_key.empty()) {
181
3
            RETURN_IF_ERROR(adapter->set_authentication(client));
182
3
        }
183
184
3
        Status st = client->execute_post_request(request_body, &response);
185
3
        long http_status = client->get_http_status();
186
187
3
        if (!st.ok()) {
188
1
            LOG(INFO) << "AI HTTP request failed before status validation, provider="
189
1
                      << config.provider_type << ", model=" << config.model_name
190
1
                      << ", endpoint=" << mask_token(config.endpoint)
191
1
                      << ", exec_status=" << st.to_string() << ", response_body=" << response;
192
1
            return st;
193
1
        }
194
2
        if (http_status != 200) {
195
1
            return Status::HttpError(
196
1
                    "http status code is not 200, code={}, url={}, response_body={}", http_status,
197
1
                    mask_token(config.endpoint), response);
198
1
        }
199
1
        return Status::OK();
200
2
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_13FunctionEmbedEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Line
Count
Source
169
3
                           std::shared_ptr<AIAdapter>& adapter, FunctionContext* context) const {
170
3
        RETURN_IF_ERROR(client->init(config.endpoint, false));
171
172
3
        QueryContext* query_ctx = context->state()->get_query_ctx();
173
3
        int64_t remaining_query_time = query_ctx->get_remaining_query_time_seconds();
174
3
        if (remaining_query_time <= 0) {
175
0
            return Status::TimedOut("Query timeout exceeded before AI request");
176
0
        }
177
178
3
        client->set_timeout_ms(remaining_query_time * 1000);
179
180
3
        if (!config.api_key.empty()) {
181
3
            RETURN_IF_ERROR(adapter->set_authentication(client));
182
3
        }
183
184
3
        Status st = client->execute_post_request(request_body, &response);
185
3
        long http_status = client->get_http_status();
186
187
3
        if (!st.ok()) {
188
1
            LOG(INFO) << "AI HTTP request failed before status validation, provider="
189
1
                      << config.provider_type << ", model=" << config.model_name
190
1
                      << ", endpoint=" << mask_token(config.endpoint)
191
1
                      << ", exec_status=" << st.to_string() << ", response_body=" << response;
192
1
            return st;
193
1
        }
194
2
        if (http_status != 200) {
195
1
            return Status::HttpError(
196
1
                    "http status code is not 200, code={}, url={}, response_body={}", http_status,
197
1
                    mask_token(config.endpoint), response);
198
1
        }
199
1
        return Status::OK();
200
2
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
201
202
    // Sends the request with retry mechanism for handling transient failures
203
    Status send_request_to_llm(const std::string& request_body, std::string& response,
204
                               const TAIResource& config, std::shared_ptr<AIAdapter>& adapter,
205
0
                               FunctionContext* context) const {
206
0
        return HttpClient::execute_with_retry(config.max_retries, config.retry_delay_second,
207
0
                                              [this, &request_body, &response, &config, &adapter,
208
0
                                               context](HttpClient* client) -> Status {
209
0
                                                  return this->do_send_request(client, request_body,
210
0
                                                                               response, config,
211
0
                                                                               adapter, context);
212
0
                                              });
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_13FunctionEmbedEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_16FunctionAIFilterEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_19FunctionAISentimentEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_19FunctionAISummarizeEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_18FunctionAIClassifyEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_17FunctionAIExtractEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_18FunctionAIGenerateEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_14FunctionAIMaskEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_20FunctionAISimilarityEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_19FunctionAITranslateEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_
213
0
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_13FunctionEmbedEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
214
215
    // Provider-reusable helper for string-returning functions.
216
    // Estimates one batch entry size using the raw prompt length plus the fixed JSON wrapper cost.
217
89
    size_t estimate_batch_entry_size(size_t idx, const std::string& prompt) const {
218
89
        static constexpr size_t json_wrapper_size = 20;
219
89
        return prompt.size() + std::to_string(idx).size() + json_wrapper_size;
220
89
    }
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
217
62
    size_t estimate_batch_entry_size(size_t idx, const std::string& prompt) const {
218
62
        static constexpr size_t json_wrapper_size = 20;
219
62
        return prompt.size() + std::to_string(idx).size() + json_wrapper_size;
220
62
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
217
4
    size_t estimate_batch_entry_size(size_t idx, const std::string& prompt) const {
218
4
        static constexpr size_t json_wrapper_size = 20;
219
4
        return prompt.size() + std::to_string(idx).size() + json_wrapper_size;
220
4
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
_ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
217
23
    size_t estimate_batch_entry_size(size_t idx, const std::string& prompt) const {
218
23
        static constexpr size_t json_wrapper_size = 20;
219
23
        return prompt.size() + std::to_string(idx).size() + json_wrapper_size;
220
23
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
221
222
    // Provider-reusable helper for string-returning functions.
223
    // Executes one batch request and parses the provider result into one string per input row.
224
    Status execute_batch_request(const std::vector<std::string>& batch_prompts,
225
                                 std::vector<std::string>& results, const TAIResource& config,
226
                                 std::shared_ptr<AIAdapter>& adapter,
227
0
                                 FunctionContext* context) const {
228
#ifdef BE_TEST
229
        const char* test_result = std::getenv("AI_TEST_RESULT");
230
        if (test_result != nullptr) {
231
            std::vector<std::string> parsed_test_response;
232
            RETURN_IF_ERROR(
233
                    adapter->parse_response(std::string(test_result), parsed_test_response));
234
            if (parsed_test_response.empty()) {
235
                return Status::InternalError("AI returned empty result");
236
            }
237
            if (parsed_test_response.size() != batch_prompts.size()) {
238
                return Status::RuntimeError(
239
                        "Failed to parse {} batch result, expected {} items but got {}", get_name(),
240
                        batch_prompts.size(), parsed_test_response.size());
241
            }
242
            results = std::move(parsed_test_response);
243
            return Status::OK();
244
        }
245
        if (config.provider_type == "MOCK") {
246
            results.clear();
247
            results.reserve(batch_prompts.size());
248
            for (const auto& prompt : batch_prompts) {
249
                results.emplace_back("this is a mock response. " + prompt);
250
            }
251
            return Status::OK();
252
        }
253
#endif
254
255
0
        std::string batch_prompt;
256
0
        RETURN_IF_ERROR(build_batch_prompt(batch_prompts, batch_prompt));
257
258
0
        std::vector<std::string> inputs = {batch_prompt};
259
0
        std::vector<std::string> parsed_response;
260
261
0
        std::string request_body;
262
0
        RETURN_IF_ERROR(adapter->build_request_payload(
263
0
                inputs, assert_cast<const Derived&>(*this).system_prompt, request_body));
264
265
0
        std::string response;
266
0
        RETURN_IF_ERROR(send_request_to_llm(request_body, response, config, adapter, context));
267
0
        RETURN_IF_ERROR(adapter->parse_response(response, parsed_response));
268
0
        if (parsed_response.empty()) {
269
0
            return Status::InternalError("AI returned empty result");
270
0
        }
271
0
        if (parsed_response.size() != batch_prompts.size()) {
272
0
            LOG(WARNING) << "AI batch result size mismatch, function=" << get_name()
273
0
                         << ", provider=" << config.provider_type << ", model=" << config.model_name
274
0
                         << ", expected_rows=" << batch_prompts.size()
275
0
                         << ", actual_rows=" << parsed_response.size()
276
0
                         << ", response_body=" << response;
277
0
            return Status::RuntimeError(
278
0
                    "Failed to parse {} batch result, expected {} items but got {}", get_name(),
279
0
                    batch_prompts.size(), parsed_response.size());
280
0
        }
281
0
        results = std::move(parsed_response);
282
0
        return Status::OK();
283
0
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE
284
285
    // Provider-reusable helper for string-returning functions.
286
    // Runs the common batch execution flow; derived classes only need to define how one batch of
287
    // string results is inserted into the final output column.
288
    Status execute_batched_prompts(FunctionContext* context, Block& block,
289
                                   const ColumnNumbers& arguments, size_t input_rows_count,
290
                                   const TAIResource& config, std::shared_ptr<AIAdapter>& adapter,
291
69
                                   IColumn& col_result) const {
292
69
        std::vector<std::string> batch_prompts;
293
69
        size_t current_batch_size = 2; // []
294
69
        const size_t max_batch_prompt_size =
295
69
                static_cast<size_t>(get_ai_context_window_size(context));
296
297
158
        for (size_t i = 0; i < input_rows_count; ++i) {
298
89
            std::string prompt;
299
89
            RETURN_IF_ERROR(
300
89
                    assert_cast<const Derived&>(*this).build_prompt(block, arguments, i, prompt));
301
302
89
            size_t entry_size = estimate_batch_entry_size(batch_prompts.size(), prompt);
303
89
            if (entry_size > max_batch_prompt_size) {
304
4
                if (!batch_prompts.empty()) {
305
1
                    std::vector<std::string> batch_results;
306
1
                    RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results,
307
1
                                                                config, adapter, context));
308
1
                    RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
309
1
                            batch_results, col_result));
310
1
                    batch_prompts.clear();
311
1
                    current_batch_size = 2;
312
1
                }
313
314
4
                std::vector<std::string> single_prompts;
315
4
                single_prompts.emplace_back(std::move(prompt));
316
4
                std::vector<std::string> single_results;
317
4
                RETURN_IF_ERROR(this->execute_batch_request(single_prompts, single_results, config,
318
4
                                                            adapter, context));
319
4
                RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
320
4
                        single_results, col_result));
321
4
                continue;
322
4
            }
323
324
85
            size_t additional_size = entry_size + (batch_prompts.empty() ? 0 : 1);
325
85
            if (!batch_prompts.empty() &&
326
85
                current_batch_size + additional_size > max_batch_prompt_size) {
327
2
                std::vector<std::string> batch_results;
328
2
                RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config,
329
2
                                                            adapter, context));
330
2
                RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
331
2
                        batch_results, col_result));
332
2
                batch_prompts.clear();
333
2
                current_batch_size = 2;
334
2
                additional_size = entry_size;
335
2
            }
336
337
85
            batch_prompts.emplace_back(std::move(prompt));
338
85
            current_batch_size += additional_size;
339
85
        }
340
341
69
        if (!batch_prompts.empty()) {
342
66
            std::vector<std::string> batch_results;
343
66
            RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config,
344
66
                                                        adapter, context));
345
60
            RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(batch_results,
346
60
                                                                                    col_result));
347
60
        }
348
36
        return Status::OK();
349
69
    }
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
Line
Count
Source
291
46
                                   IColumn& col_result) const {
292
46
        std::vector<std::string> batch_prompts;
293
46
        size_t current_batch_size = 2; // []
294
46
        const size_t max_batch_prompt_size =
295
46
                static_cast<size_t>(get_ai_context_window_size(context));
296
297
108
        for (size_t i = 0; i < input_rows_count; ++i) {
298
62
            std::string prompt;
299
62
            RETURN_IF_ERROR(
300
62
                    assert_cast<const Derived&>(*this).build_prompt(block, arguments, i, prompt));
301
302
62
            size_t entry_size = estimate_batch_entry_size(batch_prompts.size(), prompt);
303
62
            if (entry_size > max_batch_prompt_size) {
304
4
                if (!batch_prompts.empty()) {
305
1
                    std::vector<std::string> batch_results;
306
1
                    RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results,
307
1
                                                                config, adapter, context));
308
1
                    RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
309
1
                            batch_results, col_result));
310
1
                    batch_prompts.clear();
311
1
                    current_batch_size = 2;
312
1
                }
313
314
4
                std::vector<std::string> single_prompts;
315
4
                single_prompts.emplace_back(std::move(prompt));
316
4
                std::vector<std::string> single_results;
317
4
                RETURN_IF_ERROR(this->execute_batch_request(single_prompts, single_results, config,
318
4
                                                            adapter, context));
319
4
                RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
320
4
                        single_results, col_result));
321
4
                continue;
322
4
            }
323
324
58
            size_t additional_size = entry_size + (batch_prompts.empty() ? 0 : 1);
325
58
            if (!batch_prompts.empty() &&
326
58
                current_batch_size + additional_size > max_batch_prompt_size) {
327
2
                std::vector<std::string> batch_results;
328
2
                RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config,
329
2
                                                            adapter, context));
330
2
                RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
331
2
                        batch_results, col_result));
332
2
                batch_prompts.clear();
333
2
                current_batch_size = 2;
334
2
                additional_size = entry_size;
335
2
            }
336
337
58
            batch_prompts.emplace_back(std::move(prompt));
338
58
            current_batch_size += additional_size;
339
58
        }
340
341
46
        if (!batch_prompts.empty()) {
342
43
            std::vector<std::string> batch_results;
343
43
            RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config,
344
43
                                                        adapter, context));
345
37
            RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(batch_results,
346
37
                                                                                    col_result));
347
37
        }
348
19
        return Status::OK();
349
46
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
Line
Count
Source
291
2
                                   IColumn& col_result) const {
292
2
        std::vector<std::string> batch_prompts;
293
2
        size_t current_batch_size = 2; // []
294
2
        const size_t max_batch_prompt_size =
295
2
                static_cast<size_t>(get_ai_context_window_size(context));
296
297
6
        for (size_t i = 0; i < input_rows_count; ++i) {
298
4
            std::string prompt;
299
4
            RETURN_IF_ERROR(
300
4
                    assert_cast<const Derived&>(*this).build_prompt(block, arguments, i, prompt));
301
302
4
            size_t entry_size = estimate_batch_entry_size(batch_prompts.size(), prompt);
303
4
            if (entry_size > max_batch_prompt_size) {
304
0
                if (!batch_prompts.empty()) {
305
0
                    std::vector<std::string> batch_results;
306
0
                    RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results,
307
0
                                                                config, adapter, context));
308
0
                    RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
309
0
                            batch_results, col_result));
310
0
                    batch_prompts.clear();
311
0
                    current_batch_size = 2;
312
0
                }
313
314
0
                std::vector<std::string> single_prompts;
315
0
                single_prompts.emplace_back(std::move(prompt));
316
0
                std::vector<std::string> single_results;
317
0
                RETURN_IF_ERROR(this->execute_batch_request(single_prompts, single_results, config,
318
0
                                                            adapter, context));
319
0
                RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
320
0
                        single_results, col_result));
321
0
                continue;
322
0
            }
323
324
4
            size_t additional_size = entry_size + (batch_prompts.empty() ? 0 : 1);
325
4
            if (!batch_prompts.empty() &&
326
4
                current_batch_size + additional_size > max_batch_prompt_size) {
327
0
                std::vector<std::string> batch_results;
328
0
                RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config,
329
0
                                                            adapter, context));
330
0
                RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
331
0
                        batch_results, col_result));
332
0
                batch_prompts.clear();
333
0
                current_batch_size = 2;
334
0
                additional_size = entry_size;
335
0
            }
336
337
4
            batch_prompts.emplace_back(std::move(prompt));
338
4
            current_batch_size += additional_size;
339
4
        }
340
341
2
        if (!batch_prompts.empty()) {
342
2
            std::vector<std::string> batch_results;
343
2
            RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config,
344
2
                                                        adapter, context));
345
2
            RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(batch_results,
346
2
                                                                                    col_result));
347
2
        }
348
2
        return Status::OK();
349
2
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
_ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
Line
Count
Source
291
21
                                   IColumn& col_result) const {
292
21
        std::vector<std::string> batch_prompts;
293
21
        size_t current_batch_size = 2; // []
294
21
        const size_t max_batch_prompt_size =
295
21
                static_cast<size_t>(get_ai_context_window_size(context));
296
297
44
        for (size_t i = 0; i < input_rows_count; ++i) {
298
23
            std::string prompt;
299
23
            RETURN_IF_ERROR(
300
23
                    assert_cast<const Derived&>(*this).build_prompt(block, arguments, i, prompt));
301
302
23
            size_t entry_size = estimate_batch_entry_size(batch_prompts.size(), prompt);
303
23
            if (entry_size > max_batch_prompt_size) {
304
0
                if (!batch_prompts.empty()) {
305
0
                    std::vector<std::string> batch_results;
306
0
                    RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results,
307
0
                                                                config, adapter, context));
308
0
                    RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
309
0
                            batch_results, col_result));
310
0
                    batch_prompts.clear();
311
0
                    current_batch_size = 2;
312
0
                }
313
314
0
                std::vector<std::string> single_prompts;
315
0
                single_prompts.emplace_back(std::move(prompt));
316
0
                std::vector<std::string> single_results;
317
0
                RETURN_IF_ERROR(this->execute_batch_request(single_prompts, single_results, config,
318
0
                                                            adapter, context));
319
0
                RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
320
0
                        single_results, col_result));
321
0
                continue;
322
0
            }
323
324
23
            size_t additional_size = entry_size + (batch_prompts.empty() ? 0 : 1);
325
23
            if (!batch_prompts.empty() &&
326
23
                current_batch_size + additional_size > max_batch_prompt_size) {
327
0
                std::vector<std::string> batch_results;
328
0
                RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config,
329
0
                                                            adapter, context));
330
0
                RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(
331
0
                        batch_results, col_result));
332
0
                batch_prompts.clear();
333
0
                current_batch_size = 2;
334
0
                additional_size = entry_size;
335
0
            }
336
337
23
            batch_prompts.emplace_back(std::move(prompt));
338
23
            current_batch_size += additional_size;
339
23
        }
340
341
21
        if (!batch_prompts.empty()) {
342
21
            std::vector<std::string> batch_results;
343
21
            RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config,
344
21
                                                        adapter, context));
345
21
            RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(batch_results,
346
21
                                                                                    col_result));
347
21
        }
348
15
        return Status::OK();
349
21
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE
350
351
private:
352
    // The ai resource must be literal
353
    Status _init_from_resource(FunctionContext* context, const Block& block,
354
                               const ColumnNumbers& arguments, TAIResource& config,
355
79
                               std::shared_ptr<AIAdapter>& adapter) const {
356
79
        const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]);
357
79
        StringRef resource_name_ref = resource_column.column->get_data_at(0);
358
79
        std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size);
359
360
79
        const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources =
361
79
                context->state()->get_query_ctx()->get_ai_resources();
362
79
        DORIS_CHECK(ai_resources);
363
79
        auto it = ai_resources->find(resource_name);
364
79
        DORIS_CHECK(it != ai_resources->end());
365
79
        config = it->second;
366
367
79
        normalize_endpoint(config);
368
369
79
        adapter = AIAdapterFactory::create_adapter(config.provider_type);
370
79
        DORIS_CHECK(adapter);
371
372
79
        adapter->init(config);
373
79
        return Status::OK();
374
79
    }
_ZNK5doris10AIFunctionINS_13FunctionEmbedEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Line
Count
Source
355
10
                               std::shared_ptr<AIAdapter>& adapter) const {
356
10
        const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]);
357
10
        StringRef resource_name_ref = resource_column.column->get_data_at(0);
358
10
        std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size);
359
360
10
        const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources =
361
10
                context->state()->get_query_ctx()->get_ai_resources();
362
10
        DORIS_CHECK(ai_resources);
363
10
        auto it = ai_resources->find(resource_name);
364
10
        DORIS_CHECK(it != ai_resources->end());
365
10
        config = it->second;
366
367
10
        normalize_endpoint(config);
368
369
10
        adapter = AIAdapterFactory::create_adapter(config.provider_type);
370
10
        DORIS_CHECK(adapter);
371
372
10
        adapter->init(config);
373
10
        return Status::OK();
374
10
    }
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Line
Count
Source
355
46
                               std::shared_ptr<AIAdapter>& adapter) const {
356
46
        const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]);
357
46
        StringRef resource_name_ref = resource_column.column->get_data_at(0);
358
46
        std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size);
359
360
46
        const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources =
361
46
                context->state()->get_query_ctx()->get_ai_resources();
362
46
        DORIS_CHECK(ai_resources);
363
46
        auto it = ai_resources->find(resource_name);
364
46
        DORIS_CHECK(it != ai_resources->end());
365
46
        config = it->second;
366
367
46
        normalize_endpoint(config);
368
369
46
        adapter = AIAdapterFactory::create_adapter(config.provider_type);
370
46
        DORIS_CHECK(adapter);
371
372
46
        adapter->init(config);
373
46
        return Status::OK();
374
46
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Line
Count
Source
355
2
                               std::shared_ptr<AIAdapter>& adapter) const {
356
2
        const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]);
357
2
        StringRef resource_name_ref = resource_column.column->get_data_at(0);
358
2
        std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size);
359
360
2
        const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources =
361
2
                context->state()->get_query_ctx()->get_ai_resources();
362
2
        DORIS_CHECK(ai_resources);
363
2
        auto it = ai_resources->find(resource_name);
364
2
        DORIS_CHECK(it != ai_resources->end());
365
2
        config = it->second;
366
367
2
        normalize_endpoint(config);
368
369
2
        adapter = AIAdapterFactory::create_adapter(config.provider_type);
370
2
        DORIS_CHECK(adapter);
371
372
2
        adapter->init(config);
373
2
        return Status::OK();
374
2
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
_ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
Line
Count
Source
355
21
                               std::shared_ptr<AIAdapter>& adapter) const {
356
21
        const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]);
357
21
        StringRef resource_name_ref = resource_column.column->get_data_at(0);
358
21
        std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size);
359
360
21
        const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources =
361
21
                context->state()->get_query_ctx()->get_ai_resources();
362
21
        DORIS_CHECK(ai_resources);
363
21
        auto it = ai_resources->find(resource_name);
364
21
        DORIS_CHECK(it != ai_resources->end());
365
21
        config = it->second;
366
367
21
        normalize_endpoint(config);
368
369
21
        adapter = AIAdapterFactory::create_adapter(config.provider_type);
370
21
        DORIS_CHECK(adapter);
371
372
21
        adapter->init(config);
373
21
        return Status::OK();
374
21
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE
375
376
    // Serializes one text batch into the shared JSON-array prompt format consumed by LLM
377
    // providers for batch string functions.
378
    Status build_batch_prompt(const std::vector<std::string>& batch_prompts,
379
0
                              std::string& prompt) const {
380
0
        rapidjson::StringBuffer buffer;
381
0
        rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
382
383
0
        writer.StartArray();
384
0
        for (size_t i = 0; i < batch_prompts.size(); ++i) {
385
0
            writer.StartObject();
386
0
            writer.Key("idx");
387
0
            writer.Uint64(i);
388
0
            writer.Key("input");
389
0
            writer.String(batch_prompts[i].data(),
390
0
                          static_cast<rapidjson::SizeType>(batch_prompts[i].size()));
391
0
            writer.EndObject();
392
0
        }
393
0
        writer.EndArray();
394
395
0
        prompt = buffer.GetString();
396
0
        return Status::OK();
397
0
    }
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_
398
};
399
400
} // namespace doris