Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <gen_cpp/FrontendService.h> |
21 | | #include <gen_cpp/PaloInternalService_types.h> |
22 | | #include <glog/logging.h> |
23 | | |
24 | | #include <algorithm> |
25 | | #include <cstdlib> |
26 | | #include <memory> |
27 | | #include <string> |
28 | | #include <type_traits> |
29 | | #include <utility> |
30 | | #include <vector> |
31 | | |
32 | | #include "common/config.h" |
33 | | #include "common/status.h" |
34 | | #include "core/column/column_array.h" |
35 | | #include "core/column/column_const.h" |
36 | | #include "core/column/column_nullable.h" |
37 | | #include "core/cow.h" |
38 | | #include "core/data_type/data_type_array.h" |
39 | | #include "core/data_type/data_type_number.h" |
40 | | #include "core/data_type/define_primitive_type.h" |
41 | | #include "core/data_type/primitive_type.h" |
42 | | #include "exprs/function/ai/ai_adapter.h" |
43 | | #include "exprs/function/function.h" |
44 | | #include "runtime/query_context.h" |
45 | | #include "runtime/runtime_state.h" |
46 | | #include "service/http/http_client.h" |
47 | | #include "util/security.h" |
48 | | #include "util/string_util.h" |
49 | | #include "util/threadpool.h" |
50 | | |
51 | | namespace doris { |
52 | | |
53 | | // Base class for AI-based functions |
54 | | template <typename Derived> |
55 | | class AIFunction : public IFunction { |
56 | | public: |
57 | | static constexpr size_t max_batch_prompt_size = 128 * 1024; |
58 | | |
59 | 16 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; }_ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_17FunctionAIExtractEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_19FunctionAISentimentEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_14FunctionAIMaskEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_19FunctionAITranslateEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 6 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
_ZNK5doris10AIFunctionINS_13FunctionEmbedEE8get_nameB5cxx11Ev Line | Count | Source | 59 | 1 | std::string get_name() const override { return assert_cast<const Derived&>(*this).name; } |
|
60 | | |
61 | | // If the user doesn't provide the first argument, `resource_name`, |
62 | | // FE adds `resource_name` to the argument list using the session variable. |
63 | | // So the value here should be the maximum number of arguments that the function can accept. |
64 | 0 | size_t get_number_of_arguments() const override { |
65 | 0 | return assert_cast<const Derived&>(*this).number_of_arguments; |
66 | 0 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_13FunctionEmbedEE23get_number_of_argumentsEv |
67 | | |
68 | 0 | bool is_blockable() const override { return true; }Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE12is_blockableEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_13FunctionEmbedEE12is_blockableEv |
69 | | |
70 | | virtual Status build_prompt(const Block& block, const ColumnNumbers& arguments, size_t row_num, |
71 | 73 | std::string& prompt) const { |
72 | 73 | const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]); |
73 | 73 | StringRef text_ref = text_column.column->get_data_at(row_num); |
74 | 73 | prompt = std::string(text_ref.data, text_ref.size); |
75 | | |
76 | 73 | return Status::OK(); |
77 | 73 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 71 | 5 | std::string& prompt) const { | 72 | 5 | const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]); | 73 | 5 | StringRef text_ref = text_column.column->get_data_at(row_num); | 74 | 5 | prompt = std::string(text_ref.data, text_ref.size); | 75 | | | 76 | 5 | return Status::OK(); | 77 | 5 | } |
_ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 71 | 1 | std::string& prompt) const { | 72 | 1 | const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]); | 73 | 1 | StringRef text_ref = text_column.column->get_data_at(row_num); | 74 | 1 | prompt = std::string(text_ref.data, text_ref.size); | 75 | | | 76 | 1 | return Status::OK(); | 77 | 1 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 71 | 1 | std::string& prompt) const { | 72 | 1 | const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]); | 73 | 1 | StringRef text_ref = text_column.column->get_data_at(row_num); | 74 | 1 | prompt = std::string(text_ref.data, text_ref.size); | 75 | | | 76 | 1 | return Status::OK(); | 77 | 1 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 71 | 59 | std::string& prompt) const { | 72 | 59 | const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]); | 73 | 59 | StringRef text_ref = text_column.column->get_data_at(row_num); | 74 | 59 | prompt = std::string(text_ref.data, text_ref.size); | 75 | | | 76 | 59 | return Status::OK(); | 77 | 59 | } |
_ZNK5doris10AIFunctionINS_13FunctionEmbedEE12build_promptERKNS_5BlockERKSt6vectorIjSaIjEEmRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 71 | 7 | std::string& prompt) const { | 72 | 7 | const ColumnWithTypeAndName& text_column = block.get_by_position(arguments[1]); | 73 | 7 | StringRef text_ref = text_column.column->get_data_at(row_num); | 74 | 7 | prompt = std::string(text_ref.data, text_ref.size); | 75 | | | 76 | 7 | return Status::OK(); | 77 | 7 | } |
|
78 | | |
79 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
80 | 79 | uint32_t result, size_t input_rows_count) const override { |
81 | 79 | TAIResource config; |
82 | 79 | std::shared_ptr<AIAdapter> adapter; |
83 | 79 | if (Status status = this->_init_from_resource(context, block, arguments, config, adapter); |
84 | 79 | !status.ok()) { |
85 | 2 | return status; |
86 | 2 | } |
87 | | |
88 | 77 | return assert_cast<const Derived&>(*this).execute_with_adapter( |
89 | 77 | context, block, arguments, result, input_rows_count, config, adapter); |
90 | 79 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 80 | 4 | uint32_t result, size_t input_rows_count) const override { | 81 | 4 | TAIResource config; | 82 | 4 | std::shared_ptr<AIAdapter> adapter; | 83 | 4 | if (Status status = this->_init_from_resource(context, block, arguments, config, adapter); | 84 | 4 | !status.ok()) { | 85 | 2 | return status; | 86 | 2 | } | 87 | | | 88 | 2 | return assert_cast<const Derived&>(*this).execute_with_adapter( | 89 | 2 | context, block, arguments, result, input_rows_count, config, adapter); | 90 | 4 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 80 | 21 | uint32_t result, size_t input_rows_count) const override { | 81 | 21 | TAIResource config; | 82 | 21 | std::shared_ptr<AIAdapter> adapter; | 83 | 21 | if (Status status = this->_init_from_resource(context, block, arguments, config, adapter); | 84 | 21 | !status.ok()) { | 85 | 0 | return status; | 86 | 0 | } | 87 | | | 88 | 21 | return assert_cast<const Derived&>(*this).execute_with_adapter( | 89 | 21 | context, block, arguments, result, input_rows_count, config, adapter); | 90 | 21 | } |
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 80 | 44 | uint32_t result, size_t input_rows_count) const override { | 81 | 44 | TAIResource config; | 82 | 44 | std::shared_ptr<AIAdapter> adapter; | 83 | 44 | if (Status status = this->_init_from_resource(context, block, arguments, config, adapter); | 84 | 44 | !status.ok()) { | 85 | 0 | return status; | 86 | 0 | } | 87 | | | 88 | 44 | return assert_cast<const Derived&>(*this).execute_with_adapter( | 89 | 44 | context, block, arguments, result, input_rows_count, config, adapter); | 90 | 44 | } |
_ZNK5doris10AIFunctionINS_13FunctionEmbedEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 80 | 10 | uint32_t result, size_t input_rows_count) const override { | 81 | 10 | TAIResource config; | 82 | 10 | std::shared_ptr<AIAdapter> adapter; | 83 | 10 | if (Status status = this->_init_from_resource(context, block, arguments, config, adapter); | 84 | 10 | !status.ok()) { | 85 | 0 | return status; | 86 | 0 | } | 87 | | | 88 | 10 | return assert_cast<const Derived&>(*this).execute_with_adapter( | 89 | 10 | context, block, arguments, result, input_rows_count, config, adapter); | 90 | 10 | } |
|
91 | | |
92 | | protected: |
93 | | // Derived classes can override this method for non-text/default behavior. |
94 | | // The base implementation handles all string-input/string-output batchable functions. |
95 | | Status execute_with_adapter(FunctionContext* context, Block& block, |
96 | | const ColumnNumbers& arguments, uint32_t result, |
97 | | size_t input_rows_count, const TAIResource& config, |
98 | 67 | std::shared_ptr<AIAdapter>& adapter) const { |
99 | 67 | auto col_result = assert_cast<const Derived&>(*this).create_result_column(); |
100 | 67 | RETURN_IF_ERROR(execute_batched_prompts(context, block, arguments, input_rows_count, config, |
101 | 67 | adapter, *col_result)); |
102 | | |
103 | 34 | block.replace_by_position(result, std::move(col_result)); |
104 | 34 | return Status::OK(); |
105 | 67 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Line | Count | Source | 98 | 2 | std::shared_ptr<AIAdapter>& adapter) const { | 99 | 2 | auto col_result = assert_cast<const Derived&>(*this).create_result_column(); | 100 | 2 | RETURN_IF_ERROR(execute_batched_prompts(context, block, arguments, input_rows_count, config, | 101 | 2 | adapter, *col_result)); | 102 | | | 103 | 2 | block.replace_by_position(result, std::move(col_result)); | 104 | 2 | return Status::OK(); | 105 | 2 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Line | Count | Source | 98 | 21 | std::shared_ptr<AIAdapter>& adapter) const { | 99 | 21 | auto col_result = assert_cast<const Derived&>(*this).create_result_column(); | 100 | 21 | RETURN_IF_ERROR(execute_batched_prompts(context, block, arguments, input_rows_count, config, | 101 | 21 | adapter, *col_result)); | 102 | | | 103 | 15 | block.replace_by_position(result, std::move(col_result)); | 104 | 15 | return Status::OK(); | 105 | 21 | } |
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE20execute_with_adapterEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Line | Count | Source | 98 | 44 | std::shared_ptr<AIAdapter>& adapter) const { | 99 | 44 | auto col_result = assert_cast<const Derived&>(*this).create_result_column(); | 100 | 44 | RETURN_IF_ERROR(execute_batched_prompts(context, block, arguments, input_rows_count, config, | 101 | 44 | adapter, *col_result)); | 102 | | | 103 | 17 | block.replace_by_position(result, std::move(col_result)); | 104 | 17 | return Status::OK(); | 105 | 44 | } |
|
106 | | |
107 | 2 | MutableColumnPtr create_result_column() const { return ColumnString::create(); }Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE20create_result_columnEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE20create_result_columnEv _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE20create_result_columnEv Line | Count | Source | 107 | 2 | MutableColumnPtr create_result_column() const { return ColumnString::create(); } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE20create_result_columnEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE20create_result_columnEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE20create_result_columnEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE20create_result_columnEv Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE20create_result_columnEv |
108 | | |
109 | | // Provider-reusable hook for AI functions(string) -> string. |
110 | | Status append_batch_results(const std::vector<std::string>& batch_results, |
111 | 2 | IColumn& col_result) const { |
112 | 2 | auto& string_col = assert_cast<ColumnString&>(col_result); |
113 | 4 | for (const auto& batch_result : batch_results) { |
114 | 4 | string_col.insert_data(batch_result.data(), batch_result.size()); |
115 | 4 | } |
116 | 2 | return Status::OK(); |
117 | 2 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE Line | Count | Source | 111 | 2 | IColumn& col_result) const { | 112 | 2 | auto& string_col = assert_cast<ColumnString&>(col_result); | 113 | 4 | for (const auto& batch_result : batch_results) { | 114 | 4 | string_col.insert_data(batch_result.data(), batch_result.size()); | 115 | 4 | } | 116 | 2 | return Status::OK(); | 117 | 2 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE20append_batch_resultsERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERNS_7IColumnE |
118 | | |
119 | | // The endpoint `v1/completions` does not support `system_prompt`. |
120 | | // To keep the request structure clear and the AI results stable, |
121 | | // convert `v1/completions` to `v1/chat/completions`. |
122 | 80 | static void normalize_endpoint(TAIResource& config) { |
123 | 80 | if (config.endpoint.ends_with("v1/completions")) { |
124 | 1 | static constexpr std::string_view legacy_suffix = "v1/completions"; |
125 | 1 | config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(), |
126 | 1 | legacy_suffix.size(), "v1/chat/completions"); |
127 | 1 | } |
128 | 80 | } Unexecuted instantiation: _ZN5doris10AIFunctionINS_18FunctionAIClassifyEE18normalize_endpointERNS_11TAIResourceE Unexecuted instantiation: _ZN5doris10AIFunctionINS_17FunctionAIExtractEE18normalize_endpointERNS_11TAIResourceE _ZN5doris10AIFunctionINS_19FunctionAISentimentEE18normalize_endpointERNS_11TAIResourceE Line | Count | Source | 122 | 5 | static void normalize_endpoint(TAIResource& config) { | 123 | 5 | if (config.endpoint.ends_with("v1/completions")) { | 124 | 1 | static constexpr std::string_view legacy_suffix = "v1/completions"; | 125 | 1 | config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(), | 126 | 1 | legacy_suffix.size(), "v1/chat/completions"); | 127 | 1 | } | 128 | 5 | } |
Unexecuted instantiation: _ZN5doris10AIFunctionINS_19FunctionAISummarizeEE18normalize_endpointERNS_11TAIResourceE Unexecuted instantiation: _ZN5doris10AIFunctionINS_14FunctionAIMaskEE18normalize_endpointERNS_11TAIResourceE Unexecuted instantiation: _ZN5doris10AIFunctionINS_18FunctionAIGenerateEE18normalize_endpointERNS_11TAIResourceE Unexecuted instantiation: _ZN5doris10AIFunctionINS_20FunctionAIFixGrammarEE18normalize_endpointERNS_11TAIResourceE Unexecuted instantiation: _ZN5doris10AIFunctionINS_19FunctionAITranslateEE18normalize_endpointERNS_11TAIResourceE _ZN5doris10AIFunctionINS_20FunctionAISimilarityEE18normalize_endpointERNS_11TAIResourceE Line | Count | Source | 122 | 21 | static void normalize_endpoint(TAIResource& config) { | 123 | 21 | if (config.endpoint.ends_with("v1/completions")) { | 124 | 0 | static constexpr std::string_view legacy_suffix = "v1/completions"; | 125 | 0 | config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(), | 126 | 0 | legacy_suffix.size(), "v1/chat/completions"); | 127 | 0 | } | 128 | 21 | } |
_ZN5doris10AIFunctionINS_16FunctionAIFilterEE18normalize_endpointERNS_11TAIResourceE Line | Count | Source | 122 | 44 | static void normalize_endpoint(TAIResource& config) { | 123 | 44 | if (config.endpoint.ends_with("v1/completions")) { | 124 | 0 | static constexpr std::string_view legacy_suffix = "v1/completions"; | 125 | 0 | config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(), | 126 | 0 | legacy_suffix.size(), "v1/chat/completions"); | 127 | 0 | } | 128 | 44 | } |
_ZN5doris10AIFunctionINS_13FunctionEmbedEE18normalize_endpointERNS_11TAIResourceE Line | Count | Source | 122 | 10 | static void normalize_endpoint(TAIResource& config) { | 123 | 10 | if (config.endpoint.ends_with("v1/completions")) { | 124 | 0 | static constexpr std::string_view legacy_suffix = "v1/completions"; | 125 | 0 | config.endpoint.replace(config.endpoint.size() - legacy_suffix.size(), | 126 | 0 | legacy_suffix.size(), "v1/chat/completions"); | 127 | 0 | } | 128 | 10 | } |
|
129 | | |
130 | | // Executes one HTTP POST request and validates transport-level success. |
131 | | Status do_send_request(HttpClient* client, const std::string& request_body, |
132 | | std::string& response, const TAIResource& config, |
133 | 3 | std::shared_ptr<AIAdapter>& adapter, FunctionContext* context) const { |
134 | 3 | RETURN_IF_ERROR(client->init(config.endpoint, false)); |
135 | | |
136 | 3 | QueryContext* query_ctx = context->state()->get_query_ctx(); |
137 | 3 | int64_t remaining_query_time = query_ctx->get_remaining_query_time_seconds(); |
138 | 3 | if (remaining_query_time <= 0) { |
139 | 0 | return Status::TimedOut("Query timeout exceeded before AI request"); |
140 | 0 | } |
141 | | |
142 | 3 | client->set_timeout_ms(remaining_query_time * 1000); |
143 | | |
144 | 3 | if (!config.api_key.empty()) { |
145 | 3 | RETURN_IF_ERROR(adapter->set_authentication(client)); |
146 | 3 | } |
147 | | |
148 | 3 | Status st = client->execute_post_request(request_body, &response); |
149 | 3 | long http_status = client->get_http_status(); |
150 | | |
151 | 3 | if (!st.ok()) { |
152 | 1 | LOG(INFO) << "AI HTTP request failed before status validation, provider=" |
153 | 1 | << config.provider_type << ", model=" << config.model_name |
154 | 1 | << ", endpoint=" << mask_token(config.endpoint) |
155 | 1 | << ", exec_status=" << st.to_string() << ", response_body=" << response; |
156 | 1 | return st; |
157 | 1 | } |
158 | 2 | if (http_status != 200) { |
159 | 1 | return Status::HttpError( |
160 | 1 | "http status code is not 200, code={}, url={}, response_body={}", http_status, |
161 | 1 | mask_token(config.endpoint), response); |
162 | 1 | } |
163 | 1 | return Status::OK(); |
164 | 2 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Line | Count | Source | 133 | 3 | std::shared_ptr<AIAdapter>& adapter, FunctionContext* context) const { | 134 | 3 | RETURN_IF_ERROR(client->init(config.endpoint, false)); | 135 | | | 136 | 3 | QueryContext* query_ctx = context->state()->get_query_ctx(); | 137 | 3 | int64_t remaining_query_time = query_ctx->get_remaining_query_time_seconds(); | 138 | 3 | if (remaining_query_time <= 0) { | 139 | 0 | return Status::TimedOut("Query timeout exceeded before AI request"); | 140 | 0 | } | 141 | | | 142 | 3 | client->set_timeout_ms(remaining_query_time * 1000); | 143 | | | 144 | 3 | if (!config.api_key.empty()) { | 145 | 3 | RETURN_IF_ERROR(adapter->set_authentication(client)); | 146 | 3 | } | 147 | | | 148 | 3 | Status st = client->execute_post_request(request_body, &response); | 149 | 3 | long http_status = client->get_http_status(); | 150 | | | 151 | 3 | if (!st.ok()) { | 152 | 1 | LOG(INFO) << "AI HTTP request failed before status validation, provider=" | 153 | 1 | << config.provider_type << ", model=" << config.model_name | 154 | 1 | << ", endpoint=" << mask_token(config.endpoint) | 155 | 1 | << ", exec_status=" << st.to_string() << ", response_body=" << response; | 156 | 1 | return st; | 157 | 1 | } | 158 | 2 | if (http_status != 200) { | 159 | 1 | return Status::HttpError( 
| 160 | 1 | "http status code is not 200, code={}, url={}, response_body={}", http_status, | 161 | 1 | mask_token(config.endpoint), response); | 162 | 1 | } | 163 | 1 | return Status::OK(); | 164 | 2 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_13FunctionEmbedEE15do_send_requestEPNS_10HttpClientERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERSA_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE |
165 | | |
166 | | // Sends the request with retry mechanism for handling transient failures |
167 | | Status send_request_to_llm(const std::string& request_body, std::string& response, |
168 | | const TAIResource& config, std::shared_ptr<AIAdapter>& adapter, |
169 | 0 | FunctionContext* context) const { |
170 | 0 | return HttpClient::execute_with_retry(config.max_retries, config.retry_delay_second, |
171 | 0 | [this, &request_body, &response, &config, &adapter, |
172 | 0 | context](HttpClient* client) -> Status { |
173 | 0 | return this->do_send_request(client, request_body, |
174 | 0 | response, config, |
175 | 0 | adapter, context); |
176 | 0 | }); Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_19FunctionAISummarizeEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_19FunctionAISentimentEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_14FunctionAIMaskEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_18FunctionAIGenerateEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_17FunctionAIExtractEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_18FunctionAIClassifyEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: 
_ZZNK5doris10AIFunctionINS_19FunctionAITranslateEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_20FunctionAISimilarityEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_16FunctionAIFilterEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ Unexecuted instantiation: _ZZNK5doris10AIFunctionINS_13FunctionEmbedEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextEENKUlPNS_10HttpClientEE_clESM_ |
177 | 0 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: 
_ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_13FunctionEmbedEE19send_request_to_llmERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERS8_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE |
178 | | |
179 | | // Provider-reusable helper for string-returning functions. |
180 | | // Estimates one batch entry size using the raw prompt length plus the fixed JSON wrapper cost. |
181 | 85 | size_t estimate_batch_entry_size(size_t idx, const std::string& prompt) const { |
182 | 85 | static constexpr size_t json_wrapper_size = 20; |
183 | 85 | return prompt.size() + std::to_string(idx).size() + json_wrapper_size; |
184 | 85 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 181 | 4 | size_t estimate_batch_entry_size(size_t idx, const std::string& prompt) const { | 182 | 4 | static constexpr size_t json_wrapper_size = 20; | 183 | 4 | return prompt.size() + std::to_string(idx).size() + json_wrapper_size; | 184 | 4 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 181 | 23 | size_t estimate_batch_entry_size(size_t idx, const std::string& prompt) const { | 182 | 23 | static constexpr size_t json_wrapper_size = 20; | 183 | 23 | return prompt.size() + std::to_string(idx).size() + json_wrapper_size; | 184 | 23 | } |
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE25estimate_batch_entry_sizeEmRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 181 | 58 | size_t estimate_batch_entry_size(size_t idx, const std::string& prompt) const { | 182 | 58 | static constexpr size_t json_wrapper_size = 20; | 183 | 58 | return prompt.size() + std::to_string(idx).size() + json_wrapper_size; | 184 | 58 | } |
|
185 | | |
186 | | // Provider-reusable helper for string-returning functions. |
187 | | // Executes one batch request and parses the provider result into one string per input row. |
188 | | Status execute_batch_request(const std::vector<std::string>& batch_prompts, |
189 | | std::vector<std::string>& results, const TAIResource& config, |
190 | | std::shared_ptr<AIAdapter>& adapter, |
191 | 69 | FunctionContext* context) const { |
192 | 69 | #ifdef BE_TEST |
193 | 69 | const char* test_result = std::getenv("AI_TEST_RESULT"); |
194 | 69 | if (test_result != nullptr) { |
195 | 68 | std::vector<std::string> parsed_test_response; |
196 | 68 | RETURN_IF_ERROR( |
197 | 68 | adapter->parse_response(std::string(test_result), parsed_test_response)); |
198 | 67 | if (parsed_test_response.empty()) { |
199 | 0 | return Status::InternalError("AI returned empty result"); |
200 | 0 | } |
201 | 67 | if (parsed_test_response.size() != batch_prompts.size()) { |
202 | 5 | return Status::RuntimeError( |
203 | 5 | "Failed to parse {} batch result, expected {} items but got {}", get_name(), |
204 | 5 | batch_prompts.size(), parsed_test_response.size()); |
205 | 5 | } |
206 | 62 | results = std::move(parsed_test_response); |
207 | 62 | return Status::OK(); |
208 | 67 | } |
209 | 1 | if (config.provider_type == "MOCK") { |
210 | 1 | results.clear(); |
211 | 1 | results.reserve(batch_prompts.size()); |
212 | 1 | for (const auto& prompt : batch_prompts) { |
213 | 1 | results.emplace_back("this is a mock response. " + prompt); |
214 | 1 | } |
215 | 1 | return Status::OK(); |
216 | 1 | } |
217 | 0 | #endif |
218 | | |
219 | 0 | std::string batch_prompt; |
220 | 0 | RETURN_IF_ERROR(build_batch_prompt(batch_prompts, batch_prompt)); |
221 | | |
222 | 0 | std::vector<std::string> inputs = {batch_prompt}; |
223 | 0 | std::vector<std::string> parsed_response; |
224 | |
|
225 | 0 | std::string request_body; |
226 | 0 | RETURN_IF_ERROR(adapter->build_request_payload( |
227 | 0 | inputs, assert_cast<const Derived&>(*this).system_prompt, request_body)); |
228 | | |
229 | 0 | std::string response; |
230 | 0 | RETURN_IF_ERROR(send_request_to_llm(request_body, response, config, adapter, context)); |
231 | 0 | RETURN_IF_ERROR(adapter->parse_response(response, parsed_response)); |
232 | 0 | if (parsed_response.empty()) { |
233 | 0 | return Status::InternalError("AI returned empty result"); |
234 | 0 | } |
235 | 0 | if (parsed_response.size() != batch_prompts.size()) { |
236 | 0 | return Status::RuntimeError( |
237 | 0 | "Failed to parse {} batch result, expected {} items but got {}", get_name(), |
238 | 0 | batch_prompts.size(), parsed_response.size()); |
239 | 0 | } |
240 | 0 | results = std::move(parsed_response); |
241 | 0 | return Status::OK(); |
242 | 0 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Line | Count | Source | 191 | 2 | FunctionContext* context) const { | 192 | 2 | #ifdef BE_TEST | 193 | 2 | const char* test_result = std::getenv("AI_TEST_RESULT"); | 194 | 2 | if (test_result != nullptr) { | 195 | 1 | std::vector<std::string> parsed_test_response; | 196 | 1 | RETURN_IF_ERROR( | 197 | 1 | adapter->parse_response(std::string(test_result), parsed_test_response)); | 198 | 1 | if (parsed_test_response.empty()) { | 199 | 0 | return Status::InternalError("AI returned empty result"); | 200 | 0 | } | 201 | 1 | if (parsed_test_response.size() != batch_prompts.size()) { | 202 | 0 | return Status::RuntimeError( | 203 | 0 | "Failed to parse {} batch result, expected {} items but got {}", get_name(), | 204 | 0 | batch_prompts.size(), parsed_test_response.size()); | 205 | 0 | } | 206 | 1 | results = std::move(parsed_test_response); | 207 | 1 | return Status::OK(); | 208 | 1 | } | 209 | 1 | if (config.provider_type == "MOCK") { | 210 | 1 | results.clear(); | 211 | 1 | results.reserve(batch_prompts.size()); | 212 | 1 | for (const auto& prompt : batch_prompts) { | 213 | 1 | results.emplace_back("this is a mock response. 
" + prompt); | 214 | 1 | } | 215 | 1 | return Status::OK(); | 216 | 1 | } | 217 | 0 | #endif | 218 | | | 219 | 0 | std::string batch_prompt; | 220 | 0 | RETURN_IF_ERROR(build_batch_prompt(batch_prompts, batch_prompt)); | 221 | | | 222 | 0 | std::vector<std::string> inputs = {batch_prompt}; | 223 | 0 | std::vector<std::string> parsed_response; | 224 | |
| 225 | 0 | std::string request_body; | 226 | 0 | RETURN_IF_ERROR(adapter->build_request_payload( | 227 | 0 | inputs, assert_cast<const Derived&>(*this).system_prompt, request_body)); | 228 | | | 229 | 0 | std::string response; | 230 | 0 | RETURN_IF_ERROR(send_request_to_llm(request_body, response, config, adapter, context)); | 231 | 0 | RETURN_IF_ERROR(adapter->parse_response(response, parsed_response)); | 232 | 0 | if (parsed_response.empty()) { | 233 | 0 | return Status::InternalError("AI returned empty result"); | 234 | 0 | } | 235 | 0 | if (parsed_response.size() != batch_prompts.size()) { | 236 | 0 | return Status::RuntimeError( | 237 | 0 | "Failed to parse {} batch result, expected {} items but got {}", get_name(), | 238 | 0 | batch_prompts.size(), parsed_response.size()); | 239 | 0 | } | 240 | 0 | results = std::move(parsed_response); | 241 | 0 | return Status::OK(); | 242 | 0 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Line | Count | Source | 191 | 21 | FunctionContext* context) const { | 192 | 21 | #ifdef BE_TEST | 193 | 21 | const char* test_result = std::getenv("AI_TEST_RESULT"); | 194 | 21 | if (test_result != nullptr) { | 195 | 21 | std::vector<std::string> parsed_test_response; | 196 | 21 | RETURN_IF_ERROR( | 197 | 21 | adapter->parse_response(std::string(test_result), parsed_test_response)); | 198 | 21 | if (parsed_test_response.empty()) { | 199 | 0 | return Status::InternalError("AI returned empty result"); | 200 | 0 | } | 201 | 21 | if 
(parsed_test_response.size() != batch_prompts.size()) { | 202 | 0 | return Status::RuntimeError( | 203 | 0 | "Failed to parse {} batch result, expected {} items but got {}", get_name(), | 204 | 0 | batch_prompts.size(), parsed_test_response.size()); | 205 | 0 | } | 206 | 21 | results = std::move(parsed_test_response); | 207 | 21 | return Status::OK(); | 208 | 21 | } | 209 | 0 | if (config.provider_type == "MOCK") { | 210 | 0 | results.clear(); | 211 | 0 | results.reserve(batch_prompts.size()); | 212 | 0 | for (const auto& prompt : batch_prompts) { | 213 | 0 | results.emplace_back("this is a mock response. " + prompt); | 214 | 0 | } | 215 | 0 | return Status::OK(); | 216 | 0 | } | 217 | 0 | #endif | 218 | | | 219 | 0 | std::string batch_prompt; | 220 | 0 | RETURN_IF_ERROR(build_batch_prompt(batch_prompts, batch_prompt)); | 221 | | | 222 | 0 | std::vector<std::string> inputs = {batch_prompt}; | 223 | 0 | std::vector<std::string> parsed_response; | 224 | |
| 225 | 0 | std::string request_body; | 226 | 0 | RETURN_IF_ERROR(adapter->build_request_payload( | 227 | 0 | inputs, assert_cast<const Derived&>(*this).system_prompt, request_body)); | 228 | | | 229 | 0 | std::string response; | 230 | 0 | RETURN_IF_ERROR(send_request_to_llm(request_body, response, config, adapter, context)); | 231 | 0 | RETURN_IF_ERROR(adapter->parse_response(response, parsed_response)); | 232 | 0 | if (parsed_response.empty()) { | 233 | 0 | return Status::InternalError("AI returned empty result"); | 234 | 0 | } | 235 | 0 | if (parsed_response.size() != batch_prompts.size()) { | 236 | 0 | return Status::RuntimeError( | 237 | 0 | "Failed to parse {} batch result, expected {} items but got {}", get_name(), | 238 | 0 | batch_prompts.size(), parsed_response.size()); | 239 | 0 | } | 240 | 0 | results = std::move(parsed_response); | 241 | 0 | return Status::OK(); | 242 | 0 | } |
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE21execute_batch_requestERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERSB_RKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEEPNS_15FunctionContextE Line | Count | Source | 191 | 46 | FunctionContext* context) const { | 192 | 46 | #ifdef BE_TEST | 193 | 46 | const char* test_result = std::getenv("AI_TEST_RESULT"); | 194 | 46 | if (test_result != nullptr) { | 195 | 46 | std::vector<std::string> parsed_test_response; | 196 | 46 | RETURN_IF_ERROR( | 197 | 46 | adapter->parse_response(std::string(test_result), parsed_test_response)); | 198 | 45 | if (parsed_test_response.empty()) { | 199 | 0 | return Status::InternalError("AI returned empty result"); | 200 | 0 | } | 201 | 45 | if (parsed_test_response.size() != batch_prompts.size()) { | 202 | 5 | return Status::RuntimeError( | 203 | 5 | "Failed to parse {} batch result, expected {} items but got {}", get_name(), | 204 | 5 | batch_prompts.size(), parsed_test_response.size()); | 205 | 5 | } | 206 | 40 | results = std::move(parsed_test_response); | 207 | 40 | return Status::OK(); | 208 | 45 | } | 209 | 0 | if (config.provider_type == "MOCK") { | 210 | 0 | results.clear(); | 211 | 0 | results.reserve(batch_prompts.size()); | 212 | 0 | for (const auto& prompt : batch_prompts) { | 213 | 0 | results.emplace_back("this is a mock response. " + prompt); | 214 | 0 | } | 215 | 0 | return Status::OK(); | 216 | 0 | } | 217 | 0 | #endif | 218 | | | 219 | 0 | std::string batch_prompt; | 220 | 0 | RETURN_IF_ERROR(build_batch_prompt(batch_prompts, batch_prompt)); | 221 | | | 222 | 0 | std::vector<std::string> inputs = {batch_prompt}; | 223 | 0 | std::vector<std::string> parsed_response; | 224 | |
| 225 | 0 | std::string request_body; | 226 | 0 | RETURN_IF_ERROR(adapter->build_request_payload( | 227 | 0 | inputs, assert_cast<const Derived&>(*this).system_prompt, request_body)); | 228 | | | 229 | 0 | std::string response; | 230 | 0 | RETURN_IF_ERROR(send_request_to_llm(request_body, response, config, adapter, context)); | 231 | 0 | RETURN_IF_ERROR(adapter->parse_response(response, parsed_response)); | 232 | 0 | if (parsed_response.empty()) { | 233 | 0 | return Status::InternalError("AI returned empty result"); | 234 | 0 | } | 235 | 0 | if (parsed_response.size() != batch_prompts.size()) { | 236 | 0 | return Status::RuntimeError( | 237 | 0 | "Failed to parse {} batch result, expected {} items but got {}", get_name(), | 238 | 0 | batch_prompts.size(), parsed_response.size()); | 239 | 0 | } | 240 | 0 | results = std::move(parsed_response); | 241 | 0 | return Status::OK(); | 242 | 0 | } |
|
243 | | |
244 | | // Provider-reusable helper for string-returning functions. |
245 | | // Runs the common batch execution flow; derived classes only need to define how one batch of |
246 | | // string results is inserted into the final output column. |
247 | | Status execute_batched_prompts(FunctionContext* context, Block& block, |
248 | | const ColumnNumbers& arguments, size_t input_rows_count, |
249 | | const TAIResource& config, std::shared_ptr<AIAdapter>& adapter, |
250 | 67 | IColumn& col_result) const { |
251 | 67 | std::vector<std::string> batch_prompts; |
252 | 67 | size_t current_batch_size = 2; // [] |
253 | | |
254 | 152 | for (size_t i = 0; i < input_rows_count; ++i) { |
255 | 85 | std::string prompt; |
256 | 85 | RETURN_IF_ERROR( |
257 | 85 | assert_cast<const Derived&>(*this).build_prompt(block, arguments, i, prompt)); |
258 | | |
259 | 85 | size_t entry_size = estimate_batch_entry_size(batch_prompts.size(), prompt); |
260 | 85 | if (entry_size > max_batch_prompt_size) { |
261 | 1 | if (!batch_prompts.empty()) { |
262 | 0 | std::vector<std::string> batch_results; |
263 | 0 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, |
264 | 0 | config, adapter, context)); |
265 | 0 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( |
266 | 0 | batch_results, col_result)); |
267 | 0 | batch_prompts.clear(); |
268 | 0 | current_batch_size = 2; |
269 | 0 | } |
270 | | |
271 | 1 | std::vector<std::string> single_prompts; |
272 | 1 | single_prompts.emplace_back(std::move(prompt)); |
273 | 1 | std::vector<std::string> single_results; |
274 | 1 | RETURN_IF_ERROR(this->execute_batch_request(single_prompts, single_results, config, |
275 | 1 | adapter, context)); |
276 | 1 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( |
277 | 1 | single_results, col_result)); |
278 | 1 | continue; |
279 | 1 | } |
280 | | |
281 | 84 | size_t additional_size = entry_size + (batch_prompts.empty() ? 0 : 1); |
282 | 84 | if (!batch_prompts.empty() && |
283 | 84 | current_batch_size + additional_size > max_batch_prompt_size) { |
284 | 2 | std::vector<std::string> batch_results; |
285 | 2 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config, |
286 | 2 | adapter, context)); |
287 | 2 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( |
288 | 2 | batch_results, col_result)); |
289 | 2 | batch_prompts.clear(); |
290 | 2 | current_batch_size = 2; |
291 | 2 | additional_size = entry_size; |
292 | 2 | } |
293 | | |
294 | 84 | batch_prompts.emplace_back(std::move(prompt)); |
295 | 84 | current_batch_size += additional_size; |
296 | 84 | } |
297 | | |
298 | 67 | if (!batch_prompts.empty()) { |
299 | 66 | std::vector<std::string> batch_results; |
300 | 66 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config, |
301 | 66 | adapter, context)); |
302 | 60 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(batch_results, |
303 | 60 | col_result)); |
304 | 60 | } |
305 | 34 | return Status::OK(); |
306 | 67 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE Line | Count | Source | 250 | 2 | IColumn& col_result) const { | 251 | 2 | std::vector<std::string> batch_prompts; | 252 | 2 | size_t current_batch_size = 2; // [] | 253 | | | 254 | 6 | for (size_t i = 0; i < input_rows_count; ++i) { | 255 | 4 | std::string prompt; | 256 | 4 | RETURN_IF_ERROR( | 257 | 4 | assert_cast<const Derived&>(*this).build_prompt(block, arguments, i, prompt)); | 258 | | | 259 | 4 | size_t entry_size = estimate_batch_entry_size(batch_prompts.size(), prompt); | 260 | 4 | if (entry_size > max_batch_prompt_size) { | 261 | 0 | if (!batch_prompts.empty()) { | 262 | 0 | std::vector<std::string> batch_results; | 263 | 0 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, | 264 | 0 | config, adapter, context)); | 265 | 0 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( | 266 | 0 | batch_results, col_result)); | 267 | 0 | batch_prompts.clear(); | 268 | 0 | current_batch_size = 2; | 269 | 0 | } | 270 | | | 271 | 0 | std::vector<std::string> single_prompts; | 272 | 0 | single_prompts.emplace_back(std::move(prompt)); | 273 | 0 | std::vector<std::string> single_results; | 274 | 0 | RETURN_IF_ERROR(this->execute_batch_request(single_prompts, single_results, config, | 275 | 0 | adapter, context)); | 276 | 0 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( | 
277 | 0 | single_results, col_result)); | 278 | 0 | continue; | 279 | 0 | } | 280 | | | 281 | 4 | size_t additional_size = entry_size + (batch_prompts.empty() ? 0 : 1); | 282 | 4 | if (!batch_prompts.empty() && | 283 | 4 | current_batch_size + additional_size > max_batch_prompt_size) { | 284 | 0 | std::vector<std::string> batch_results; | 285 | 0 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config, | 286 | 0 | adapter, context)); | 287 | 0 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( | 288 | 0 | batch_results, col_result)); | 289 | 0 | batch_prompts.clear(); | 290 | 0 | current_batch_size = 2; | 291 | 0 | additional_size = entry_size; | 292 | 0 | } | 293 | | | 294 | 4 | batch_prompts.emplace_back(std::move(prompt)); | 295 | 4 | current_batch_size += additional_size; | 296 | 4 | } | 297 | | | 298 | 2 | if (!batch_prompts.empty()) { | 299 | 2 | std::vector<std::string> batch_results; | 300 | 2 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config, | 301 | 2 | adapter, context)); | 302 | 2 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(batch_results, | 303 | 2 | col_result)); | 304 | 2 | } | 305 | 2 | return Status::OK(); | 306 | 2 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE Line | Count | Source | 250 | 21 | IColumn& col_result) const { | 251 | 21 | std::vector<std::string> batch_prompts; | 252 | 21 | size_t current_batch_size = 2; // [] | 253 | | | 254 | 44 | for (size_t i = 0; i < input_rows_count; ++i) { | 255 | 23 | std::string prompt; | 256 | 23 | RETURN_IF_ERROR( | 257 | 23 | assert_cast<const Derived&>(*this).build_prompt(block, arguments, i, prompt)); | 258 | | | 259 | 23 | size_t entry_size = estimate_batch_entry_size(batch_prompts.size(), prompt); | 260 | 23 | if (entry_size > max_batch_prompt_size) { | 261 | 0 | if (!batch_prompts.empty()) { | 262 | 0 | std::vector<std::string> batch_results; | 263 | 0 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, 
batch_results, | 264 | 0 | config, adapter, context)); | 265 | 0 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( | 266 | 0 | batch_results, col_result)); | 267 | 0 | batch_prompts.clear(); | 268 | 0 | current_batch_size = 2; | 269 | 0 | } | 270 | | | 271 | 0 | std::vector<std::string> single_prompts; | 272 | 0 | single_prompts.emplace_back(std::move(prompt)); | 273 | 0 | std::vector<std::string> single_results; | 274 | 0 | RETURN_IF_ERROR(this->execute_batch_request(single_prompts, single_results, config, | 275 | 0 | adapter, context)); | 276 | 0 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( | 277 | 0 | single_results, col_result)); | 278 | 0 | continue; | 279 | 0 | } | 280 | | | 281 | 23 | size_t additional_size = entry_size + (batch_prompts.empty() ? 0 : 1); | 282 | 23 | if (!batch_prompts.empty() && | 283 | 23 | current_batch_size + additional_size > max_batch_prompt_size) { | 284 | 0 | std::vector<std::string> batch_results; | 285 | 0 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config, | 286 | 0 | adapter, context)); | 287 | 0 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( | 288 | 0 | batch_results, col_result)); | 289 | 0 | batch_prompts.clear(); | 290 | 0 | current_batch_size = 2; | 291 | 0 | additional_size = entry_size; | 292 | 0 | } | 293 | | | 294 | 23 | batch_prompts.emplace_back(std::move(prompt)); | 295 | 23 | current_batch_size += additional_size; | 296 | 23 | } | 297 | | | 298 | 21 | if (!batch_prompts.empty()) { | 299 | 21 | std::vector<std::string> batch_results; | 300 | 21 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config, | 301 | 21 | adapter, context)); | 302 | 21 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(batch_results, | 303 | 21 | col_result)); | 304 | 21 | } | 305 | 15 | return Status::OK(); | 306 | 21 | } |
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE23execute_batched_promptsEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEmRKNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEERNS_7IColumnE Line | Count | Source | 250 | 44 | IColumn& col_result) const { | 251 | 44 | std::vector<std::string> batch_prompts; | 252 | 44 | size_t current_batch_size = 2; // [] | 253 | | | 254 | 102 | for (size_t i = 0; i < input_rows_count; ++i) { | 255 | 58 | std::string prompt; | 256 | 58 | RETURN_IF_ERROR( | 257 | 58 | assert_cast<const Derived&>(*this).build_prompt(block, arguments, i, prompt)); | 258 | | | 259 | 58 | size_t entry_size = estimate_batch_entry_size(batch_prompts.size(), prompt); | 260 | 58 | if (entry_size > max_batch_prompt_size) { | 261 | 1 | if (!batch_prompts.empty()) { | 262 | 0 | std::vector<std::string> batch_results; | 263 | 0 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, | 264 | 0 | config, adapter, context)); | 265 | 0 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( | 266 | 0 | batch_results, col_result)); | 267 | 0 | batch_prompts.clear(); | 268 | 0 | current_batch_size = 2; | 269 | 0 | } | 270 | | | 271 | 1 | std::vector<std::string> single_prompts; | 272 | 1 | single_prompts.emplace_back(std::move(prompt)); | 273 | 1 | std::vector<std::string> single_results; | 274 | 1 | RETURN_IF_ERROR(this->execute_batch_request(single_prompts, single_results, config, | 275 | 1 | adapter, context)); | 276 | 1 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( | 277 | 1 | single_results, col_result)); | 278 | 1 | continue; | 279 | 1 | } | 280 | | | 281 | 57 | size_t additional_size = entry_size + (batch_prompts.empty() ? 
0 : 1); | 282 | 57 | if (!batch_prompts.empty() && | 283 | 57 | current_batch_size + additional_size > max_batch_prompt_size) { | 284 | 2 | std::vector<std::string> batch_results; | 285 | 2 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config, | 286 | 2 | adapter, context)); | 287 | 2 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results( | 288 | 2 | batch_results, col_result)); | 289 | 2 | batch_prompts.clear(); | 290 | 2 | current_batch_size = 2; | 291 | 2 | additional_size = entry_size; | 292 | 2 | } | 293 | | | 294 | 57 | batch_prompts.emplace_back(std::move(prompt)); | 295 | 57 | current_batch_size += additional_size; | 296 | 57 | } | 297 | | | 298 | 44 | if (!batch_prompts.empty()) { | 299 | 43 | std::vector<std::string> batch_results; | 300 | 43 | RETURN_IF_ERROR(this->execute_batch_request(batch_prompts, batch_results, config, | 301 | 43 | adapter, context)); | 302 | 37 | RETURN_IF_ERROR(assert_cast<const Derived&>(*this).append_batch_results(batch_results, | 303 | 37 | col_result)); | 304 | 37 | } | 305 | 17 | return Status::OK(); | 306 | 44 | } |
|
307 | | |
308 | | private: |
309 | | // Resolves the AI resource named by the first argument into `config` and creates/initializes the matching `adapter`. The AI resource must be a literal: only row 0 of that column is read. Returns InternalError when the query context carries no AI resource map, and InvalidArgument when the name is unknown or no adapter can be created for the provider type. |
310 | | Status _init_from_resource(FunctionContext* context, const Block& block, |
311 | | const ColumnNumbers& arguments, TAIResource& config, |
312 | 79 | std::shared_ptr<AIAdapter>& adapter) const { |
313 | 79 | const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]); |
314 | 79 | StringRef resource_name_ref = resource_column.column->get_data_at(0); | // only row 0 is consulted for the resource name
315 | 79 | std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size); |
316 | | |
317 | 79 | const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources = |
318 | 79 | context->state()->get_query_ctx()->get_ai_resources(); | // NOTE(review): state() and get_query_ctx() are dereferenced without null checks; confirm both are always set on this path
319 | 79 | if (!ai_resources) { |
320 | 1 | return Status::InternalError("AI resources metadata missing in QueryContext"); |
321 | 1 | } |
322 | 78 | auto it = ai_resources->find(resource_name); |
323 | 78 | if (it == ai_resources->end()) { |
324 | 1 | return Status::InvalidArgument("AI resource not found: " + resource_name); |
325 | 1 | } |
326 | 77 | config = it->second; | // copy the resource definition into the caller-provided out-parameter
327 | | |
328 | 77 | normalize_endpoint(config); | // operates on the caller's copy, not on the entry stored in the shared map
329 | | |
330 | 77 | adapter = AIAdapterFactory::create_adapter(config.provider_type); |
331 | 77 | if (!adapter) { | // the factory produced no adapter for this provider_type
332 | 0 | return Status::InvalidArgument("Unsupported AI provider type: " + config.provider_type); |
333 | 0 | } |
334 | 77 | adapter->init(config); |
335 | 77 | return Status::OK(); |
336 | 77 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Line | Count | Source | 312 | 4 | std::shared_ptr<AIAdapter>& adapter) const { | 313 | 4 | const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]); | 314 | 4 | StringRef resource_name_ref = resource_column.column->get_data_at(0); | 315 | 4 | std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size); | 316 | | | 317 | 4 | const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources = | 318 | 4 | context->state()->get_query_ctx()->get_ai_resources(); | 319 | 4 | if (!ai_resources) { | 320 | 1 | return Status::InternalError("AI resources metadata missing in QueryContext"); | 321 | 1 | } | 322 | 3 | auto it = ai_resources->find(resource_name); | 323 | 3 | if (it == ai_resources->end()) { | 324 | 1 | return Status::InvalidArgument("AI resource not found: " + resource_name); | 325 | 1 | } | 326 | 2 | config = it->second; | 327 | | | 328 | 2 | normalize_endpoint(config); | 329 | | | 330 | 2 | adapter = AIAdapterFactory::create_adapter(config.provider_type); | 331 | 2 | if (!adapter) { | 332 | 0 | return Status::InvalidArgument("Unsupported AI provider type: " + config.provider_type); | 333 | 0 | } | 334 | 2 | adapter->init(config); | 335 | 2 | return Status::OK(); | 336 | 2 | } |
Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Line | Count | Source | 312 | 21 | std::shared_ptr<AIAdapter>& adapter) const { | 313 | 21 | const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]); | 314 | 21 | StringRef resource_name_ref = resource_column.column->get_data_at(0); | 315 | 21 | std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size); | 316 | | | 317 | 21 | const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources = | 318 | 21 | context->state()->get_query_ctx()->get_ai_resources(); | 319 | 21 | if (!ai_resources) { | 320 | 0 | return Status::InternalError("AI resources metadata missing in QueryContext"); | 321 | 0 | } | 322 | 21 | auto it = ai_resources->find(resource_name); | 323 | 21 | if (it == ai_resources->end()) { | 324 | 0 | return Status::InvalidArgument("AI resource not 
found: " + resource_name); | 325 | 0 | } | 326 | 21 | config = it->second; | 327 | | | 328 | 21 | normalize_endpoint(config); | 329 | | | 330 | 21 | adapter = AIAdapterFactory::create_adapter(config.provider_type); | 331 | 21 | if (!adapter) { | 332 | 0 | return Status::InvalidArgument("Unsupported AI provider type: " + config.provider_type); | 333 | 0 | } | 334 | 21 | adapter->init(config); | 335 | 21 | return Status::OK(); | 336 | 21 | } |
_ZNK5doris10AIFunctionINS_16FunctionAIFilterEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Line | Count | Source | 312 | 44 | std::shared_ptr<AIAdapter>& adapter) const { | 313 | 44 | const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]); | 314 | 44 | StringRef resource_name_ref = resource_column.column->get_data_at(0); | 315 | 44 | std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size); | 316 | | | 317 | 44 | const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources = | 318 | 44 | context->state()->get_query_ctx()->get_ai_resources(); | 319 | 44 | if (!ai_resources) { | 320 | 0 | return Status::InternalError("AI resources metadata missing in QueryContext"); | 321 | 0 | } | 322 | 44 | auto it = ai_resources->find(resource_name); | 323 | 44 | if (it == ai_resources->end()) { | 324 | 0 | return Status::InvalidArgument("AI resource not found: " + resource_name); | 325 | 0 | } | 326 | 44 | config = it->second; | 327 | | | 328 | 44 | normalize_endpoint(config); | 329 | | | 330 | 44 | adapter = AIAdapterFactory::create_adapter(config.provider_type); | 331 | 44 | if (!adapter) { | 332 | 0 | return Status::InvalidArgument("Unsupported AI provider type: " + config.provider_type); | 333 | 0 | } | 334 | 44 | adapter->init(config); | 335 | 44 | return Status::OK(); | 336 | 44 | } |
_ZNK5doris10AIFunctionINS_13FunctionEmbedEE19_init_from_resourceEPNS_15FunctionContextERKNS_5BlockERKSt6vectorIjSaIjEERNS_11TAIResourceERSt10shared_ptrINS_9AIAdapterEE Line | Count | Source | 312 | 10 | std::shared_ptr<AIAdapter>& adapter) const { | 313 | 10 | const ColumnWithTypeAndName& resource_column = block.get_by_position(arguments[0]); | 314 | 10 | StringRef resource_name_ref = resource_column.column->get_data_at(0); | 315 | 10 | std::string resource_name = std::string(resource_name_ref.data, resource_name_ref.size); | 316 | | | 317 | 10 | const std::shared_ptr<std::map<std::string, TAIResource>>& ai_resources = | 318 | 10 | context->state()->get_query_ctx()->get_ai_resources(); | 319 | 10 | if (!ai_resources) { | 320 | 0 | return Status::InternalError("AI resources metadata missing in QueryContext"); | 321 | 0 | } | 322 | 10 | auto it = ai_resources->find(resource_name); | 323 | 10 | if (it == ai_resources->end()) { | 324 | 0 | return Status::InvalidArgument("AI resource not found: " + resource_name); | 325 | 0 | } | 326 | 10 | config = it->second; | 327 | | | 328 | 10 | normalize_endpoint(config); | 329 | | | 330 | 10 | adapter = AIAdapterFactory::create_adapter(config.provider_type); | 331 | 10 | if (!adapter) { | 332 | 0 | return Status::InvalidArgument("Unsupported AI provider type: " + config.provider_type); | 333 | 0 | } | 334 | 10 | adapter->init(config); | 335 | 10 | return Status::OK(); | 336 | 10 | } |
|
337 | | |
338 | | // Serializes one text batch into the shared JSON-array prompt format consumed by LLM providers for batch string functions: |
339 | | // [{"idx":0,"input":"..."},{"idx":1,"input":"..."},...], where idx is the entry's position in batch_prompts. The JSON text is stored in `prompt`; this function always returns Status::OK(). |
340 | | Status build_batch_prompt(const std::vector<std::string>& batch_prompts, |
341 | 0 | std::string& prompt) const { |
342 | 0 | rapidjson::StringBuffer buffer; |
343 | 0 | rapidjson::Writer<rapidjson::StringBuffer> writer(buffer); |
344 | |
|
345 | 0 | writer.StartArray(); |
346 | 0 | for (size_t i = 0; i < batch_prompts.size(); ++i) { |
347 | 0 | writer.StartObject(); |
348 | 0 | writer.Key("idx"); |
349 | 0 | writer.Uint64(i); | // idx is the entry's position within this batch
350 | 0 | writer.Key("input"); |
351 | 0 | writer.String(batch_prompts[i].data(), |
352 | 0 | static_cast<rapidjson::SizeType>(batch_prompts[i].size())); | // length is passed explicitly rather than relying on NUL termination
353 | 0 | writer.EndObject(); |
354 | 0 | } |
355 | 0 | writer.EndArray(); |
356 | |
|
357 | 0 | prompt = buffer.GetString(); | // overwrites any previous contents of the out-parameter
358 | 0 | return Status::OK(); |
359 | 0 | } Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIClassifyEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ Unexecuted instantiation: _ZNK5doris10AIFunctionINS_17FunctionAIExtractEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISentimentEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAISummarizeEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ Unexecuted instantiation: _ZNK5doris10AIFunctionINS_14FunctionAIMaskEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ Unexecuted instantiation: _ZNK5doris10AIFunctionINS_18FunctionAIGenerateEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAIFixGrammarEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ Unexecuted instantiation: _ZNK5doris10AIFunctionINS_19FunctionAITranslateEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ Unexecuted instantiation: _ZNK5doris10AIFunctionINS_20FunctionAISimilarityEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ Unexecuted instantiation: _ZNK5doris10AIFunctionINS_16FunctionAIFilterEE18build_batch_promptERKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EERS9_ |
360 | | }; |
361 | | |
362 | | } // namespace doris |