Coverage Report

Created: 2026-04-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string_basic.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cstddef>
19
#include <cstring>
20
#include <string>
21
22
#include "common/status.h"
23
#include "core/assert_cast.h"
24
#include "core/block/block.h"
25
#include "core/block/column_numbers.h"
26
#include "core/column/column_const.h"
27
#include "core/column/column_nullable.h"
28
#include "core/column/column_string.h"
29
#include "core/column/column_vector.h"
30
#include "core/data_type/data_type_nullable.h"
31
#include "core/data_type/data_type_number.h"
32
#include "core/data_type/data_type_string.h"
33
#include "core/string_ref.h"
34
#include "exec/common/stringop_substring.h"
35
#include "exec/common/template_helpers.hpp"
36
#include "exec/common/util.hpp"
37
#include "exprs/function/function.h"
38
#include "exprs/function/function_helpers.h"
39
#include "exprs/function/simple_function_factory.h"
40
#include "exprs/function_context.h"
41
#include "util/simd/vstring_function.h"
42
43
namespace doris {
44
#include "common/compile_check_avoid_begin.h"
45
class FunctionStrcmp : public IFunction {
46
public:
47
    static constexpr auto name = "strcmp";
48
49
21
    static FunctionPtr create() { return std::make_shared<FunctionStrcmp>(); }
50
51
1
    String get_name() const override { return name; }
52
53
12
    size_t get_number_of_arguments() const override { return 2; }
54
55
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
56
12
        return std::make_shared<DataTypeInt8>();
57
12
    }
58
59
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
60
10
                        uint32_t result, size_t input_rows_count) const override {
61
10
        const auto& [arg0_column, arg0_const] =
62
10
                unpack_if_const(block.get_by_position(arguments[0]).column);
63
10
        const auto& [arg1_column, arg1_const] =
64
10
                unpack_if_const(block.get_by_position(arguments[1]).column);
65
66
10
        auto result_column = ColumnInt8::create(input_rows_count);
67
68
10
        if (auto arg0 = check_and_get_column<ColumnString>(arg0_column.get())) {
69
10
            if (auto arg1 = check_and_get_column<ColumnString>(arg1_column.get())) {
70
10
                if (arg0_const) {
71
0
                    scalar_vector(arg0->get_data_at(0), *arg1, *result_column);
72
10
                } else if (arg1_const) {
73
0
                    vector_scalar(*arg0, arg1->get_data_at(0), *result_column);
74
10
                } else {
75
10
                    vector_vector(*arg0, *arg1, *result_column);
76
10
                }
77
10
            }
78
10
        }
79
80
10
        block.replace_by_position(result, std::move(result_column));
81
10
        return Status::OK();
82
10
    }
83
84
private:
85
0
    static void scalar_vector(const StringRef str, const ColumnString& vec1, ColumnInt8& res) {
86
0
        size_t size = vec1.size();
87
0
        for (size_t i = 0; i < size; ++i) {
88
0
            res.get_data()[i] = str.compare(vec1.get_data_at(i));
89
0
        }
90
0
    }
91
92
0
    static void vector_scalar(const ColumnString& vec0, const StringRef str, ColumnInt8& res) {
93
0
        size_t size = vec0.size();
94
0
        for (size_t i = 0; i < size; ++i) {
95
0
            res.get_data()[i] = vec0.get_data_at(i).compare(str);
96
0
        }
97
0
    }
98
99
10
    static void vector_vector(const ColumnString& vec0, const ColumnString& vec1, ColumnInt8& res) {
100
10
        size_t size = vec0.size();
101
20
        for (size_t i = 0; i < size; ++i) {
102
10
            res.get_data()[i] = vec0.get_data_at(i).compare(vec1.get_data_at(i));
103
10
        }
104
10
    }
105
};
106
107
template <typename Impl>
108
class FunctionSubstring : public IFunction {
109
public:
110
    static constexpr auto name = SubstringUtil::name;
111
2
    String get_name() const override { return name; }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE8get_nameB5cxx11Ev
Line
Count
Source
111
1
    String get_name() const override { return name; }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE8get_nameB5cxx11Ev
Line
Count
Source
111
1
    String get_name() const override { return name; }
112
28.4k
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
_ZN5doris17FunctionSubstringINS_11Substr3ImplEE6createEv
Line
Count
Source
112
28.3k
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
_ZN5doris17FunctionSubstringINS_11Substr2ImplEE6createEv
Line
Count
Source
112
94
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
113
114
28.3k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
115
28.3k
        return std::make_shared<DataTypeString>();
116
28.3k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
114
28.3k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
115
28.3k
        return std::make_shared<DataTypeString>();
116
28.3k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
114
85
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
115
85
        return std::make_shared<DataTypeString>();
116
85
    }
117
28.4k
    DataTypes get_variadic_argument_types_impl() const override {
118
28.4k
        return Impl::get_variadic_argument_types();
119
28.4k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
117
28.3k
    DataTypes get_variadic_argument_types_impl() const override {
118
28.3k
        return Impl::get_variadic_argument_types();
119
28.3k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
117
93
    DataTypes get_variadic_argument_types_impl() const override {
118
93
        return Impl::get_variadic_argument_types();
119
93
    }
120
28.3k
    size_t get_number_of_arguments() const override {
121
28.3k
        return get_variadic_argument_types_impl().size();
122
28.3k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE23get_number_of_argumentsEv
Line
Count
Source
120
28.3k
    size_t get_number_of_arguments() const override {
121
28.3k
        return get_variadic_argument_types_impl().size();
122
28.3k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE23get_number_of_argumentsEv
Line
Count
Source
120
85
    size_t get_number_of_arguments() const override {
121
85
        return get_variadic_argument_types_impl().size();
122
85
    }
123
124
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
125
28.0k
                        uint32_t result, size_t input_rows_count) const override {
126
28.0k
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
127
28.0k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
125
28.0k
                        uint32_t result, size_t input_rows_count) const override {
126
28.0k
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
127
28.0k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
125
55
                        uint32_t result, size_t input_rows_count) const override {
126
55
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
127
55
    }
128
};
129
130
struct Substr3Impl {
131
28.3k
    static DataTypes get_variadic_argument_types() {
132
28.3k
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>(),
133
28.3k
                std::make_shared<DataTypeInt32>()};
134
28.3k
    }
135
136
    static Status execute_impl(FunctionContext* context, Block& block,
137
                               const ColumnNumbers& arguments, uint32_t result,
138
28.0k
                               size_t input_rows_count) {
139
28.0k
        SubstringUtil::substring_execute(block, arguments, result, input_rows_count);
140
28.0k
        return Status::OK();
141
28.0k
    }
142
};
143
144
struct Substr2Impl {
145
93
    static DataTypes get_variadic_argument_types() {
146
93
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()};
147
93
    }
148
149
    static Status execute_impl(FunctionContext* context, Block& block,
150
                               const ColumnNumbers& arguments, uint32_t result,
151
55
                               size_t input_rows_count) {
152
55
        auto col_len = ColumnInt32::create(input_rows_count);
153
55
        auto& strlen_data = col_len->get_data();
154
155
55
        ColumnPtr str_col;
156
55
        bool str_const;
157
55
        std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column);
158
159
55
        const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
160
161
55
        if (str_const) {
162
18
            std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]);
163
37
        } else {
164
101
            for (int i = 0; i < input_rows_count; ++i) {
165
64
                strlen_data[i] = str_offset[i] - str_offset[i - 1];
166
64
            }
167
37
        }
168
169
        // we complete the column2(strlen) with the default value - each row's strlen.
170
55
        block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(), "strlen"});
171
55
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1};
172
173
55
        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
174
55
        return Status::OK();
175
55
    }
176
};
177
178
class FunctionLeft : public IFunction {
179
public:
180
    static constexpr auto name = "left";
181
309
    static FunctionPtr create() { return std::make_shared<FunctionLeft>(); }
182
1
    String get_name() const override { return name; }
183
300
    size_t get_number_of_arguments() const override { return 2; }
184
300
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
185
300
        return std::make_shared<DataTypeString>();
186
300
    }
187
188
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
189
304
                        uint32_t result, size_t input_rows_count) const override {
190
304
        DCHECK_EQ(arguments.size(), 2);
191
304
        auto res = ColumnString::create();
192
304
        bool col_const[2];
193
304
        ColumnPtr argument_columns[2];
194
911
        for (int i = 0; i < 2; ++i) {
195
607
            std::tie(argument_columns[i], col_const[i]) =
196
607
                    unpack_if_const(block.get_by_position(arguments[i]).column);
197
607
        }
198
199
304
        const auto& str_col = assert_cast<const ColumnString&>(*argument_columns[0]);
200
304
        const auto& len_col = assert_cast<const ColumnInt32&>(*argument_columns[1]);
201
304
        const auto is_ascii = str_col.is_ascii();
202
203
304
        std::visit(
204
304
                [&](auto is_ascii, auto str_const, auto len_const) {
205
304
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
206
304
                                                             input_rows_count);
207
304
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
204
66
                [&](auto is_ascii, auto str_const, auto len_const) {
205
66
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
206
66
                                                             input_rows_count);
207
66
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
204
8
                [&](auto is_ascii, auto str_const, auto len_const) {
205
8
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
206
8
                                                             input_rows_count);
207
8
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
204
8
                [&](auto is_ascii, auto str_const, auto len_const) {
205
8
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
206
8
                                                             input_rows_count);
207
8
                },
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
204
106
                [&](auto is_ascii, auto str_const, auto len_const) {
205
106
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
206
106
                                                             input_rows_count);
207
106
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
204
80
                [&](auto is_ascii, auto str_const, auto len_const) {
205
80
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
206
80
                                                             input_rows_count);
207
80
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
204
36
                [&](auto is_ascii, auto str_const, auto len_const) {
205
36
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
206
36
                                                             input_rows_count);
207
36
                },
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
208
304
                make_bool_variant(is_ascii), make_bool_variant(col_const[0]),
209
304
                make_bool_variant(col_const[1]));
210
211
304
        block.get_by_position(result).column = std::move(res);
212
304
        return Status::OK();
213
304
    }
214
215
    template <bool is_ascii, bool str_const, bool len_const>
216
    static void _execute(const ColumnString& str_col, const ColumnInt32& len_col, ColumnString& res,
217
304
                         size_t size) {
218
304
        auto& res_chars = res.get_chars();
219
304
        auto& res_offsets = res.get_offsets();
220
304
        res_offsets.resize(size);
221
304
        const auto& len_data = len_col.get_data();
222
223
304
        if constexpr (str_const) {
224
44
            res_chars.reserve(size * (str_col.get_chars().size()));
225
260
        } else {
226
260
            res_chars.reserve(str_col.get_chars().size());
227
260
        }
228
229
806
        for (int i = 0; i < size; ++i) {
230
502
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
231
502
            int len = len_data[index_check_const<len_const>(i)];
232
502
            if (len <= 0 || str.empty()) {
233
92
                StringOP::push_empty_string(i, res_chars, res_offsets);
234
92
                continue;
235
92
            }
236
237
410
            const char* begin = str.begin();
238
410
            const char* p = begin;
239
240
410
            if constexpr (is_ascii) {
241
322
                p = begin + std::min(len, static_cast<int>(str.size));
242
322
            } else {
243
88
                const char* end = str.end();
244
604
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
245
516
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
246
516
                }
247
88
            }
248
249
410
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
250
410
                                                                    res_offsets);
251
410
        }
252
304
    }
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
217
66
                         size_t size) {
218
66
        auto& res_chars = res.get_chars();
219
66
        auto& res_offsets = res.get_offsets();
220
66
        res_offsets.resize(size);
221
66
        const auto& len_data = len_col.get_data();
222
223
        if constexpr (str_const) {
224
            res_chars.reserve(size * (str_col.get_chars().size()));
225
66
        } else {
226
66
            res_chars.reserve(str_col.get_chars().size());
227
66
        }
228
229
186
        for (int i = 0; i < size; ++i) {
230
120
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
231
120
            int len = len_data[index_check_const<len_const>(i)];
232
120
            if (len <= 0 || str.empty()) {
233
46
                StringOP::push_empty_string(i, res_chars, res_offsets);
234
46
                continue;
235
46
            }
236
237
74
            const char* begin = str.begin();
238
74
            const char* p = begin;
239
240
            if constexpr (is_ascii) {
241
                p = begin + std::min(len, static_cast<int>(str.size));
242
74
            } else {
243
74
                const char* end = str.end();
244
522
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
245
448
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
246
448
                }
247
74
            }
248
249
74
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
250
74
                                                                    res_offsets);
251
74
        }
252
66
    }
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
217
8
                         size_t size) {
218
8
        auto& res_chars = res.get_chars();
219
8
        auto& res_offsets = res.get_offsets();
220
8
        res_offsets.resize(size);
221
8
        const auto& len_data = len_col.get_data();
222
223
        if constexpr (str_const) {
224
            res_chars.reserve(size * (str_col.get_chars().size()));
225
8
        } else {
226
8
            res_chars.reserve(str_col.get_chars().size());
227
8
        }
228
229
16
        for (int i = 0; i < size; ++i) {
230
8
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
231
8
            int len = len_data[index_check_const<len_const>(i)];
232
8
            if (len <= 0 || str.empty()) {
233
1
                StringOP::push_empty_string(i, res_chars, res_offsets);
234
1
                continue;
235
1
            }
236
237
7
            const char* begin = str.begin();
238
7
            const char* p = begin;
239
240
            if constexpr (is_ascii) {
241
                p = begin + std::min(len, static_cast<int>(str.size));
242
7
            } else {
243
7
                const char* end = str.end();
244
41
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
245
34
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
246
34
                }
247
7
            }
248
249
7
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
250
7
                                                                    res_offsets);
251
7
        }
252
8
    }
_ZN5doris12FunctionLeft8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
217
8
                         size_t size) {
218
8
        auto& res_chars = res.get_chars();
219
8
        auto& res_offsets = res.get_offsets();
220
8
        res_offsets.resize(size);
221
8
        const auto& len_data = len_col.get_data();
222
223
8
        if constexpr (str_const) {
224
8
            res_chars.reserve(size * (str_col.get_chars().size()));
225
        } else {
226
            res_chars.reserve(str_col.get_chars().size());
227
        }
228
229
16
        for (int i = 0; i < size; ++i) {
230
8
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
231
8
            int len = len_data[index_check_const<len_const>(i)];
232
8
            if (len <= 0 || str.empty()) {
233
1
                StringOP::push_empty_string(i, res_chars, res_offsets);
234
1
                continue;
235
1
            }
236
237
7
            const char* begin = str.begin();
238
7
            const char* p = begin;
239
240
            if constexpr (is_ascii) {
241
                p = begin + std::min(len, static_cast<int>(str.size));
242
7
            } else {
243
7
                const char* end = str.end();
244
41
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
245
34
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
246
34
                }
247
7
            }
248
249
7
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
250
7
                                                                    res_offsets);
251
7
        }
252
8
    }
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
217
106
                         size_t size) {
218
106
        auto& res_chars = res.get_chars();
219
106
        auto& res_offsets = res.get_offsets();
220
106
        res_offsets.resize(size);
221
106
        const auto& len_data = len_col.get_data();
222
223
        if constexpr (str_const) {
224
            res_chars.reserve(size * (str_col.get_chars().size()));
225
106
        } else {
226
106
            res_chars.reserve(str_col.get_chars().size());
227
106
        }
228
229
280
        for (int i = 0; i < size; ++i) {
230
174
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
231
174
            int len = len_data[index_check_const<len_const>(i)];
232
174
            if (len <= 0 || str.empty()) {
233
24
                StringOP::push_empty_string(i, res_chars, res_offsets);
234
24
                continue;
235
24
            }
236
237
150
            const char* begin = str.begin();
238
150
            const char* p = begin;
239
240
150
            if constexpr (is_ascii) {
241
150
                p = begin + std::min(len, static_cast<int>(str.size));
242
            } else {
243
                const char* end = str.end();
244
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
245
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
246
                }
247
            }
248
249
150
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
250
150
                                                                    res_offsets);
251
150
        }
252
106
    }
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
217
80
                         size_t size) {
218
80
        auto& res_chars = res.get_chars();
219
80
        auto& res_offsets = res.get_offsets();
220
80
        res_offsets.resize(size);
221
80
        const auto& len_data = len_col.get_data();
222
223
        if constexpr (str_const) {
224
            res_chars.reserve(size * (str_col.get_chars().size()));
225
80
        } else {
226
80
            res_chars.reserve(str_col.get_chars().size());
227
80
        }
228
229
236
        for (int i = 0; i < size; ++i) {
230
156
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
231
156
            int len = len_data[index_check_const<len_const>(i)];
232
156
            if (len <= 0 || str.empty()) {
233
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
234
10
                continue;
235
10
            }
236
237
146
            const char* begin = str.begin();
238
146
            const char* p = begin;
239
240
146
            if constexpr (is_ascii) {
241
146
                p = begin + std::min(len, static_cast<int>(str.size));
242
            } else {
243
                const char* end = str.end();
244
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
245
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
246
                }
247
            }
248
249
146
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
250
146
                                                                    res_offsets);
251
146
        }
252
80
    }
_ZN5doris12FunctionLeft8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
217
36
                         size_t size) {
218
36
        auto& res_chars = res.get_chars();
219
36
        auto& res_offsets = res.get_offsets();
220
36
        res_offsets.resize(size);
221
36
        const auto& len_data = len_col.get_data();
222
223
36
        if constexpr (str_const) {
224
36
            res_chars.reserve(size * (str_col.get_chars().size()));
225
        } else {
226
            res_chars.reserve(str_col.get_chars().size());
227
        }
228
229
72
        for (int i = 0; i < size; ++i) {
230
36
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
231
36
            int len = len_data[index_check_const<len_const>(i)];
232
36
            if (len <= 0 || str.empty()) {
233
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
234
10
                continue;
235
10
            }
236
237
26
            const char* begin = str.begin();
238
26
            const char* p = begin;
239
240
26
            if constexpr (is_ascii) {
241
26
                p = begin + std::min(len, static_cast<int>(str.size));
242
            } else {
243
                const char* end = str.end();
244
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
245
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
246
                }
247
            }
248
249
26
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
250
26
                                                                    res_offsets);
251
26
        }
252
36
    }
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
253
};
254
255
class FunctionRight : public IFunction {
256
public:
257
    static constexpr auto name = "right";
258
210
    static FunctionPtr create() { return std::make_shared<FunctionRight>(); }
259
1
    String get_name() const override { return name; }
260
201
    size_t get_number_of_arguments() const override { return 2; }
261
201
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
262
201
        return std::make_shared<DataTypeString>();
263
201
    }
264
265
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
266
221
                        uint32_t result, size_t input_rows_count) const override {
267
221
        auto int_type = std::make_shared<DataTypeInt32>();
268
221
        auto params1 = ColumnInt32::create(input_rows_count);
269
221
        auto params2 = ColumnInt32::create(input_rows_count);
270
221
        size_t num_columns_without_result = block.columns();
271
272
        // params1 = max(arg[1], -len(arg))
273
221
        auto& index_data = params1->get_data();
274
221
        auto& strlen_data = params2->get_data();
275
276
221
        auto str_col =
277
221
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
278
221
        const auto* str_column = assert_cast<const ColumnString*>(str_col.get());
279
221
        auto pos_col =
280
221
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
281
221
        const auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data();
282
283
604
        for (int i = 0; i < input_rows_count; ++i) {
284
383
            auto str = str_column->get_data_at(i);
285
383
            strlen_data[i] = simd::VStringFunctions::get_char_len(str.data, str.size);
286
383
        }
287
288
604
        for (int i = 0; i < input_rows_count; ++i) {
289
383
            index_data[i] = std::max(-pos_data[i], -strlen_data[i]);
290
383
        }
291
292
221
        block.insert({std::move(params1), int_type, "index"});
293
221
        block.insert({std::move(params2), int_type, "strlen"});
294
295
221
        ColumnNumbers temp_arguments(3);
296
221
        temp_arguments[0] = arguments[0];
297
221
        temp_arguments[1] = num_columns_without_result;
298
221
        temp_arguments[2] = num_columns_without_result + 1;
299
221
        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
300
221
        return Status::OK();
301
221
    }
302
};
303
304
struct NullOrEmptyImpl {
305
0
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeUInt8>()}; }
306
307
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
308
95
                          uint32_t result, size_t input_rows_count, bool reverse) {
309
95
        auto res_map = ColumnUInt8::create(input_rows_count, 0);
310
311
95
        auto column = block.get_by_position(arguments[0]).column;
312
95
        if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) {
313
49
            column = nullable->get_nested_column_ptr();
314
49
            VectorizedUtils::update_null_map(res_map->get_data(), nullable->get_null_map_data());
315
49
        }
316
95
        auto str_col = assert_cast<const ColumnString*>(column.get());
317
95
        const auto& offsets = str_col->get_offsets();
318
319
95
        auto& res_map_data = res_map->get_data();
320
324
        for (int i = 0; i < input_rows_count; ++i) {
321
229
            int size = offsets[i] - offsets[i - 1];
322
229
            res_map_data[i] |= (size == 0);
323
229
        }
324
95
        if (reverse) {
325
196
            for (int i = 0; i < input_rows_count; ++i) {
326
143
                res_map_data[i] = !res_map_data[i];
327
143
            }
328
53
        }
329
330
95
        block.replace_by_position(result, std::move(res_map));
331
95
        return Status::OK();
332
95
    }
333
};
334
335
class FunctionNullOrEmpty : public IFunction {
336
public:
337
    static constexpr auto name = "null_or_empty";
338
38
    static FunctionPtr create() { return std::make_shared<FunctionNullOrEmpty>(); }
339
1
    String get_name() const override { return name; }
340
29
    size_t get_number_of_arguments() const override { return 1; }
341
342
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
343
29
        return std::make_shared<DataTypeUInt8>();
344
29
    }
345
346
71
    bool use_default_implementation_for_nulls() const override { return false; }
347
348
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
349
42
                        uint32_t result, size_t input_rows_count) const override {
350
42
        RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result,
351
42
                                                 input_rows_count, false));
352
42
        return Status::OK();
353
42
    }
354
};
355
356
class FunctionNotNullOrEmpty : public IFunction {
357
public:
358
    static constexpr auto name = "not_null_or_empty";
359
50
    static FunctionPtr create() { return std::make_shared<FunctionNotNullOrEmpty>(); }
360
1
    String get_name() const override { return name; }
361
41
    size_t get_number_of_arguments() const override { return 1; }
362
363
41
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
364
41
        return std::make_shared<DataTypeUInt8>();
365
41
    }
366
367
94
    bool use_default_implementation_for_nulls() const override { return false; }
368
369
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
370
53
                        uint32_t result, size_t input_rows_count) const override {
371
53
        RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result,
372
53
                                                 input_rows_count, true));
373
53
        return Status::OK();
374
53
    }
375
};
376
377
8
void register_function_string_basic(SimpleFunctionFactory& factory) {
378
8
    factory.register_function<FunctionSubstring<Substr3Impl>>();
379
8
    factory.register_function<FunctionSubstring<Substr2Impl>>();
380
8
    factory.register_function<FunctionLeft>();
381
8
    factory.register_function<FunctionRight>();
382
8
    factory.register_function<FunctionNullOrEmpty>();
383
8
    factory.register_function<FunctionNotNullOrEmpty>();
384
8
    factory.register_function<FunctionStrcmp>();
385
386
8
    factory.register_alias(FunctionLeft::name, "strleft");
387
8
    factory.register_alias(FunctionRight::name, "strright");
388
8
    factory.register_alias(SubstringUtil::name, "substr");
389
8
    factory.register_alias(SubstringUtil::name, "mid");
390
8
}
391
392
#include "common/compile_check_avoid_end.h"
393
} // namespace doris