Coverage Report

Created: 2026-04-11 00:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string_search.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cstddef>
19
#include <cstring>
20
#include <numeric>
21
#include <string>
22
#include <string_view>
23
#include <vector>
24
25
#include "common/status.h"
26
#include "core/assert_cast.h"
27
#include "core/block/block.h"
28
#include "core/block/column_numbers.h"
29
#include "core/column/column_array.h"
30
#include "core/column/column_const.h"
31
#include "core/column/column_nullable.h"
32
#include "core/column/column_string.h"
33
#include "core/column/column_vector.h"
34
#include "core/data_type/data_type_array.h"
35
#include "core/data_type/data_type_nullable.h"
36
#include "core/data_type/data_type_number.h"
37
#include "core/data_type/data_type_string.h"
38
#include "core/data_type/define_primitive_type.h"
39
#include "core/memcmp_small.h"
40
#include "core/memcpy_small.h"
41
#include "core/pod_array_fwd.h"
42
#include "core/string_ref.h"
43
#include "exec/common/stringop_substring.h"
44
#include "exec/common/template_helpers.hpp"
45
#include "exec/common/util.hpp"
46
#include "exprs/function/function.h"
47
#include "exprs/function/function_helpers.h"
48
#include "exprs/function/simple_function_factory.h"
49
#include "exprs/function_context.h"
50
#include "util/simd/vstring_function.h"
51
#include "util/string_search.hpp"
52
53
namespace doris {
54
#include "common/compile_check_avoid_begin.h"
55
56
class FunctionStringLocatePos : public IFunction {
57
public:
58
    static constexpr auto name = "locate";
59
816
    static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); }
60
0
    String get_name() const override { return name; }
61
0
    size_t get_number_of_arguments() const override { return 3; }
62
63
814
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
64
814
        return std::make_shared<DataTypeInt32>();
65
814
    }
66
67
1
    DataTypes get_variadic_argument_types_impl() const override {
68
1
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
69
1
                std::make_shared<DataTypeInt32>()};
70
1
    }
71
72
815
    bool is_variadic() const override { return true; }
73
74
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
75
576
                        uint32_t result, size_t input_rows_count) const override {
76
576
        if (arguments.size() != 3) {
77
0
            return Status::InvalidArgument("Function {} requires 3 arguments, but got {}",
78
0
                                           get_name(), arguments.size());
79
0
        }
80
576
        bool col_const[3];
81
576
        ColumnPtr argument_columns[3];
82
2.30k
        for (int i = 0; i < 3; ++i) {
83
1.72k
            std::tie(argument_columns[i], col_const[i]) =
84
1.72k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
85
1.72k
        }
86
87
576
        const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get());
88
576
        const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get());
89
576
        const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get());
90
91
576
        ColumnInt32::MutablePtr col_res = ColumnInt32::create();
92
576
        auto& vec_res = col_res->get_data();
93
576
        vec_res.resize(block.rows());
94
95
576
        const bool is_ascii = col_left->is_ascii() && col_right->is_ascii();
96
97
576
        if (col_const[0]) {
98
246
            std::visit(
99
246
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
246
                        scalar_search<is_ascii, str_const, pos_const>(
101
246
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
246
                                input_rows_count);
103
246
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
99
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
22
                        scalar_search<is_ascii, str_const, pos_const>(
101
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
22
                                input_rows_count);
103
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
99
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
22
                        scalar_search<is_ascii, str_const, pos_const>(
101
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
22
                                input_rows_count);
103
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
99
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
22
                        scalar_search<is_ascii, str_const, pos_const>(
101
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
22
                                input_rows_count);
103
22
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
99
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
60
                        scalar_search<is_ascii, str_const, pos_const>(
101
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
60
                                input_rows_count);
103
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
99
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
60
                        scalar_search<is_ascii, str_const, pos_const>(
101
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
60
                                input_rows_count);
103
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
99
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
60
                        scalar_search<is_ascii, str_const, pos_const>(
101
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
60
                                input_rows_count);
103
60
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
104
246
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
105
246
                    make_bool_variant(col_const[2]));
106
107
330
        } else {
108
330
            std::visit(
109
330
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
330
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
330
                                                                      col_pos->get_data(), vec_res,
112
330
                                                                      input_rows_count);
113
330
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
109
23
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
23
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
23
                                                                      col_pos->get_data(), vec_res,
112
23
                                                                      input_rows_count);
113
23
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
109
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
22
                                                                      col_pos->get_data(), vec_res,
112
22
                                                                      input_rows_count);
113
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
109
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
22
                                                                      col_pos->get_data(), vec_res,
112
22
                                                                      input_rows_count);
113
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
109
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
22
                                                                      col_pos->get_data(), vec_res,
112
22
                                                                      input_rows_count);
113
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
109
61
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
61
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
61
                                                                      col_pos->get_data(), vec_res,
112
61
                                                                      input_rows_count);
113
61
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
109
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
60
                                                                      col_pos->get_data(), vec_res,
112
60
                                                                      input_rows_count);
113
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
109
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
60
                                                                      col_pos->get_data(), vec_res,
112
60
                                                                      input_rows_count);
113
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
109
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
60
                                                                      col_pos->get_data(), vec_res,
112
60
                                                                      input_rows_count);
113
60
                    },
114
330
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
115
330
                    make_bool_variant(col_const[2]));
116
330
        }
117
576
        block.replace_by_position(result, std::move(col_res));
118
576
        return Status::OK();
119
576
    }
120
121
private:
122
    template <bool is_ascii, bool str_const, bool pos_const>
123
    void scalar_search(const StringRef& ldata, const ColumnString* col_right,
124
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
125
246
                       size_t size) const {
126
246
        res.resize(size);
127
246
        StringRef substr(ldata.data, ldata.size);
128
246
        StringSearch search {&substr};
129
130
492
        for (int i = 0; i < size; ++i) {
131
246
            res[i] = locate_pos<is_ascii>(substr,
132
246
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
246
                                          search, posdata[index_check_const<pos_const>(i)]);
134
246
        }
135
246
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
22
                       size_t size) const {
126
22
        res.resize(size);
127
22
        StringRef substr(ldata.data, ldata.size);
128
22
        StringSearch search {&substr};
129
130
44
        for (int i = 0; i < size; ++i) {
131
22
            res[i] = locate_pos<is_ascii>(substr,
132
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
22
                                          search, posdata[index_check_const<pos_const>(i)]);
134
22
        }
135
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
22
                       size_t size) const {
126
22
        res.resize(size);
127
22
        StringRef substr(ldata.data, ldata.size);
128
22
        StringSearch search {&substr};
129
130
44
        for (int i = 0; i < size; ++i) {
131
22
            res[i] = locate_pos<is_ascii>(substr,
132
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
22
                                          search, posdata[index_check_const<pos_const>(i)]);
134
22
        }
135
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
22
                       size_t size) const {
126
22
        res.resize(size);
127
22
        StringRef substr(ldata.data, ldata.size);
128
22
        StringSearch search {&substr};
129
130
44
        for (int i = 0; i < size; ++i) {
131
22
            res[i] = locate_pos<is_ascii>(substr,
132
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
22
                                          search, posdata[index_check_const<pos_const>(i)]);
134
22
        }
135
22
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
60
                       size_t size) const {
126
60
        res.resize(size);
127
60
        StringRef substr(ldata.data, ldata.size);
128
60
        StringSearch search {&substr};
129
130
120
        for (int i = 0; i < size; ++i) {
131
60
            res[i] = locate_pos<is_ascii>(substr,
132
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
60
                                          search, posdata[index_check_const<pos_const>(i)]);
134
60
        }
135
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
60
                       size_t size) const {
126
60
        res.resize(size);
127
60
        StringRef substr(ldata.data, ldata.size);
128
60
        StringSearch search {&substr};
129
130
120
        for (int i = 0; i < size; ++i) {
131
60
            res[i] = locate_pos<is_ascii>(substr,
132
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
60
                                          search, posdata[index_check_const<pos_const>(i)]);
134
60
        }
135
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
60
                       size_t size) const {
126
60
        res.resize(size);
127
60
        StringRef substr(ldata.data, ldata.size);
128
60
        StringSearch search {&substr};
129
130
120
        for (int i = 0; i < size; ++i) {
131
60
            res[i] = locate_pos<is_ascii>(substr,
132
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
60
                                          search, posdata[index_check_const<pos_const>(i)]);
134
60
        }
135
60
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
136
137
    template <bool is_ascii, bool str_const, bool pos_const>
138
    void vector_search(const ColumnString* col_left, const ColumnString* col_right,
139
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
140
330
                       size_t size) const {
141
330
        res.resize(size);
142
330
        StringSearch search;
143
774
        for (int i = 0; i < size; ++i) {
144
444
            StringRef substr = col_left->get_data_at(i);
145
444
            search.set_pattern(&substr);
146
444
            res[i] = locate_pos<is_ascii>(substr,
147
444
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
444
                                          search, posdata[index_check_const<pos_const>(i)]);
149
444
        }
150
330
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
23
                       size_t size) const {
141
23
        res.resize(size);
142
23
        StringSearch search;
143
71
        for (int i = 0; i < size; ++i) {
144
48
            StringRef substr = col_left->get_data_at(i);
145
48
            search.set_pattern(&substr);
146
48
            res[i] = locate_pos<is_ascii>(substr,
147
48
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
48
                                          search, posdata[index_check_const<pos_const>(i)]);
149
48
        }
150
23
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
22
                       size_t size) const {
141
22
        res.resize(size);
142
22
        StringSearch search;
143
44
        for (int i = 0; i < size; ++i) {
144
22
            StringRef substr = col_left->get_data_at(i);
145
22
            search.set_pattern(&substr);
146
22
            res[i] = locate_pos<is_ascii>(substr,
147
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
22
                                          search, posdata[index_check_const<pos_const>(i)]);
149
22
        }
150
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
22
                       size_t size) const {
141
22
        res.resize(size);
142
22
        StringSearch search;
143
44
        for (int i = 0; i < size; ++i) {
144
22
            StringRef substr = col_left->get_data_at(i);
145
22
            search.set_pattern(&substr);
146
22
            res[i] = locate_pos<is_ascii>(substr,
147
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
22
                                          search, posdata[index_check_const<pos_const>(i)]);
149
22
        }
150
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
22
                       size_t size) const {
141
22
        res.resize(size);
142
22
        StringSearch search;
143
44
        for (int i = 0; i < size; ++i) {
144
22
            StringRef substr = col_left->get_data_at(i);
145
22
            search.set_pattern(&substr);
146
22
            res[i] = locate_pos<is_ascii>(substr,
147
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
22
                                          search, posdata[index_check_const<pos_const>(i)]);
149
22
        }
150
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
61
                       size_t size) const {
141
61
        res.resize(size);
142
61
        StringSearch search;
143
211
        for (int i = 0; i < size; ++i) {
144
150
            StringRef substr = col_left->get_data_at(i);
145
150
            search.set_pattern(&substr);
146
150
            res[i] = locate_pos<is_ascii>(substr,
147
150
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
150
                                          search, posdata[index_check_const<pos_const>(i)]);
149
150
        }
150
61
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
60
                       size_t size) const {
141
60
        res.resize(size);
142
60
        StringSearch search;
143
120
        for (int i = 0; i < size; ++i) {
144
60
            StringRef substr = col_left->get_data_at(i);
145
60
            search.set_pattern(&substr);
146
60
            res[i] = locate_pos<is_ascii>(substr,
147
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
60
                                          search, posdata[index_check_const<pos_const>(i)]);
149
60
        }
150
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
60
                       size_t size) const {
141
60
        res.resize(size);
142
60
        StringSearch search;
143
120
        for (int i = 0; i < size; ++i) {
144
60
            StringRef substr = col_left->get_data_at(i);
145
60
            search.set_pattern(&substr);
146
60
            res[i] = locate_pos<is_ascii>(substr,
147
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
60
                                          search, posdata[index_check_const<pos_const>(i)]);
149
60
        }
150
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
60
                       size_t size) const {
141
60
        res.resize(size);
142
60
        StringSearch search;
143
120
        for (int i = 0; i < size; ++i) {
144
60
            StringRef substr = col_left->get_data_at(i);
145
60
            search.set_pattern(&substr);
146
60
            res[i] = locate_pos<is_ascii>(substr,
147
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
60
                                          search, posdata[index_check_const<pos_const>(i)]);
149
60
        }
150
60
    }
151
152
    template <bool is_ascii>
153
690
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
154
690
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
155
            // BEHAVIOR COMPATIBLE WITH MYSQL
156
            // locate('','')  locate('','',1) locate('','',2)
157
            // 1  1 0
158
11
            return 1;
159
11
        }
160
679
        if (is_ascii) {
161
499
            return locate_pos_ascii(substr, str, search, start_pos);
162
499
        } else {
163
180
            return locate_pos_utf8(substr, str, search, start_pos);
164
180
        }
165
679
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
153
180
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
154
180
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
155
            // BEHAVIOR COMPATIBLE WITH MYSQL
156
            // locate('','')  locate('','',1) locate('','',2)
157
            // 1  1 0
158
0
            return 1;
159
0
        }
160
180
        if (is_ascii) {
161
0
            return locate_pos_ascii(substr, str, search, start_pos);
162
180
        } else {
163
180
            return locate_pos_utf8(substr, str, search, start_pos);
164
180
        }
165
180
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
153
510
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
154
510
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
155
            // BEHAVIOR COMPATIBLE WITH MYSQL
156
            // locate('','')  locate('','',1) locate('','',2)
157
            // 1  1 0
158
11
            return 1;
159
11
        }
160
499
        if (is_ascii) {
161
499
            return locate_pos_ascii(substr, str, search, start_pos);
162
499
        } else {
163
0
            return locate_pos_utf8(substr, str, search, start_pos);
164
0
        }
165
499
    }
166
167
    int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search,
168
180
                        int start_pos) const {
169
180
        std::vector<size_t> index;
170
180
        size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index);
171
180
        if (start_pos <= 0 || start_pos > char_len) {
172
43
            return 0;
173
43
        }
174
137
        if (substr.size == 0) {
175
17
            return start_pos;
176
17
        }
177
        // Input start_pos starts from 1.
178
120
        StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]);
179
120
        int32_t match_pos = search.search(&adjusted_str);
180
120
        if (match_pos >= 0) {
181
            // Hive returns the position in the original string starting from 1.
182
104
            return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos);
183
104
        } else {
184
16
            return 0;
185
16
        }
186
120
    }
187
188
    int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search,
189
499
                         int start_pos) const {
190
499
        if (start_pos <= 0 || start_pos > str.size) {
191
367
            return 0;
192
367
        }
193
132
        if (substr.size == 0) {
194
36
            return start_pos;
195
36
        }
196
        // Input start_pos starts from 1.
197
96
        StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1);
198
96
        int32_t match_pos = search.search(&adjusted_str);
199
96
        if (match_pos >= 0) {
200
            // Hive returns the position in the original string starting from 1.
201
40
            return start_pos + match_pos;
202
56
        } else {
203
56
            return 0;
204
56
        }
205
96
    }
206
};
207
208
class FunctionSplitPart : public IFunction {
209
public:
210
    static constexpr auto name = "split_part";
211
2
    static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); }
212
1
    String get_name() const override { return name; }
213
0
    size_t get_number_of_arguments() const override { return 3; }
214
215
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
216
0
        return make_nullable(std::make_shared<DataTypeString>());
217
0
    }
218
219
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
220
0
                        uint32_t result, size_t input_rows_count) const override {
221
0
        DCHECK_EQ(arguments.size(), 3);
222
223
0
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
224
        // Create a zero column to simply implement
225
0
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
226
0
        auto res = ColumnString::create();
227
228
0
        auto& null_map_data = null_map->get_data();
229
0
        auto& res_offsets = res->get_offsets();
230
0
        auto& res_chars = res->get_chars();
231
0
        res_offsets.resize(input_rows_count);
232
233
0
        const size_t argument_size = arguments.size();
234
0
        std::vector<ColumnPtr> argument_columns(argument_size);
235
0
        for (size_t i = 0; i < argument_size; ++i) {
236
0
            argument_columns[i] =
237
0
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
238
0
            if (const auto* nullable =
239
0
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
240
                // Danger: Here must dispose the null map data first! Because
241
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
242
                // of column nullable mem of null map
243
0
                VectorizedUtils::update_null_map(null_map->get_data(),
244
0
                                                 nullable->get_null_map_data());
245
0
                argument_columns[i] = nullable->get_nested_column_ptr();
246
0
            }
247
0
        }
248
249
0
        const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get());
250
251
0
        const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get());
252
253
0
        const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get());
254
0
        const auto& part_num_col_data = part_num_col->get_data();
255
256
0
        for (size_t i = 0; i < input_rows_count; ++i) {
257
0
            if (part_num_col_data[i] == 0) {
258
0
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
259
0
                continue;
260
0
            }
261
262
0
            auto delimiter = delimiter_col->get_data_at(i);
263
0
            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
264
0
            auto part_number = part_num_col_data[i];
265
0
            auto str = str_col->get_data_at(i);
266
0
            if (delimiter.size == 0) {
267
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
0
                continue;
269
0
            }
270
271
0
            if (part_number > 0) {
272
0
                if (delimiter.size == 1) {
273
                    // If delimiter is a char, use memchr to split
274
0
                    int32_t pre_offset = -1;
275
0
                    int32_t offset = -1;
276
0
                    int32_t num = 0;
277
0
                    while (num < part_number) {
278
0
                        pre_offset = offset;
279
0
                        size_t n = str.size - offset - 1;
280
0
                        const char* pos = reinterpret_cast<const char*>(
281
0
                                memchr(str.data + offset + 1, delimiter_str[0], n));
282
0
                        if (pos != nullptr) {
283
0
                            offset = pos - str.data;
284
0
                            num++;
285
0
                        } else {
286
0
                            offset = str.size;
287
0
                            num = (num == 0) ? 0 : num + 1;
288
0
                            break;
289
0
                        }
290
0
                    }
291
292
0
                    if (num == part_number) {
293
0
                        StringOP::push_value_string(
294
0
                                std::string_view {
295
0
                                        reinterpret_cast<const char*>(str.data + pre_offset + 1),
296
0
                                        (size_t)offset - pre_offset - 1},
297
0
                                i, res_chars, res_offsets);
298
0
                    } else {
299
0
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
300
0
                    }
301
0
                } else {
302
                    // If delimiter is a string, use memmem to split
303
0
                    int32_t pre_offset = -delimiter.size;
304
0
                    int32_t offset = -delimiter.size;
305
0
                    int32_t num = 0;
306
0
                    while (num < part_number) {
307
0
                        pre_offset = offset;
308
0
                        size_t n = str.size - offset - delimiter.size;
309
0
                        char* pos =
310
0
                                reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size,
311
0
                                                               n, delimiter.data, delimiter.size));
312
0
                        if (pos != nullptr) {
313
0
                            offset = pos - str.data;
314
0
                            num++;
315
0
                        } else {
316
0
                            offset = str.size;
317
0
                            num = (num == 0) ? 0 : num + 1;
318
0
                            break;
319
0
                        }
320
0
                    }
321
322
0
                    if (num == part_number) {
323
0
                        StringOP::push_value_string(
324
0
                                std::string_view {reinterpret_cast<const char*>(
325
0
                                                          str.data + pre_offset + delimiter.size),
326
0
                                                  (size_t)offset - pre_offset - delimiter.size},
327
0
                                i, res_chars, res_offsets);
328
0
                    } else {
329
0
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
330
0
                    }
331
0
                }
332
0
            } else {
333
0
                part_number = -part_number;
334
0
                auto str_str = str.to_string();
335
0
                int32_t offset = str.size;
336
0
                int32_t pre_offset = offset;
337
0
                int32_t num = 0;
338
0
                auto substr = str_str;
339
0
                while (num <= part_number && offset >= 0) {
340
0
                    offset = (int)substr.rfind(delimiter, offset);
341
0
                    if (offset != -1) {
342
0
                        if (++num == part_number) {
343
0
                            break;
344
0
                        }
345
0
                        pre_offset = offset;
346
0
                        offset = offset - 1;
347
0
                        substr = str_str.substr(0, pre_offset);
348
0
                    } else {
349
0
                        break;
350
0
                    }
351
0
                }
352
0
                num = (offset == -1 && num != 0) ? num + 1 : num;
353
354
0
                if (num == part_number) {
355
0
                    if (offset == -1) {
356
0
                        StringOP::push_value_string(
357
0
                                std::string_view {reinterpret_cast<const char*>(str.data),
358
0
                                                  (size_t)pre_offset},
359
0
                                i, res_chars, res_offsets);
360
0
                    } else {
361
0
                        StringOP::push_value_string(
362
0
                                std::string_view {str_str.substr(
363
0
                                        offset + delimiter.size,
364
0
                                        (size_t)pre_offset - offset - delimiter.size)},
365
0
                                i, res_chars, res_offsets);
366
0
                    }
367
0
                } else {
368
0
                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
369
0
                }
370
0
            }
371
0
        }
372
373
0
        block.get_by_position(result).column =
374
0
                ColumnNullable::create(std::move(res), std::move(null_map));
375
0
        return Status::OK();
376
0
    }
377
};
378
379
class FunctionSubstringIndex : public IFunction {
380
public:
381
    static constexpr auto name = "substring_index";
382
2
    static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); }
383
1
    String get_name() const override { return name; }
384
0
    size_t get_number_of_arguments() const override { return 3; }
385
386
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
387
0
        return std::make_shared<DataTypeString>();
388
0
    }
389
390
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
391
0
                        uint32_t result, size_t input_rows_count) const override {
392
0
        DCHECK_EQ(arguments.size(), 3);
393
394
        // Create a zero column to simply implement
395
0
        auto res = ColumnString::create();
396
397
0
        auto& res_offsets = res->get_offsets();
398
0
        auto& res_chars = res->get_chars();
399
0
        res_offsets.resize(input_rows_count);
400
0
        ColumnPtr content_column;
401
0
        bool content_const = false;
402
0
        std::tie(content_column, content_const) =
403
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
404
405
0
        const auto* str_col = assert_cast<const ColumnString*>(content_column.get());
406
407
        // Handle both constant and non-constant delimiter parameters
408
0
        ColumnPtr delimiter_column_ptr;
409
0
        bool delimiter_const = false;
410
0
        std::tie(delimiter_column_ptr, delimiter_const) =
411
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
412
0
        const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get());
413
414
0
        ColumnPtr part_num_column_ptr;
415
0
        bool part_num_const = false;
416
0
        std::tie(part_num_column_ptr, part_num_const) =
417
0
                unpack_if_const(block.get_by_position(arguments[2]).column);
418
0
        const ColumnInt32* part_num_col =
419
0
                assert_cast<const ColumnInt32*>(part_num_column_ptr.get());
420
421
        // For constant multi-character delimiters, create StringRef and StringSearch only once
422
0
        std::optional<StringRef> const_delimiter_ref;
423
0
        std::optional<StringSearch> const_search;
424
0
        if (delimiter_const && delimiter_col->get_data_at(0).size > 1) {
425
0
            const_delimiter_ref.emplace(delimiter_col->get_data_at(0));
426
0
            const_search.emplace(&const_delimiter_ref.value());
427
0
        }
428
429
0
        for (size_t i = 0; i < input_rows_count; ++i) {
430
0
            auto str = str_col->get_data_at(content_const ? 0 : i);
431
0
            auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i);
432
0
            int32_t delimiter_size = delimiter.size;
433
434
0
            auto part_number = part_num_col->get_element(part_num_const ? 0 : i);
435
436
0
            if (part_number == 0 || delimiter_size == 0) {
437
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
438
0
                continue;
439
0
            }
440
441
0
            if (part_number > 0) {
442
0
                if (delimiter_size == 1) {
443
0
                    int32_t offset = -1;
444
0
                    int32_t num = 0;
445
0
                    while (num < part_number) {
446
0
                        size_t n = str.size - offset - 1;
447
0
                        const char* pos = reinterpret_cast<const char*>(
448
0
                                memchr(str.data + offset + 1, delimiter.data[0], n));
449
0
                        if (pos != nullptr) {
450
0
                            offset = pos - str.data;
451
0
                            num++;
452
0
                        } else {
453
0
                            offset = str.size;
454
0
                            num = (num == 0) ? 0 : num + 1;
455
0
                            break;
456
0
                        }
457
0
                    }
458
459
0
                    if (num == part_number) {
460
0
                        StringOP::push_value_string(
461
0
                                std::string_view {reinterpret_cast<const char*>(str.data),
462
0
                                                  (size_t)offset},
463
0
                                i, res_chars, res_offsets);
464
0
                    } else {
465
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
466
0
                                                    res_chars, res_offsets);
467
0
                    }
468
0
                } else {
469
                    // For multi-character delimiters
470
                    // Use pre-created StringRef and StringSearch for constant delimiters
471
0
                    StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value()
472
0
                                                                  : StringRef(delimiter);
473
0
                    const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr;
474
0
                    StringSearch local_search(&delimiter_ref);
475
0
                    if (!search_ptr) {
476
0
                        search_ptr = &local_search;
477
0
                    }
478
479
0
                    int32_t offset = -delimiter_size;
480
0
                    int32_t num = 0;
481
0
                    while (num < part_number) {
482
0
                        size_t n = str.size - offset - delimiter_size;
483
                        // search first match delimter_ref index from src string among str_offset to end
484
0
                        const char* pos = search_ptr->search(str.data + offset + delimiter_size, n);
485
0
                        if (pos < str.data + str.size) {
486
0
                            offset = pos - str.data;
487
0
                            num++;
488
0
                        } else {
489
0
                            offset = str.size;
490
0
                            num = (num == 0) ? 0 : num + 1;
491
0
                            break;
492
0
                        }
493
0
                    }
494
495
0
                    if (num == part_number) {
496
0
                        StringOP::push_value_string(
497
0
                                std::string_view {reinterpret_cast<const char*>(str.data),
498
0
                                                  (size_t)offset},
499
0
                                i, res_chars, res_offsets);
500
0
                    } else {
501
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
502
0
                                                    res_chars, res_offsets);
503
0
                    }
504
0
                }
505
0
            } else {
506
0
                int neg_part_number = -part_number;
507
0
                auto str_str = str.to_string();
508
0
                int32_t offset = str.size;
509
0
                int32_t pre_offset = offset;
510
0
                int32_t num = 0;
511
0
                auto substr = str_str;
512
513
                // Use pre-created StringRef for constant delimiters
514
0
                StringRef delimiter_str =
515
0
                        const_delimiter_ref
516
0
                                ? const_delimiter_ref.value()
517
0
                                : StringRef(reinterpret_cast<const char*>(delimiter.data),
518
0
                                            delimiter.size);
519
520
0
                while (num <= neg_part_number && offset >= 0) {
521
0
                    offset = (int)substr.rfind(delimiter_str, offset);
522
0
                    if (offset != -1) {
523
0
                        if (++num == neg_part_number) {
524
0
                            break;
525
0
                        }
526
0
                        pre_offset = offset;
527
0
                        offset = offset - 1;
528
0
                        substr = str_str.substr(0, pre_offset);
529
0
                    } else {
530
0
                        break;
531
0
                    }
532
0
                }
533
0
                num = (offset == -1 && num != 0) ? num + 1 : num;
534
535
0
                if (num == neg_part_number) {
536
0
                    if (offset == -1) {
537
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
538
0
                                                    res_chars, res_offsets);
539
0
                    } else {
540
0
                        StringOP::push_value_string(
541
0
                                std::string_view {str.data + offset + delimiter_size,
542
0
                                                  str.size - offset - delimiter_size},
543
0
                                i, res_chars, res_offsets);
544
0
                    }
545
0
                } else {
546
0
                    StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars,
547
0
                                                res_offsets);
548
0
                }
549
0
            }
550
0
        }
551
552
0
        block.get_by_position(result).column = std::move(res);
553
0
        return Status::OK();
554
0
    }
555
};
556
557
class FunctionSplitByString : public IFunction {
558
public:
559
    static constexpr auto name = "split_by_string";
560
561
2
    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
562
    using NullMapType = PaddedPODArray<UInt8>;
563
564
1
    String get_name() const override { return name; }
565
566
1
    bool is_variadic() const override { return false; }
567
568
0
    size_t get_number_of_arguments() const override { return 2; }
569
570
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
571
0
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
572
0
                << "first argument for function: " << name << " should be string"
573
0
                << " and arguments[0] is " << arguments[0]->get_name();
574
0
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
575
0
                << "second argument for function: " << name << " should be string"
576
0
                << " and arguments[1] is " << arguments[1]->get_name();
577
0
        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
578
0
    }
579
580
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
581
0
                        uint32_t result, size_t input_rows_count) const override {
582
0
        DCHECK_EQ(arguments.size(), 2);
583
584
0
        const auto& [src_column, left_const] =
585
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
586
0
        const auto& [right_column, right_const] =
587
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
588
589
0
        DataTypePtr right_column_type = block.get_by_position(arguments[1]).type;
590
0
        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
591
0
        auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(),
592
0
                                                   ColumnArray::ColumnOffsets::create());
593
594
0
        dest_column_ptr->resize(0);
595
0
        auto& dest_offsets = dest_column_ptr->get_offsets();
596
597
0
        auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data());
598
0
        auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get();
599
600
0
        const auto* col_str = assert_cast<const ColumnString*>(src_column.get());
601
602
0
        const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get());
603
604
0
        std::visit(
605
0
                [&](auto src_const, auto delimiter_const) {
606
0
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
607
0
                                                         *dest_nested_column, dest_offsets,
608
0
                                                         input_rows_count);
609
0
                },
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESF_EEDaSA_SB_
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESE_IbLb1EEEEDaSA_SB_
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESE_IbLb0EEEEDaSA_SB_
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESF_EEDaSA_SB_
610
0
                make_bool_variant(left_const), make_bool_variant(right_const));
611
612
        // all elements in dest_nested_column are not null
613
0
        dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(),
614
0
                                                                       false);
615
0
        block.replace_by_position(result, std::move(dest_column_ptr));
616
617
0
        return Status::OK();
618
0
    }
619
620
private:
621
    template <bool src_const, bool delimiter_const>
622
    void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column,
623
                  IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets,
624
0
                  size_t size) const {
625
0
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
626
0
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
627
0
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
628
0
        column_string_chars.reserve(0);
629
630
0
        ColumnArray::Offset64 string_pos = 0;
631
0
        ColumnArray::Offset64 dest_pos = 0;
632
633
0
        StringSearch search;
634
0
        StringRef delimiter_ref_for_search;
635
636
0
        if constexpr (delimiter_const) {
637
0
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
638
0
            search.set_pattern(&delimiter_ref_for_search);
639
0
        }
640
641
0
        for (size_t i = 0; i < size; i++) {
642
0
            const StringRef str_ref =
643
0
                    src_column_string.get_data_at(index_check_const<src_const>(i));
644
0
            const StringRef delimiter_ref =
645
0
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
646
647
0
            if (str_ref.size == 0) {
648
0
                dest_offsets.push_back(dest_pos);
649
0
                continue;
650
0
            }
651
0
            if (delimiter_ref.size == 0) {
652
0
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
653
0
                                      string_pos, dest_pos);
654
0
            } else {
655
0
                if constexpr (!delimiter_const) {
656
0
                    search.set_pattern(&delimiter_ref);
657
0
                }
658
0
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
659
0
                    const size_t str_offset = str_pos;
660
0
                    const size_t old_size = column_string_chars.size();
661
                    // search first match delimter_ref index from src string among str_offset to end
662
0
                    const char* result_start =
663
0
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
664
                    // compute split part size
665
0
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
666
                    // save dist string split part
667
0
                    if (split_part_size > 0) {
668
0
                        const size_t new_size = old_size + split_part_size;
669
0
                        column_string_chars.resize(new_size);
670
0
                        memcpy_small_allow_read_write_overflow15(
671
0
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
672
0
                                split_part_size);
673
                        // add dist string offset
674
0
                        string_pos += split_part_size;
675
0
                    }
676
0
                    column_string_offsets.push_back(string_pos);
677
                    // array offset + 1
678
0
                    dest_pos++;
679
                    // add src string str_pos to next search start
680
0
                    str_pos += split_part_size + delimiter_ref.size;
681
0
                }
682
0
            }
683
0
            dest_offsets.push_back(dest_pos);
684
0
        }
685
0
    }
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
686
687
    void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars,
688
                               ColumnString::Offsets& column_string_offsets,
689
                               ColumnArray::Offset64& string_pos,
690
0
                               ColumnArray::Offset64& dest_pos) const {
691
0
        const size_t old_size = column_string_chars.size();
692
0
        const size_t new_size = old_size + str_ref.size;
693
0
        column_string_chars.resize(new_size);
694
0
        memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size);
695
0
        if (simd::VStringFunctions::is_ascii(str_ref)) {
696
0
            const auto size = str_ref.size;
697
698
0
            const auto nested_old_size = column_string_offsets.size();
699
0
            const auto nested_new_size = nested_old_size + size;
700
0
            column_string_offsets.resize(nested_new_size);
701
0
            std::iota(column_string_offsets.data() + nested_old_size,
702
0
                      column_string_offsets.data() + nested_new_size, string_pos + 1);
703
704
0
            string_pos += size;
705
0
            dest_pos += size;
706
            // The above code is equivalent to the code in the following comment.
707
            // for (size_t i = 0; i < str_ref.size; i++) {
708
            //     string_pos++;
709
            //     column_string_offsets.push_back(string_pos);
710
            //     (*dest_nested_null_map).push_back(false);
711
            //     dest_pos++;
712
            // }
713
0
        } else {
714
0
            for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) {
715
0
                utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]];
716
717
0
                string_pos += utf8_char_len;
718
0
                column_string_offsets.push_back(string_pos);
719
0
                dest_pos++;
720
0
            }
721
0
        }
722
0
    }
723
};
724
725
// Selects which form of count_substrings a FunctionCountSubString instantiation
// implements: the two-argument or the three-argument one.
enum class FunctionCountSubStringType { TWO_ARGUMENTS, THREE_ARGUMENTS };
726
727
template <FunctionCountSubStringType type>
728
class FunctionCountSubString : public IFunction {
729
public:
730
    static constexpr auto name = "count_substrings";
731
    static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3;
732
733
223
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv
Line
Count
Source
733
45
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv
Line
Count
Source
733
178
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
734
    using NullMapType = PaddedPODArray<UInt8>;
735
736
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev
737
738
0
    size_t get_number_of_arguments() const override { return arg_count; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv
739
740
219
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
741
219
        return std::make_shared<DataTypeInt32>();
742
219
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
740
43
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
741
43
        return std::make_shared<DataTypeInt32>();
742
43
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
740
176
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
741
176
        return std::make_shared<DataTypeInt32>();
742
176
    }
743
744
2
    DataTypes get_variadic_argument_types_impl() const override {
745
2
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
746
1
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
747
1
        } else {
748
1
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
749
1
                    std::make_shared<DataTypeInt32>()};
750
1
        }
751
2
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv
Line
Count
Source
744
1
    DataTypes get_variadic_argument_types_impl() const override {
745
1
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
746
1
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
747
        } else {
748
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
749
                    std::make_shared<DataTypeInt32>()};
750
        }
751
1
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv
Line
Count
Source
744
1
    DataTypes get_variadic_argument_types_impl() const override {
745
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
746
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
747
1
        } else {
748
1
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
749
1
                    std::make_shared<DataTypeInt32>()};
750
1
        }
751
1
    }
752
753
221
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv
Line
Count
Source
753
44
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv
Line
Count
Source
753
177
    bool is_variadic() const override { return true; }
754
755
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
756
189
                        uint32_t result, size_t input_rows_count) const override {
757
189
        DCHECK(arg_count);
758
189
        bool col_const[arg_count];
759
189
        ColumnPtr argument_columns[arg_count];
760
722
        for (int i = 0; i < arg_count; ++i) {
761
533
            std::tie(argument_columns[i], col_const[i]) =
762
533
                    unpack_if_const(block.get_by_position(arguments[i]).column);
763
533
        }
764
765
189
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
766
189
        auto& dest_column_data = dest_column_ptr->get_data();
767
768
189
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
769
34
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
770
34
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
771
34
            std::visit(
772
34
                    [&](auto str_const, auto pattern_const) {
773
34
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
34
                                                           dest_column_data, input_rows_count);
775
34
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_
Line
Count
Source
772
12
                    [&](auto str_const, auto pattern_const) {
773
12
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
12
                                                           dest_column_data, input_rows_count);
775
12
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_
Line
Count
Source
772
11
                    [&](auto str_const, auto pattern_const) {
773
11
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
11
                                                           dest_column_data, input_rows_count);
775
11
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_
Line
Count
Source
772
11
                    [&](auto str_const, auto pattern_const) {
773
11
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
11
                                                           dest_column_data, input_rows_count);
775
11
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_
776
34
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
777
155
        } else {
778
155
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
779
155
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
780
155
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
781
155
            std::visit(
782
155
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
155
                        _execute<str_const, pattern_const, start_pos_const>(
784
155
                                src_column_string, pattern_column, start_pos_column,
785
155
                                dest_column_data, input_rows_count);
786
155
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
782
23
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
23
                        _execute<str_const, pattern_const, start_pos_const>(
784
23
                                src_column_string, pattern_column, start_pos_column,
785
23
                                dest_column_data, input_rows_count);
786
23
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
787
155
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
788
155
                    make_bool_variant(col_const[2]));
789
155
        }
790
791
189
        block.replace_by_position(result, std::move(dest_column_ptr));
792
189
        return Status::OK();
793
189
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
756
34
                        uint32_t result, size_t input_rows_count) const override {
757
34
        DCHECK(arg_count);
758
34
        bool col_const[arg_count];
759
34
        ColumnPtr argument_columns[arg_count];
760
102
        for (int i = 0; i < arg_count; ++i) {
761
68
            std::tie(argument_columns[i], col_const[i]) =
762
68
                    unpack_if_const(block.get_by_position(arguments[i]).column);
763
68
        }
764
765
34
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
766
34
        auto& dest_column_data = dest_column_ptr->get_data();
767
768
34
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
769
34
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
770
34
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
771
34
            std::visit(
772
34
                    [&](auto str_const, auto pattern_const) {
773
34
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
34
                                                           dest_column_data, input_rows_count);
775
34
                    },
776
34
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
777
        } else {
778
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
779
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
780
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
781
            std::visit(
782
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
                        _execute<str_const, pattern_const, start_pos_const>(
784
                                src_column_string, pattern_column, start_pos_column,
785
                                dest_column_data, input_rows_count);
786
                    },
787
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
788
                    make_bool_variant(col_const[2]));
789
        }
790
791
34
        block.replace_by_position(result, std::move(dest_column_ptr));
792
34
        return Status::OK();
793
34
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
756
155
                        uint32_t result, size_t input_rows_count) const override {
757
155
        DCHECK(arg_count);
758
155
        bool col_const[arg_count];
759
155
        ColumnPtr argument_columns[arg_count];
760
620
        for (int i = 0; i < arg_count; ++i) {
761
465
            std::tie(argument_columns[i], col_const[i]) =
762
465
                    unpack_if_const(block.get_by_position(arguments[i]).column);
763
465
        }
764
765
155
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
766
155
        auto& dest_column_data = dest_column_ptr->get_data();
767
768
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
769
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
770
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
771
            std::visit(
772
                    [&](auto str_const, auto pattern_const) {
773
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
                                                           dest_column_data, input_rows_count);
775
                    },
776
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
777
155
        } else {
778
155
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
779
155
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
780
155
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
781
155
            std::visit(
782
155
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
155
                        _execute<str_const, pattern_const, start_pos_const>(
784
155
                                src_column_string, pattern_column, start_pos_column,
785
155
                                dest_column_data, input_rows_count);
786
155
                    },
787
155
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
788
155
                    make_bool_variant(col_const[2]));
789
155
        }
790
791
155
        block.replace_by_position(result, std::move(dest_column_ptr));
792
155
        return Status::OK();
793
155
    }
794
795
private:
796
    template <bool src_const, bool pattern_const>
797
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
798
34
                  ColumnInt32::Container& dest_column_data, size_t size) const {
799
81
        for (size_t i = 0; i < size; i++) {
800
47
            const StringRef str_ref =
801
47
                    src_column_string.get_data_at(index_check_const<src_const>(i));
802
803
47
            const StringRef pattern_ref =
804
47
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
805
47
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
806
47
        }
807
34
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
798
12
                  ColumnInt32::Container& dest_column_data, size_t size) const {
799
37
        for (size_t i = 0; i < size; i++) {
800
25
            const StringRef str_ref =
801
25
                    src_column_string.get_data_at(index_check_const<src_const>(i));
802
803
25
            const StringRef pattern_ref =
804
25
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
805
25
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
806
25
        }
807
12
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
798
11
                  ColumnInt32::Container& dest_column_data, size_t size) const {
799
22
        for (size_t i = 0; i < size; i++) {
800
11
            const StringRef str_ref =
801
11
                    src_column_string.get_data_at(index_check_const<src_const>(i));
802
803
11
            const StringRef pattern_ref =
804
11
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
805
11
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
806
11
        }
807
11
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
798
11
                  ColumnInt32::Container& dest_column_data, size_t size) const {
799
22
        for (size_t i = 0; i < size; i++) {
800
11
            const StringRef str_ref =
801
11
                    src_column_string.get_data_at(index_check_const<src_const>(i));
802
803
11
            const StringRef pattern_ref =
804
11
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
805
11
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
806
11
        }
807
11
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
808
809
    template <bool src_const, bool pattern_const, bool start_pos_const>
810
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
811
                  const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data,
812
155
                  size_t size) const {
813
334
        for (size_t i = 0; i < size; i++) {
814
179
            const StringRef str_ref =
815
179
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
179
            const StringRef pattern_ref =
817
179
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
179
            int32_t start_pos =
820
179
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
179
            const char* p = str_ref.begin();
823
179
            const char* end = str_ref.end();
824
179
            int char_size = 0;
825
1.22k
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
1.04k
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
1.04k
            }
828
179
            const auto start_byte_len = p - str_ref.begin();
829
830
179
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
115
                dest_column_data[i] = 0;
832
115
            } else {
833
64
                dest_column_data[i] =
834
64
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
64
            }
836
179
        }
837
155
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
23
                  size_t size) const {
813
70
        for (size_t i = 0; i < size; i++) {
814
47
            const StringRef str_ref =
815
47
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
47
            const StringRef pattern_ref =
817
47
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
47
            int32_t start_pos =
820
47
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
47
            const char* p = str_ref.begin();
823
47
            const char* end = str_ref.end();
824
47
            int char_size = 0;
825
316
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
269
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
269
            }
828
47
            const auto start_byte_len = p - str_ref.begin();
829
830
47
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
31
                dest_column_data[i] = 0;
832
31
            } else {
833
16
                dest_column_data[i] =
834
16
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
16
            }
836
47
        }
837
23
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
838
839
208
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
840
208
        size_t old_size = pos;
841
208
        size_t str_size = str_ref.size;
842
1.15k
        while (pos < str_size &&
843
1.15k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
844
1.06k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
845
948
            pos++;
846
948
        }
847
208
        return pos - old_size;
848
208
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_
Line
Count
Source
839
56
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
840
56
        size_t old_size = pos;
841
56
        size_t str_size = str_ref.size;
842
372
        while (pos < str_size &&
843
372
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
844
344
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
845
316
            pos++;
846
316
        }
847
56
        return pos - old_size;
848
56
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_
Line
Count
Source
839
152
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
840
152
        size_t old_size = pos;
841
152
        size_t str_size = str_ref.size;
842
784
        while (pos < str_size &&
843
784
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
844
720
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
845
632
            pos++;
846
632
        }
847
152
        return pos - old_size;
848
152
    }
849
850
111
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
851
111
        int count = 0;
852
111
        if (str_ref.size == 0 || pattern_ref.size == 0) {
853
19
            return 0;
854
92
        } else {
855
208
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
856
208
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
857
208
                if (res_pos == (str_ref.size - str_pos)) {
858
92
                    break; // not find
859
92
                }
860
116
                count++;
861
116
                str_pos = str_pos + res_pos + pattern_ref.size;
862
116
            }
863
92
        }
864
92
        return count;
865
111
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_
Line
Count
Source
850
47
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
851
47
        int count = 0;
852
47
        if (str_ref.size == 0 || pattern_ref.size == 0) {
853
19
            return 0;
854
28
        } else {
855
56
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
856
56
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
857
56
                if (res_pos == (str_ref.size - str_pos)) {
858
28
                    break; // not find
859
28
                }
860
28
                count++;
861
28
                str_pos = str_pos + res_pos + pattern_ref.size;
862
28
            }
863
28
        }
864
28
        return count;
865
47
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_
Line
Count
Source
850
64
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
851
64
        int count = 0;
852
64
        if (str_ref.size == 0 || pattern_ref.size == 0) {
853
0
            return 0;
854
64
        } else {
855
152
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
856
152
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
857
152
                if (res_pos == (str_ref.size - str_pos)) {
858
64
                    break; // not find
859
64
                }
860
88
                count++;
861
88
                str_pos = str_pos + res_pos + pattern_ref.size;
862
88
            }
863
64
        }
864
64
        return count;
865
64
    }
866
};
867
868
1
void register_function_string_search(SimpleFunctionFactory& factory) {
869
1
    factory.register_function<FunctionStringLocatePos>();
870
1
    factory.register_function<FunctionSplitPart>();
871
1
    factory.register_function<FunctionSplitByString>();
872
1
    factory.register_function<FunctionCountSubString<FunctionCountSubStringType::TWO_ARGUMENTS>>();
873
1
    factory.register_function<
874
1
            FunctionCountSubString<FunctionCountSubStringType::THREE_ARGUMENTS>>();
875
1
    factory.register_function<FunctionSubstringIndex>();
876
877
1
    factory.register_alias(FunctionStringLocatePos::name, "position");
878
1
}
879
880
#include "common/compile_check_avoid_end.h"
881
} // namespace doris