Coverage Report

Created: 2026-04-11 14:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string_search.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cstddef>
19
#include <cstring>
20
#include <numeric>
21
#include <string>
22
#include <string_view>
23
#include <vector>
24
25
#include "common/status.h"
26
#include "core/assert_cast.h"
27
#include "core/block/block.h"
28
#include "core/block/column_numbers.h"
29
#include "core/column/column_array.h"
30
#include "core/column/column_const.h"
31
#include "core/column/column_nullable.h"
32
#include "core/column/column_string.h"
33
#include "core/column/column_vector.h"
34
#include "core/data_type/data_type_array.h"
35
#include "core/data_type/data_type_nullable.h"
36
#include "core/data_type/data_type_number.h"
37
#include "core/data_type/data_type_string.h"
38
#include "core/data_type/define_primitive_type.h"
39
#include "core/memcmp_small.h"
40
#include "core/memcpy_small.h"
41
#include "core/pod_array_fwd.h"
42
#include "core/string_ref.h"
43
#include "exec/common/stringop_substring.h"
44
#include "exec/common/template_helpers.hpp"
45
#include "exec/common/util.hpp"
46
#include "exprs/function/function.h"
47
#include "exprs/function/function_helpers.h"
48
#include "exprs/function/simple_function_factory.h"
49
#include "exprs/function_context.h"
50
#include "util/simd/vstring_function.h"
51
#include "util/string_search.hpp"
52
53
namespace doris {
54
#include "common/compile_check_avoid_begin.h"
55
56
class FunctionStringLocatePos : public IFunction {
57
public:
58
    static constexpr auto name = "locate";
59
873
    static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); }
60
0
    String get_name() const override { return name; }
61
0
    size_t get_number_of_arguments() const override { return 3; }
62
63
864
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
64
864
        return std::make_shared<DataTypeInt32>();
65
864
    }
66
67
8
    DataTypes get_variadic_argument_types_impl() const override {
68
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
69
8
                std::make_shared<DataTypeInt32>()};
70
8
    }
71
72
865
    bool is_variadic() const override { return true; }
73
74
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
75
651
                        uint32_t result, size_t input_rows_count) const override {
76
651
        if (arguments.size() != 3) {
77
0
            return Status::InvalidArgument("Function {} requires 3 arguments, but got {}",
78
0
                                           get_name(), arguments.size());
79
0
        }
80
651
        bool col_const[3];
81
651
        ColumnPtr argument_columns[3];
82
2.60k
        for (int i = 0; i < 3; ++i) {
83
1.95k
            std::tie(argument_columns[i], col_const[i]) =
84
1.95k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
85
1.95k
        }
86
87
651
        const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get());
88
651
        const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get());
89
651
        const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get());
90
91
651
        ColumnInt32::MutablePtr col_res = ColumnInt32::create();
92
651
        auto& vec_res = col_res->get_data();
93
651
        vec_res.resize(block.rows());
94
95
651
        const bool is_ascii = col_left->is_ascii() && col_right->is_ascii();
96
97
651
        if (col_const[0]) {
98
250
            std::visit(
99
250
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
250
                        scalar_search<is_ascii, str_const, pos_const>(
101
250
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
250
                                input_rows_count);
103
250
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
99
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
22
                        scalar_search<is_ascii, str_const, pos_const>(
101
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
22
                                input_rows_count);
103
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
99
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
22
                        scalar_search<is_ascii, str_const, pos_const>(
101
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
22
                                input_rows_count);
103
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
99
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
22
                        scalar_search<is_ascii, str_const, pos_const>(
101
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
22
                                input_rows_count);
103
22
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
99
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
60
                        scalar_search<is_ascii, str_const, pos_const>(
101
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
60
                                input_rows_count);
103
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
99
64
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
64
                        scalar_search<is_ascii, str_const, pos_const>(
101
64
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
64
                                input_rows_count);
103
64
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
99
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
60
                        scalar_search<is_ascii, str_const, pos_const>(
101
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
60
                                input_rows_count);
103
60
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
104
250
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
105
250
                    make_bool_variant(col_const[2]));
106
107
401
        } else {
108
401
            std::visit(
109
401
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
401
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
401
                                                                      col_pos->get_data(), vec_res,
112
401
                                                                      input_rows_count);
113
401
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
109
39
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
39
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
39
                                                                      col_pos->get_data(), vec_res,
112
39
                                                                      input_rows_count);
113
39
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
109
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
22
                                                                      col_pos->get_data(), vec_res,
112
22
                                                                      input_rows_count);
113
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
109
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
22
                                                                      col_pos->get_data(), vec_res,
112
22
                                                                      input_rows_count);
113
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
109
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
22
                                                                      col_pos->get_data(), vec_res,
112
22
                                                                      input_rows_count);
113
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
109
116
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
116
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
116
                                                                      col_pos->get_data(), vec_res,
112
116
                                                                      input_rows_count);
113
116
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
109
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
60
                                                                      col_pos->get_data(), vec_res,
112
60
                                                                      input_rows_count);
113
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
109
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
60
                                                                      col_pos->get_data(), vec_res,
112
60
                                                                      input_rows_count);
113
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
109
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
60
                                                                      col_pos->get_data(), vec_res,
112
60
                                                                      input_rows_count);
113
60
                    },
114
401
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
115
401
                    make_bool_variant(col_const[2]));
116
401
        }
117
651
        block.replace_by_position(result, std::move(col_res));
118
651
        return Status::OK();
119
651
    }
120
121
private:
122
    template <bool is_ascii, bool str_const, bool pos_const>
123
    void scalar_search(const StringRef& ldata, const ColumnString* col_right,
124
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
125
250
                       size_t size) const {
126
250
        res.resize(size);
127
250
        StringRef substr(ldata.data, ldata.size);
128
250
        StringSearch search {&substr};
129
130
521
        for (int i = 0; i < size; ++i) {
131
271
            res[i] = locate_pos<is_ascii>(substr,
132
271
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
271
                                          search, posdata[index_check_const<pos_const>(i)]);
134
271
        }
135
250
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
22
                       size_t size) const {
126
22
        res.resize(size);
127
22
        StringRef substr(ldata.data, ldata.size);
128
22
        StringSearch search {&substr};
129
130
44
        for (int i = 0; i < size; ++i) {
131
22
            res[i] = locate_pos<is_ascii>(substr,
132
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
22
                                          search, posdata[index_check_const<pos_const>(i)]);
134
22
        }
135
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
22
                       size_t size) const {
126
22
        res.resize(size);
127
22
        StringRef substr(ldata.data, ldata.size);
128
22
        StringSearch search {&substr};
129
130
44
        for (int i = 0; i < size; ++i) {
131
22
            res[i] = locate_pos<is_ascii>(substr,
132
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
22
                                          search, posdata[index_check_const<pos_const>(i)]);
134
22
        }
135
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
22
                       size_t size) const {
126
22
        res.resize(size);
127
22
        StringRef substr(ldata.data, ldata.size);
128
22
        StringSearch search {&substr};
129
130
44
        for (int i = 0; i < size; ++i) {
131
22
            res[i] = locate_pos<is_ascii>(substr,
132
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
22
                                          search, posdata[index_check_const<pos_const>(i)]);
134
22
        }
135
22
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
60
                       size_t size) const {
126
60
        res.resize(size);
127
60
        StringRef substr(ldata.data, ldata.size);
128
60
        StringSearch search {&substr};
129
130
120
        for (int i = 0; i < size; ++i) {
131
60
            res[i] = locate_pos<is_ascii>(substr,
132
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
60
                                          search, posdata[index_check_const<pos_const>(i)]);
134
60
        }
135
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
64
                       size_t size) const {
126
64
        res.resize(size);
127
64
        StringRef substr(ldata.data, ldata.size);
128
64
        StringSearch search {&substr};
129
130
149
        for (int i = 0; i < size; ++i) {
131
85
            res[i] = locate_pos<is_ascii>(substr,
132
85
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
85
                                          search, posdata[index_check_const<pos_const>(i)]);
134
85
        }
135
64
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
60
                       size_t size) const {
126
60
        res.resize(size);
127
60
        StringRef substr(ldata.data, ldata.size);
128
60
        StringSearch search {&substr};
129
130
120
        for (int i = 0; i < size; ++i) {
131
60
            res[i] = locate_pos<is_ascii>(substr,
132
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
60
                                          search, posdata[index_check_const<pos_const>(i)]);
134
60
        }
135
60
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
136
137
    template <bool is_ascii, bool str_const, bool pos_const>
138
    void vector_search(const ColumnString* col_left, const ColumnString* col_right,
139
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
140
401
                       size_t size) const {
141
401
        res.resize(size);
142
401
        StringSearch search;
143
984
        for (int i = 0; i < size; ++i) {
144
583
            StringRef substr = col_left->get_data_at(i);
145
583
            search.set_pattern(&substr);
146
583
            res[i] = locate_pos<is_ascii>(substr,
147
583
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
583
                                          search, posdata[index_check_const<pos_const>(i)]);
149
583
        }
150
401
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
39
                       size_t size) const {
141
39
        res.resize(size);
142
39
        StringSearch search;
143
103
        for (int i = 0; i < size; ++i) {
144
64
            StringRef substr = col_left->get_data_at(i);
145
64
            search.set_pattern(&substr);
146
64
            res[i] = locate_pos<is_ascii>(substr,
147
64
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
64
                                          search, posdata[index_check_const<pos_const>(i)]);
149
64
        }
150
39
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
22
                       size_t size) const {
141
22
        res.resize(size);
142
22
        StringSearch search;
143
44
        for (int i = 0; i < size; ++i) {
144
22
            StringRef substr = col_left->get_data_at(i);
145
22
            search.set_pattern(&substr);
146
22
            res[i] = locate_pos<is_ascii>(substr,
147
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
22
                                          search, posdata[index_check_const<pos_const>(i)]);
149
22
        }
150
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
22
                       size_t size) const {
141
22
        res.resize(size);
142
22
        StringSearch search;
143
44
        for (int i = 0; i < size; ++i) {
144
22
            StringRef substr = col_left->get_data_at(i);
145
22
            search.set_pattern(&substr);
146
22
            res[i] = locate_pos<is_ascii>(substr,
147
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
22
                                          search, posdata[index_check_const<pos_const>(i)]);
149
22
        }
150
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
22
                       size_t size) const {
141
22
        res.resize(size);
142
22
        StringSearch search;
143
44
        for (int i = 0; i < size; ++i) {
144
22
            StringRef substr = col_left->get_data_at(i);
145
22
            search.set_pattern(&substr);
146
22
            res[i] = locate_pos<is_ascii>(substr,
147
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
22
                                          search, posdata[index_check_const<pos_const>(i)]);
149
22
        }
150
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
116
                       size_t size) const {
141
116
        res.resize(size);
142
116
        StringSearch search;
143
389
        for (int i = 0; i < size; ++i) {
144
273
            StringRef substr = col_left->get_data_at(i);
145
273
            search.set_pattern(&substr);
146
273
            res[i] = locate_pos<is_ascii>(substr,
147
273
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
273
                                          search, posdata[index_check_const<pos_const>(i)]);
149
273
        }
150
116
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
60
                       size_t size) const {
141
60
        res.resize(size);
142
60
        StringSearch search;
143
120
        for (int i = 0; i < size; ++i) {
144
60
            StringRef substr = col_left->get_data_at(i);
145
60
            search.set_pattern(&substr);
146
60
            res[i] = locate_pos<is_ascii>(substr,
147
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
60
                                          search, posdata[index_check_const<pos_const>(i)]);
149
60
        }
150
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
60
                       size_t size) const {
141
60
        res.resize(size);
142
60
        StringSearch search;
143
120
        for (int i = 0; i < size; ++i) {
144
60
            StringRef substr = col_left->get_data_at(i);
145
60
            search.set_pattern(&substr);
146
60
            res[i] = locate_pos<is_ascii>(substr,
147
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
60
                                          search, posdata[index_check_const<pos_const>(i)]);
149
60
        }
150
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
60
                       size_t size) const {
141
60
        res.resize(size);
142
60
        StringSearch search;
143
120
        for (int i = 0; i < size; ++i) {
144
60
            StringRef substr = col_left->get_data_at(i);
145
60
            search.set_pattern(&substr);
146
60
            res[i] = locate_pos<is_ascii>(substr,
147
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
60
                                          search, posdata[index_check_const<pos_const>(i)]);
149
60
        }
150
60
    }
151
152
    template <bool is_ascii>
153
854
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
154
854
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
155
            // BEHAVIOR COMPATIBLE WITH MYSQL
156
            // locate('','')  locate('','',1) locate('','',2)
157
            // 1  1 0
158
13
            return 1;
159
13
        }
160
841
        if (is_ascii) {
161
645
            return locate_pos_ascii(substr, str, search, start_pos);
162
645
        } else {
163
196
            return locate_pos_utf8(substr, str, search, start_pos);
164
196
        }
165
841
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
153
196
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
154
196
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
155
            // BEHAVIOR COMPATIBLE WITH MYSQL
156
            // locate('','')  locate('','',1) locate('','',2)
157
            // 1  1 0
158
0
            return 1;
159
0
        }
160
196
        if (is_ascii) {
161
0
            return locate_pos_ascii(substr, str, search, start_pos);
162
196
        } else {
163
196
            return locate_pos_utf8(substr, str, search, start_pos);
164
196
        }
165
196
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
153
658
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
154
658
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
155
            // BEHAVIOR COMPATIBLE WITH MYSQL
156
            // locate('','')  locate('','',1) locate('','',2)
157
            // 1  1 0
158
13
            return 1;
159
13
        }
160
645
        if (is_ascii) {
161
645
            return locate_pos_ascii(substr, str, search, start_pos);
162
645
        } else {
163
0
            return locate_pos_utf8(substr, str, search, start_pos);
164
0
        }
165
645
    }
166
167
    int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search,
168
196
                        int start_pos) const {
169
196
        std::vector<size_t> index;
170
196
        size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index);
171
196
        if (start_pos <= 0 || start_pos > char_len) {
172
49
            return 0;
173
49
        }
174
147
        if (substr.size == 0) {
175
18
            return start_pos;
176
18
        }
177
        // Input start_pos starts from 1.
178
129
        StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]);
179
129
        int32_t match_pos = search.search(&adjusted_str);
180
129
        if (match_pos >= 0) {
181
            // Hive returns the position in the original string starting from 1.
182
111
            return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos);
183
111
        } else {
184
18
            return 0;
185
18
        }
186
129
    }
187
188
    int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search,
189
645
                         int start_pos) const {
190
645
        if (start_pos <= 0 || start_pos > str.size) {
191
412
            return 0;
192
412
        }
193
233
        if (substr.size == 0) {
194
38
            return start_pos;
195
38
        }
196
        // Input start_pos starts from 1.
197
195
        StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1);
198
195
        int32_t match_pos = search.search(&adjusted_str);
199
195
        if (match_pos >= 0) {
200
            // Hive returns the position in the original string starting from 1.
201
62
            return start_pos + match_pos;
202
133
        } else {
203
133
            return 0;
204
133
        }
205
195
    }
206
};
207
208
class FunctionSplitPart : public IFunction {
209
public:
210
    static constexpr auto name = "split_part";
211
145
    static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); }
212
1
    String get_name() const override { return name; }
213
136
    size_t get_number_of_arguments() const override { return 3; }
214
215
136
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
216
136
        return make_nullable(std::make_shared<DataTypeString>());
217
136
    }
218
219
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
220
153
                        uint32_t result, size_t input_rows_count) const override {
221
153
        DCHECK_EQ(arguments.size(), 3);
222
223
153
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
224
        // Create a zero column to simply implement
225
153
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
226
153
        auto res = ColumnString::create();
227
228
153
        auto& null_map_data = null_map->get_data();
229
153
        auto& res_offsets = res->get_offsets();
230
153
        auto& res_chars = res->get_chars();
231
153
        res_offsets.resize(input_rows_count);
232
233
153
        const size_t argument_size = arguments.size();
234
153
        std::vector<ColumnPtr> argument_columns(argument_size);
235
612
        for (size_t i = 0; i < argument_size; ++i) {
236
459
            argument_columns[i] =
237
459
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
238
459
            if (const auto* nullable =
239
459
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
240
                // Danger: Here must dispose the null map data first! Because
241
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
242
                // of column nullable mem of null map
243
0
                VectorizedUtils::update_null_map(null_map->get_data(),
244
0
                                                 nullable->get_null_map_data());
245
0
                argument_columns[i] = nullable->get_nested_column_ptr();
246
0
            }
247
459
        }
248
249
153
        const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get());
250
251
153
        const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get());
252
253
153
        const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get());
254
153
        const auto& part_num_col_data = part_num_col->get_data();
255
256
400
        for (size_t i = 0; i < input_rows_count; ++i) {
257
247
            if (part_num_col_data[i] == 0) {
258
11
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
259
11
                continue;
260
11
            }
261
262
236
            auto delimiter = delimiter_col->get_data_at(i);
263
236
            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
264
236
            auto part_number = part_num_col_data[i];
265
236
            auto str = str_col->get_data_at(i);
266
236
            if (delimiter.size == 0) {
267
9
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
9
                continue;
269
9
            }
270
271
227
            if (part_number > 0) {
272
188
                if (delimiter.size == 1) {
273
                    // If delimiter is a char, use memchr to split
274
155
                    int32_t pre_offset = -1;
275
155
                    int32_t offset = -1;
276
155
                    int32_t num = 0;
277
261
                    while (num < part_number) {
278
217
                        pre_offset = offset;
279
217
                        size_t n = str.size - offset - 1;
280
217
                        const char* pos = reinterpret_cast<const char*>(
281
217
                                memchr(str.data + offset + 1, delimiter_str[0], n));
282
217
                        if (pos != nullptr) {
283
106
                            offset = pos - str.data;
284
106
                            num++;
285
111
                        } else {
286
111
                            offset = str.size;
287
111
                            num = (num == 0) ? 0 : num + 1;
288
111
                            break;
289
111
                        }
290
217
                    }
291
292
155
                    if (num == part_number) {
293
71
                        StringOP::push_value_string(
294
71
                                std::string_view {
295
71
                                        reinterpret_cast<const char*>(str.data + pre_offset + 1),
296
71
                                        (size_t)offset - pre_offset - 1},
297
71
                                i, res_chars, res_offsets);
298
84
                    } else {
299
84
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
300
84
                    }
301
155
                } else {
302
                    // If delimiter is a string, use memmem to split
303
33
                    int32_t pre_offset = -delimiter.size;
304
33
                    int32_t offset = -delimiter.size;
305
33
                    int32_t num = 0;
306
68
                    while (num < part_number) {
307
54
                        pre_offset = offset;
308
54
                        size_t n = str.size - offset - delimiter.size;
309
54
                        char* pos =
310
54
                                reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size,
311
54
                                                               n, delimiter.data, delimiter.size));
312
54
                        if (pos != nullptr) {
313
35
                            offset = pos - str.data;
314
35
                            num++;
315
35
                        } else {
316
19
                            offset = str.size;
317
19
                            num = (num == 0) ? 0 : num + 1;
318
19
                            break;
319
19
                        }
320
54
                    }
321
322
33
                    if (num == part_number) {
323
27
                        StringOP::push_value_string(
324
27
                                std::string_view {reinterpret_cast<const char*>(
325
27
                                                          str.data + pre_offset + delimiter.size),
326
27
                                                  (size_t)offset - pre_offset - delimiter.size},
327
27
                                i, res_chars, res_offsets);
328
27
                    } else {
329
6
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
330
6
                    }
331
33
                }
332
188
            } else {
333
39
                part_number = -part_number;
334
39
                auto str_str = str.to_string();
335
39
                int32_t offset = str.size;
336
39
                int32_t pre_offset = offset;
337
39
                int32_t num = 0;
338
39
                auto substr = str_str;
339
83
                while (num <= part_number && offset >= 0) {
340
83
                    offset = (int)substr.rfind(delimiter, offset);
341
83
                    if (offset != -1) {
342
62
                        if (++num == part_number) {
343
18
                            break;
344
18
                        }
345
44
                        pre_offset = offset;
346
44
                        offset = offset - 1;
347
44
                        substr = str_str.substr(0, pre_offset);
348
44
                    } else {
349
21
                        break;
350
21
                    }
351
83
                }
352
39
                num = (offset == -1 && num != 0) ? num + 1 : num;
353
354
39
                if (num == part_number) {
355
24
                    if (offset == -1) {
356
6
                        StringOP::push_value_string(
357
6
                                std::string_view {reinterpret_cast<const char*>(str.data),
358
6
                                                  (size_t)pre_offset},
359
6
                                i, res_chars, res_offsets);
360
18
                    } else {
361
18
                        StringOP::push_value_string(
362
18
                                std::string_view {str_str.substr(
363
18
                                        offset + delimiter.size,
364
18
                                        (size_t)pre_offset - offset - delimiter.size)},
365
18
                                i, res_chars, res_offsets);
366
18
                    }
367
24
                } else {
368
15
                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
369
15
                }
370
39
            }
371
227
        }
372
373
153
        block.get_by_position(result).column =
374
153
                ColumnNullable::create(std::move(res), std::move(null_map));
375
153
        return Status::OK();
376
153
    }
377
};
378
379
class FunctionSubstringIndex : public IFunction {
380
public:
381
    static constexpr auto name = "substring_index";
382
112
    static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); }
383
1
    String get_name() const override { return name; }
384
103
    size_t get_number_of_arguments() const override { return 3; }
385
386
103
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
387
103
        return std::make_shared<DataTypeString>();
388
103
    }
389
390
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
391
107
                        uint32_t result, size_t input_rows_count) const override {
392
107
        DCHECK_EQ(arguments.size(), 3);
393
394
        // Create a zero column to simply implement
395
107
        auto res = ColumnString::create();
396
397
107
        auto& res_offsets = res->get_offsets();
398
107
        auto& res_chars = res->get_chars();
399
107
        res_offsets.resize(input_rows_count);
400
107
        ColumnPtr content_column;
401
107
        bool content_const = false;
402
107
        std::tie(content_column, content_const) =
403
107
                unpack_if_const(block.get_by_position(arguments[0]).column);
404
405
107
        const auto* str_col = assert_cast<const ColumnString*>(content_column.get());
406
407
        // Handle both constant and non-constant delimiter parameters
408
107
        ColumnPtr delimiter_column_ptr;
409
107
        bool delimiter_const = false;
410
107
        std::tie(delimiter_column_ptr, delimiter_const) =
411
107
                unpack_if_const(block.get_by_position(arguments[1]).column);
412
107
        const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get());
413
414
107
        ColumnPtr part_num_column_ptr;
415
107
        bool part_num_const = false;
416
107
        std::tie(part_num_column_ptr, part_num_const) =
417
107
                unpack_if_const(block.get_by_position(arguments[2]).column);
418
107
        const ColumnInt32* part_num_col =
419
107
                assert_cast<const ColumnInt32*>(part_num_column_ptr.get());
420
421
        // For constant multi-character delimiters, create StringRef and StringSearch only once
422
107
        std::optional<StringRef> const_delimiter_ref;
423
107
        std::optional<StringSearch> const_search;
424
107
        if (delimiter_const && delimiter_col->get_data_at(0).size > 1) {
425
0
            const_delimiter_ref.emplace(delimiter_col->get_data_at(0));
426
0
            const_search.emplace(&const_delimiter_ref.value());
427
0
        }
428
429
297
        for (size_t i = 0; i < input_rows_count; ++i) {
430
190
            auto str = str_col->get_data_at(content_const ? 0 : i);
431
190
            auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i);
432
190
            int32_t delimiter_size = delimiter.size;
433
434
190
            auto part_number = part_num_col->get_element(part_num_const ? 0 : i);
435
436
190
            if (part_number == 0 || delimiter_size == 0) {
437
7
                StringOP::push_empty_string(i, res_chars, res_offsets);
438
7
                continue;
439
7
            }
440
441
183
            if (part_number > 0) {
442
128
                if (delimiter_size == 1) {
443
85
                    int32_t offset = -1;
444
85
                    int32_t num = 0;
445
137
                    while (num < part_number) {
446
117
                        size_t n = str.size - offset - 1;
447
117
                        const char* pos = reinterpret_cast<const char*>(
448
117
                                memchr(str.data + offset + 1, delimiter.data[0], n));
449
117
                        if (pos != nullptr) {
450
52
                            offset = pos - str.data;
451
52
                            num++;
452
65
                        } else {
453
65
                            offset = str.size;
454
65
                            num = (num == 0) ? 0 : num + 1;
455
65
                            break;
456
65
                        }
457
117
                    }
458
459
85
                    if (num == part_number) {
460
25
                        StringOP::push_value_string(
461
25
                                std::string_view {reinterpret_cast<const char*>(str.data),
462
25
                                                  (size_t)offset},
463
25
                                i, res_chars, res_offsets);
464
60
                    } else {
465
60
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
466
60
                                                    res_chars, res_offsets);
467
60
                    }
468
85
                } else {
469
                    // For multi-character delimiters
470
                    // Use pre-created StringRef and StringSearch for constant delimiters
471
43
                    StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value()
472
43
                                                                  : StringRef(delimiter);
473
43
                    const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr;
474
43
                    StringSearch local_search(&delimiter_ref);
475
43
                    if (!search_ptr) {
476
43
                        search_ptr = &local_search;
477
43
                    }
478
479
43
                    int32_t offset = -delimiter_size;
480
43
                    int32_t num = 0;
481
86
                    while (num < part_number) {
482
59
                        size_t n = str.size - offset - delimiter_size;
483
                        // search first match delimter_ref index from src string among str_offset to end
484
59
                        const char* pos = search_ptr->search(str.data + offset + delimiter_size, n);
485
59
                        if (pos < str.data + str.size) {
486
43
                            offset = pos - str.data;
487
43
                            num++;
488
43
                        } else {
489
16
                            offset = str.size;
490
16
                            num = (num == 0) ? 0 : num + 1;
491
16
                            break;
492
16
                        }
493
59
                    }
494
495
43
                    if (num == part_number) {
496
40
                        StringOP::push_value_string(
497
40
                                std::string_view {reinterpret_cast<const char*>(str.data),
498
40
                                                  (size_t)offset},
499
40
                                i, res_chars, res_offsets);
500
40
                    } else {
501
3
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
502
3
                                                    res_chars, res_offsets);
503
3
                    }
504
43
                }
505
128
            } else {
506
55
                int neg_part_number = -part_number;
507
55
                auto str_str = str.to_string();
508
55
                int32_t offset = str.size;
509
55
                int32_t pre_offset = offset;
510
55
                int32_t num = 0;
511
55
                auto substr = str_str;
512
513
                // Use pre-created StringRef for constant delimiters
514
55
                StringRef delimiter_str =
515
55
                        const_delimiter_ref
516
55
                                ? const_delimiter_ref.value()
517
55
                                : StringRef(reinterpret_cast<const char*>(delimiter.data),
518
55
                                            delimiter.size);
519
520
79
                while (num <= neg_part_number && offset >= 0) {
521
79
                    offset = (int)substr.rfind(delimiter_str, offset);
522
79
                    if (offset != -1) {
523
63
                        if (++num == neg_part_number) {
524
39
                            break;
525
39
                        }
526
24
                        pre_offset = offset;
527
24
                        offset = offset - 1;
528
24
                        substr = str_str.substr(0, pre_offset);
529
24
                    } else {
530
16
                        break;
531
16
                    }
532
79
                }
533
55
                num = (offset == -1 && num != 0) ? num + 1 : num;
534
535
55
                if (num == neg_part_number) {
536
43
                    if (offset == -1) {
537
4
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
538
4
                                                    res_chars, res_offsets);
539
39
                    } else {
540
39
                        StringOP::push_value_string(
541
39
                                std::string_view {str.data + offset + delimiter_size,
542
39
                                                  str.size - offset - delimiter_size},
543
39
                                i, res_chars, res_offsets);
544
39
                    }
545
43
                } else {
546
12
                    StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars,
547
12
                                                res_offsets);
548
12
                }
549
55
            }
550
183
        }
551
552
107
        block.get_by_position(result).column = std::move(res);
553
107
        return Status::OK();
554
107
    }
555
};
556
557
class FunctionSplitByString : public IFunction {
558
public:
559
    static constexpr auto name = "split_by_string";
560
561
135
    /// Factory: returns a new shared instance of this function.
    static FunctionPtr create() {
        return std::make_shared<FunctionSplitByString>();
    }
562
    using NullMapType = PaddedPODArray<UInt8>;
563
564
1
    /// Returns the function name, i.e. the class-level `name` constant.
    String get_name() const override {
        return name;
    }
565
566
127
    /// The function is not variadic.
    bool is_variadic() const override {
        return false;
    }
567
568
126
    /// The function takes exactly two arguments (see execute_impl: source, delimiter).
    size_t get_number_of_arguments() const override {
        return 2;
    }
569
570
126
    /// Result type: an array whose elements have the nullable form of the first
    /// argument's type. Both arguments are debug-checked to be string types.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
                << "first argument for function: " << name << " should be string"
                << " and arguments[0] is " << arguments[0]->get_name();
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
                << "second argument for function: " << name << " should be string"
                << " and arguments[1] is " << arguments[1]->get_name();

        const auto nullable_element_type = make_nullable(arguments[0]);
        return std::make_shared<DataTypeArray>(nullable_element_type);
    }
579
580
    /// Splits every row of the first argument by the matching row of the second argument
    /// and stores the pieces as one array per row in the result column.
    ///
    /// Either argument may be a constant column; the const-ness of both is turned into
    /// compile-time flags so that `_execute` is instantiated once per combination.
    /// Every produced element is marked non-null.
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        DCHECK_EQ(arguments.size(), 2);

        const auto& [src_column, left_const] =
                unpack_if_const(block.get_by_position(arguments[0]).column);
        const auto& [right_column, right_const] =
                unpack_if_const(block.get_by_position(arguments[1]).column);

        // Result layout: Array(Nullable(<type of the first argument>)).
        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
        auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(),
                                                   ColumnArray::ColumnOffsets::create());

        dest_column_ptr->resize(0);
        auto& dest_offsets = dest_column_ptr->get_offsets();

        auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data());
        auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get();

        const auto* col_str = assert_cast<const ColumnString*>(src_column.get());
        const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get());

        // Dispatch on (source const?, delimiter const?) to the matching _execute
        // instantiation.
        std::visit(
                [&](auto src_const, auto delimiter_const) {
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
                                                         *dest_nested_column, dest_offsets,
                                                         input_rows_count);
                },
                make_bool_variant(left_const), make_bool_variant(right_const));

        // all elements in dest_nested_column are not null
        dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(),
                                                                       false);
        block.replace_by_position(result, std::move(dest_column_ptr));

        return Status::OK();
    }
619
620
private:
621
    template <bool src_const, bool delimiter_const>
622
    void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column,
623
                  IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets,
624
157
                  size_t size) const {
625
157
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
626
157
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
627
157
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
628
157
        column_string_chars.reserve(0);
629
630
157
        ColumnArray::Offset64 string_pos = 0;
631
157
        ColumnArray::Offset64 dest_pos = 0;
632
633
157
        StringSearch search;
634
157
        StringRef delimiter_ref_for_search;
635
636
157
        if constexpr (delimiter_const) {
637
94
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
638
94
            search.set_pattern(&delimiter_ref_for_search);
639
94
        }
640
641
1.24k
        for (size_t i = 0; i < size; i++) {
642
1.08k
            const StringRef str_ref =
643
1.08k
                    src_column_string.get_data_at(index_check_const<src_const>(i));
644
1.08k
            const StringRef delimiter_ref =
645
1.08k
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
646
647
1.08k
            if (str_ref.size == 0) {
648
164
                dest_offsets.push_back(dest_pos);
649
164
                continue;
650
164
            }
651
919
            if (delimiter_ref.size == 0) {
652
27
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
653
27
                                      string_pos, dest_pos);
654
892
            } else {
655
892
                if constexpr (!delimiter_const) {
656
51
                    search.set_pattern(&delimiter_ref);
657
51
                }
658
51.7k
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
659
50.8k
                    const size_t str_offset = str_pos;
660
50.8k
                    const size_t old_size = column_string_chars.size();
661
                    // search first match delimter_ref index from src string among str_offset to end
662
50.8k
                    const char* result_start =
663
50.8k
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
664
                    // compute split part size
665
50.8k
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
666
                    // save dist string split part
667
50.8k
                    if (split_part_size > 0) {
668
50.2k
                        const size_t new_size = old_size + split_part_size;
669
50.2k
                        column_string_chars.resize(new_size);
670
50.2k
                        memcpy_small_allow_read_write_overflow15(
671
50.2k
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
672
50.2k
                                split_part_size);
673
                        // add dist string offset
674
50.2k
                        string_pos += split_part_size;
675
50.2k
                    }
676
50.8k
                    column_string_offsets.push_back(string_pos);
677
                    // array offset + 1
678
50.8k
                    dest_pos++;
679
                    // add src string str_pos to next search start
680
50.8k
                    str_pos += split_part_size + delimiter_ref.size;
681
50.8k
                }
682
892
            }
683
919
            dest_offsets.push_back(dest_pos);
684
919
        }
685
157
    }
_ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
624
55
                  size_t size) const {
625
55
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
626
55
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
627
55
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
628
55
        column_string_chars.reserve(0);
629
630
55
        ColumnArray::Offset64 string_pos = 0;
631
55
        ColumnArray::Offset64 dest_pos = 0;
632
633
55
        StringSearch search;
634
55
        StringRef delimiter_ref_for_search;
635
636
        if constexpr (delimiter_const) {
637
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
638
            search.set_pattern(&delimiter_ref_for_search);
639
        }
640
641
130
        for (size_t i = 0; i < size; i++) {
642
75
            const StringRef str_ref =
643
75
                    src_column_string.get_data_at(index_check_const<src_const>(i));
644
75
            const StringRef delimiter_ref =
645
75
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
646
647
75
            if (str_ref.size == 0) {
648
13
                dest_offsets.push_back(dest_pos);
649
13
                continue;
650
13
            }
651
62
            if (delimiter_ref.size == 0) {
652
11
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
653
11
                                      string_pos, dest_pos);
654
51
            } else {
655
51
                if constexpr (!delimiter_const) {
656
51
                    search.set_pattern(&delimiter_ref);
657
51
                }
658
214
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
659
163
                    const size_t str_offset = str_pos;
660
163
                    const size_t old_size = column_string_chars.size();
661
                    // search first match delimter_ref index from src string among str_offset to end
662
163
                    const char* result_start =
663
163
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
664
                    // compute split part size
665
163
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
666
                    // save dist string split part
667
163
                    if (split_part_size > 0) {
668
122
                        const size_t new_size = old_size + split_part_size;
669
122
                        column_string_chars.resize(new_size);
670
122
                        memcpy_small_allow_read_write_overflow15(
671
122
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
672
122
                                split_part_size);
673
                        // add dist string offset
674
122
                        string_pos += split_part_size;
675
122
                    }
676
163
                    column_string_offsets.push_back(string_pos);
677
                    // array offset + 1
678
163
                    dest_pos++;
679
                    // add src string str_pos to next search start
680
163
                    str_pos += split_part_size + delimiter_ref.size;
681
163
                }
682
51
            }
683
62
            dest_offsets.push_back(dest_pos);
684
62
        }
685
55
    }
_ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
624
94
                  size_t size) const {
625
94
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
626
94
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
627
94
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
628
94
        column_string_chars.reserve(0);
629
630
94
        ColumnArray::Offset64 string_pos = 0;
631
94
        ColumnArray::Offset64 dest_pos = 0;
632
633
94
        StringSearch search;
634
94
        StringRef delimiter_ref_for_search;
635
636
94
        if constexpr (delimiter_const) {
637
94
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
638
94
            search.set_pattern(&delimiter_ref_for_search);
639
94
        }
640
641
1.07k
        for (size_t i = 0; i < size; i++) {
642
984
            const StringRef str_ref =
643
984
                    src_column_string.get_data_at(index_check_const<src_const>(i));
644
984
            const StringRef delimiter_ref =
645
984
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
646
647
984
            if (str_ref.size == 0) {
648
135
                dest_offsets.push_back(dest_pos);
649
135
                continue;
650
135
            }
651
849
            if (delimiter_ref.size == 0) {
652
8
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
653
8
                                      string_pos, dest_pos);
654
841
            } else {
655
                if constexpr (!delimiter_const) {
656
                    search.set_pattern(&delimiter_ref);
657
                }
658
51.5k
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
659
50.7k
                    const size_t str_offset = str_pos;
660
50.7k
                    const size_t old_size = column_string_chars.size();
661
                    // search first match delimter_ref index from src string among str_offset to end
662
50.7k
                    const char* result_start =
663
50.7k
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
664
                    // compute split part size
665
50.7k
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
666
                    // save dist string split part
667
50.7k
                    if (split_part_size > 0) {
668
50.1k
                        const size_t new_size = old_size + split_part_size;
669
50.1k
                        column_string_chars.resize(new_size);
670
50.1k
                        memcpy_small_allow_read_write_overflow15(
671
50.1k
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
672
50.1k
                                split_part_size);
673
                        // add dist string offset
674
50.1k
                        string_pos += split_part_size;
675
50.1k
                    }
676
50.7k
                    column_string_offsets.push_back(string_pos);
677
                    // array offset + 1
678
50.7k
                    dest_pos++;
679
                    // add src string str_pos to next search start
680
50.7k
                    str_pos += split_part_size + delimiter_ref.size;
681
50.7k
                }
682
841
            }
683
849
            dest_offsets.push_back(dest_pos);
684
849
        }
685
94
    }
_ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
624
8
                  size_t size) const {
625
8
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
626
8
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
627
8
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
628
8
        column_string_chars.reserve(0);
629
630
8
        ColumnArray::Offset64 string_pos = 0;
631
8
        ColumnArray::Offset64 dest_pos = 0;
632
633
8
        StringSearch search;
634
8
        StringRef delimiter_ref_for_search;
635
636
        if constexpr (delimiter_const) {
637
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
638
            search.set_pattern(&delimiter_ref_for_search);
639
        }
640
641
32
        for (size_t i = 0; i < size; i++) {
642
24
            const StringRef str_ref =
643
24
                    src_column_string.get_data_at(index_check_const<src_const>(i));
644
24
            const StringRef delimiter_ref =
645
24
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
646
647
24
            if (str_ref.size == 0) {
648
16
                dest_offsets.push_back(dest_pos);
649
16
                continue;
650
16
            }
651
8
            if (delimiter_ref.size == 0) {
652
8
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
653
8
                                      string_pos, dest_pos);
654
8
            } else {
655
0
                if constexpr (!delimiter_const) {
656
0
                    search.set_pattern(&delimiter_ref);
657
0
                }
658
0
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
659
0
                    const size_t str_offset = str_pos;
660
0
                    const size_t old_size = column_string_chars.size();
661
                    // search first match delimter_ref index from src string among str_offset to end
662
0
                    const char* result_start =
663
0
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
664
                    // compute split part size
665
0
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
666
                    // save dist string split part
667
0
                    if (split_part_size > 0) {
668
0
                        const size_t new_size = old_size + split_part_size;
669
0
                        column_string_chars.resize(new_size);
670
0
                        memcpy_small_allow_read_write_overflow15(
671
0
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
672
0
                                split_part_size);
673
                        // add dist string offset
674
0
                        string_pos += split_part_size;
675
0
                    }
676
0
                    column_string_offsets.push_back(string_pos);
677
                    // array offset + 1
678
0
                    dest_pos++;
679
                    // add src string str_pos to next search start
680
0
                    str_pos += split_part_size + delimiter_ref.size;
681
0
                }
682
0
            }
683
8
            dest_offsets.push_back(dest_pos);
684
8
        }
685
8
    }
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
686
687
    void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars,
688
                               ColumnString::Offsets& column_string_offsets,
689
                               ColumnArray::Offset64& string_pos,
690
27
                               ColumnArray::Offset64& dest_pos) const {
691
27
        const size_t old_size = column_string_chars.size();
692
27
        const size_t new_size = old_size + str_ref.size;
693
27
        column_string_chars.resize(new_size);
694
27
        memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size);
695
27
        if (simd::VStringFunctions::is_ascii(str_ref)) {
696
24
            const auto size = str_ref.size;
697
698
24
            const auto nested_old_size = column_string_offsets.size();
699
24
            const auto nested_new_size = nested_old_size + size;
700
24
            column_string_offsets.resize(nested_new_size);
701
24
            std::iota(column_string_offsets.data() + nested_old_size,
702
24
                      column_string_offsets.data() + nested_new_size, string_pos + 1);
703
704
24
            string_pos += size;
705
24
            dest_pos += size;
706
            // The above code is equivalent to the code in the following comment.
707
            // for (size_t i = 0; i < str_ref.size; i++) {
708
            //     string_pos++;
709
            //     column_string_offsets.push_back(string_pos);
710
            //     (*dest_nested_null_map).push_back(false);
711
            //     dest_pos++;
712
            // }
713
24
        } else {
714
22
            for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) {
715
19
                utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]];
716
717
19
                string_pos += utf8_char_len;
718
19
                column_string_offsets.push_back(string_pos);
719
19
                dest_pos++;
720
19
            }
721
3
        }
722
27
    }
723
};
724
725
enum class FunctionCountSubStringType { TWO_ARGUMENTS, THREE_ARGUMENTS };
726
727
template <FunctionCountSubStringType type>
728
class FunctionCountSubString : public IFunction {
729
public:
730
    static constexpr auto name = "count_substrings";
731
    static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3;
732
733
283
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv
Line
Count
Source
733
77
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv
Line
Count
Source
733
206
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
734
    using NullMapType = PaddedPODArray<UInt8>;
735
736
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev
737
738
0
    size_t get_number_of_arguments() const override { return arg_count; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv
739
740
265
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
741
265
        return std::make_shared<DataTypeInt32>();
742
265
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
740
68
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
741
68
        return std::make_shared<DataTypeInt32>();
742
68
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
740
197
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
741
197
        return std::make_shared<DataTypeInt32>();
742
197
    }
743
744
16
    DataTypes get_variadic_argument_types_impl() const override {
745
16
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
746
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
747
8
        } else {
748
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
749
8
                    std::make_shared<DataTypeInt32>()};
750
8
        }
751
16
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv
Line
Count
Source
744
8
    DataTypes get_variadic_argument_types_impl() const override {
745
8
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
746
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
747
        } else {
748
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
749
                    std::make_shared<DataTypeInt32>()};
750
        }
751
8
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv
Line
Count
Source
744
8
    DataTypes get_variadic_argument_types_impl() const override {
745
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
746
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
747
8
        } else {
748
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
749
8
                    std::make_shared<DataTypeInt32>()};
750
8
        }
751
8
    }
752
753
267
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv
Line
Count
Source
753
69
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv
Line
Count
Source
753
198
    bool is_variadic() const override { return true; }
754
755
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
756
234
                        uint32_t result, size_t input_rows_count) const override {
757
234
        DCHECK(arg_count);
758
234
        bool col_const[arg_count];
759
234
        ColumnPtr argument_columns[arg_count];
760
878
        for (int i = 0; i < arg_count; ++i) {
761
644
            std::tie(argument_columns[i], col_const[i]) =
762
644
                    unpack_if_const(block.get_by_position(arguments[i]).column);
763
644
        }
764
765
234
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
766
234
        auto& dest_column_data = dest_column_ptr->get_data();
767
768
234
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
769
58
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
770
58
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
771
58
            std::visit(
772
58
                    [&](auto str_const, auto pattern_const) {
773
58
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
58
                                                           dest_column_data, input_rows_count);
775
58
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_
Line
Count
Source
772
32
                    [&](auto str_const, auto pattern_const) {
773
32
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
32
                                                           dest_column_data, input_rows_count);
775
32
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_
Line
Count
Source
772
13
                    [&](auto str_const, auto pattern_const) {
773
13
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
13
                                                           dest_column_data, input_rows_count);
775
13
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_
Line
Count
Source
772
13
                    [&](auto str_const, auto pattern_const) {
773
13
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
13
                                                           dest_column_data, input_rows_count);
775
13
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_
776
58
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
777
176
        } else {
778
176
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
779
176
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
780
176
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
781
176
            std::visit(
782
176
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
176
                        _execute<str_const, pattern_const, start_pos_const>(
784
176
                                src_column_string, pattern_column, start_pos_column,
785
176
                                dest_column_data, input_rows_count);
786
176
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
782
36
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
36
                        _execute<str_const, pattern_const, start_pos_const>(
784
36
                                src_column_string, pattern_column, start_pos_column,
785
36
                                dest_column_data, input_rows_count);
786
36
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
782
29
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
29
                        _execute<str_const, pattern_const, start_pos_const>(
784
29
                                src_column_string, pattern_column, start_pos_column,
785
29
                                dest_column_data, input_rows_count);
786
29
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
782
23
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
23
                        _execute<str_const, pattern_const, start_pos_const>(
784
23
                                src_column_string, pattern_column, start_pos_column,
785
23
                                dest_column_data, input_rows_count);
786
23
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
782
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
22
                        _execute<str_const, pattern_const, start_pos_const>(
784
22
                                src_column_string, pattern_column, start_pos_column,
785
22
                                dest_column_data, input_rows_count);
786
22
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
787
176
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
788
176
                    make_bool_variant(col_const[2]));
789
176
        }
790
791
234
        block.replace_by_position(result, std::move(dest_column_ptr));
792
234
        return Status::OK();
793
234
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
756
58
                        uint32_t result, size_t input_rows_count) const override {
757
58
        DCHECK(arg_count);
758
58
        bool col_const[arg_count];
759
58
        ColumnPtr argument_columns[arg_count];
760
174
        for (int i = 0; i < arg_count; ++i) {
761
116
            std::tie(argument_columns[i], col_const[i]) =
762
116
                    unpack_if_const(block.get_by_position(arguments[i]).column);
763
116
        }
764
765
58
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
766
58
        auto& dest_column_data = dest_column_ptr->get_data();
767
768
58
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
769
58
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
770
58
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
771
58
            std::visit(
772
58
                    [&](auto str_const, auto pattern_const) {
773
58
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
58
                                                           dest_column_data, input_rows_count);
775
58
                    },
776
58
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
777
        } else {
778
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
779
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
780
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
781
            std::visit(
782
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
                        _execute<str_const, pattern_const, start_pos_const>(
784
                                src_column_string, pattern_column, start_pos_column,
785
                                dest_column_data, input_rows_count);
786
                    },
787
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
788
                    make_bool_variant(col_const[2]));
789
        }
790
791
58
        block.replace_by_position(result, std::move(dest_column_ptr));
792
58
        return Status::OK();
793
58
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
756
176
                        uint32_t result, size_t input_rows_count) const override {
757
176
        DCHECK(arg_count);
758
176
        bool col_const[arg_count];
759
176
        ColumnPtr argument_columns[arg_count];
760
704
        for (int i = 0; i < arg_count; ++i) {
761
528
            std::tie(argument_columns[i], col_const[i]) =
762
528
                    unpack_if_const(block.get_by_position(arguments[i]).column);
763
528
        }
764
765
176
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
766
176
        auto& dest_column_data = dest_column_ptr->get_data();
767
768
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
769
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
770
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
771
            std::visit(
772
                    [&](auto str_const, auto pattern_const) {
773
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
774
                                                           dest_column_data, input_rows_count);
775
                    },
776
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
777
176
        } else {
778
176
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
779
176
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
780
176
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
781
176
            std::visit(
782
176
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
783
176
                        _execute<str_const, pattern_const, start_pos_const>(
784
176
                                src_column_string, pattern_column, start_pos_column,
785
176
                                dest_column_data, input_rows_count);
786
176
                    },
787
176
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
788
176
                    make_bool_variant(col_const[2]));
789
176
        }
790
791
176
        block.replace_by_position(result, std::move(dest_column_ptr));
792
176
        return Status::OK();
793
176
    }
794
795
private:
796
    template <bool src_const, bool pattern_const>
797
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
798
58
                  ColumnInt32::Container& dest_column_data, size_t size) const {
799
241
        for (size_t i = 0; i < size; i++) {
800
183
            const StringRef str_ref =
801
183
                    src_column_string.get_data_at(index_check_const<src_const>(i));
802
803
183
            const StringRef pattern_ref =
804
183
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
805
183
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
806
183
        }
807
58
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
798
32
                  ColumnInt32::Container& dest_column_data, size_t size) const {
799
133
        for (size_t i = 0; i < size; i++) {
800
101
            const StringRef str_ref =
801
101
                    src_column_string.get_data_at(index_check_const<src_const>(i));
802
803
101
            const StringRef pattern_ref =
804
101
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
805
101
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
806
101
        }
807
32
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
798
13
                  ColumnInt32::Container& dest_column_data, size_t size) const {
799
54
        for (size_t i = 0; i < size; i++) {
800
41
            const StringRef str_ref =
801
41
                    src_column_string.get_data_at(index_check_const<src_const>(i));
802
803
41
            const StringRef pattern_ref =
804
41
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
805
41
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
806
41
        }
807
13
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
798
13
                  ColumnInt32::Container& dest_column_data, size_t size) const {
799
54
        for (size_t i = 0; i < size; i++) {
800
41
            const StringRef str_ref =
801
41
                    src_column_string.get_data_at(index_check_const<src_const>(i));
802
803
41
            const StringRef pattern_ref =
804
41
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
805
41
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
806
41
        }
807
13
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
808
809
    template <bool src_const, bool pattern_const, bool start_pos_const>
810
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
811
                  const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data,
812
176
                  size_t size) const {
813
411
        for (size_t i = 0; i < size; i++) {
814
235
            const StringRef str_ref =
815
235
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
235
            const StringRef pattern_ref =
817
235
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
235
            int32_t start_pos =
820
235
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
235
            const char* p = str_ref.begin();
823
235
            const char* end = str_ref.end();
824
235
            int char_size = 0;
825
1.47k
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
1.24k
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
1.24k
            }
828
235
            const auto start_byte_len = p - str_ref.begin();
829
830
235
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
134
                dest_column_data[i] = 0;
832
134
            } else {
833
101
                dest_column_data[i] =
834
101
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
101
            }
836
235
        }
837
176
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
36
                  size_t size) const {
813
97
        for (size_t i = 0; i < size; i++) {
814
61
            const StringRef str_ref =
815
61
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
61
            const StringRef pattern_ref =
817
61
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
61
            int32_t start_pos =
820
61
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
61
            const char* p = str_ref.begin();
823
61
            const char* end = str_ref.end();
824
61
            int char_size = 0;
825
456
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
395
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
395
            }
828
61
            const auto start_byte_len = p - str_ref.begin();
829
830
61
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
38
                dest_column_data[i] = 0;
832
38
            } else {
833
23
                dest_column_data[i] =
834
23
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
23
            }
836
61
        }
837
36
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
29
                  size_t size) const {
813
78
        for (size_t i = 0; i < size; i++) {
814
49
            const StringRef str_ref =
815
49
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
49
            const StringRef pattern_ref =
817
49
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
49
            int32_t start_pos =
820
49
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
49
            const char* p = str_ref.begin();
823
49
            const char* end = str_ref.end();
824
49
            int char_size = 0;
825
242
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
193
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
193
            }
828
49
            const auto start_byte_len = p - str_ref.begin();
829
830
49
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
22
                dest_column_data[i] = 0;
832
27
            } else {
833
27
                dest_column_data[i] =
834
27
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
27
            }
836
49
        }
837
29
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
23
                  size_t size) const {
813
60
        for (size_t i = 0; i < size; i++) {
814
37
            const StringRef str_ref =
815
37
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
37
            const StringRef pattern_ref =
817
37
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
37
            int32_t start_pos =
820
37
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
37
            const char* p = str_ref.begin();
823
37
            const char* end = str_ref.end();
824
37
            int char_size = 0;
825
177
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
140
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
140
            }
828
37
            const auto start_byte_len = p - str_ref.begin();
829
830
37
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
18
                dest_column_data[i] = 0;
832
19
            } else {
833
19
                dest_column_data[i] =
834
19
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
19
            }
836
37
        }
837
23
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
812
22
                  size_t size) const {
813
44
        for (size_t i = 0; i < size; i++) {
814
22
            const StringRef str_ref =
815
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
816
22
            const StringRef pattern_ref =
817
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
818
            // 1-based index
819
22
            int32_t start_pos =
820
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
821
822
22
            const char* p = str_ref.begin();
823
22
            const char* end = str_ref.end();
824
22
            int char_size = 0;
825
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
826
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
827
129
            }
828
22
            const auto start_byte_len = p - str_ref.begin();
829
830
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
831
14
                dest_column_data[i] = 0;
832
14
            } else {
833
8
                dest_column_data[i] =
834
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
835
8
            }
836
22
        }
837
22
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
838
839
529
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
840
529
        size_t old_size = pos;
841
529
        size_t str_size = str_ref.size;
842
2.20k
        while (pos < str_size &&
843
2.20k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
844
1.98k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
845
1.67k
            pos++;
846
1.67k
        }
847
529
        return pos - old_size;
848
529
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_
Line
Count
Source
839
291
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
840
291
        size_t old_size = pos;
841
291
        size_t str_size = str_ref.size;
842
1.05k
        while (pos < str_size &&
843
1.05k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
844
933
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
845
763
            pos++;
846
763
        }
847
291
        return pos - old_size;
848
291
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_
Line
Count
Source
839
238
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
840
238
        size_t old_size = pos;
841
238
        size_t str_size = str_ref.size;
842
1.15k
        while (pos < str_size &&
843
1.15k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
844
1.05k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
845
914
            pos++;
846
914
        }
847
238
        return pos - old_size;
848
238
    }
849
850
284
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
851
284
        int count = 0;
852
284
        if (str_ref.size == 0 || pattern_ref.size == 0) {
853
64
            return 0;
854
220
        } else {
855
529
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
856
529
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
857
529
                if (res_pos == (str_ref.size - str_pos)) {
858
220
                    break; // not find
859
220
                }
860
309
                count++;
861
309
                str_pos = str_pos + res_pos + pattern_ref.size;
862
309
            }
863
220
        }
864
220
        return count;
865
284
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_
Line
Count
Source
850
183
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
851
183
        int count = 0;
852
183
        if (str_ref.size == 0 || pattern_ref.size == 0) {
853
62
            return 0;
854
121
        } else {
855
291
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
856
291
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
857
291
                if (res_pos == (str_ref.size - str_pos)) {
858
121
                    break; // not find
859
121
                }
860
170
                count++;
861
170
                str_pos = str_pos + res_pos + pattern_ref.size;
862
170
            }
863
121
        }
864
121
        return count;
865
183
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_
Line
Count
Source
850
101
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
851
101
        int count = 0;
852
101
        if (str_ref.size == 0 || pattern_ref.size == 0) {
853
2
            return 0;
854
99
        } else {
855
238
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
856
238
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
857
238
                if (res_pos == (str_ref.size - str_pos)) {
858
99
                    break; // not find
859
99
                }
860
139
                count++;
861
139
                str_pos = str_pos + res_pos + pattern_ref.size;
862
139
            }
863
99
        }
864
99
        return count;
865
101
    }
866
};
867
868
8
void register_function_string_search(SimpleFunctionFactory& factory) {
869
8
    factory.register_function<FunctionStringLocatePos>();
870
8
    factory.register_function<FunctionSplitPart>();
871
8
    factory.register_function<FunctionSplitByString>();
872
8
    factory.register_function<FunctionCountSubString<FunctionCountSubStringType::TWO_ARGUMENTS>>();
873
8
    factory.register_function<
874
8
            FunctionCountSubString<FunctionCountSubStringType::THREE_ARGUMENTS>>();
875
8
    factory.register_function<FunctionSubstringIndex>();
876
877
8
    factory.register_alias(FunctionStringLocatePos::name, "position");
878
8
}
879
880
#include "common/compile_check_avoid_end.h"
881
} // namespace doris