Coverage Report

Created: 2026-06-09 15:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string_search.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <cstddef>
19
#include <cstring>
20
#include <numeric>
21
#include <string>
22
#include <string_view>
23
#include <vector>
24
25
#include "common/status.h"
26
#include "core/assert_cast.h"
27
#include "core/block/block.h"
28
#include "core/block/column_numbers.h"
29
#include "core/column/column_array.h"
30
#include "core/column/column_const.h"
31
#include "core/column/column_nullable.h"
32
#include "core/column/column_string.h"
33
#include "core/column/column_vector.h"
34
#include "core/data_type/data_type_array.h"
35
#include "core/data_type/data_type_nullable.h"
36
#include "core/data_type/data_type_number.h"
37
#include "core/data_type/data_type_string.h"
38
#include "core/data_type/define_primitive_type.h"
39
#include "core/memcmp_small.h"
40
#include "core/memcpy_small.h"
41
#include "core/pod_array_fwd.h"
42
#include "core/string_ref.h"
43
#include "exec/common/stringop_substring.h"
44
#include "exec/common/template_helpers.hpp"
45
#include "exec/common/util.hpp"
46
#include "exprs/function/function.h"
47
#include "exprs/function/function_helpers.h"
48
#include "exprs/function/simple_function_factory.h"
49
#include "exprs/function_context.h"
50
#include "util/simd/vstring_function.h"
51
#include "util/string_search.hpp"
52
53
namespace doris {
54
#include "common/compile_check_avoid_begin.h"
55
56
class FunctionStringLocatePos : public IFunction {
57
public:
58
    static constexpr auto name = "locate";
59
873
    static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); }
60
0
    String get_name() const override { return name; }
61
0
    size_t get_number_of_arguments() const override { return 3; }
62
63
864
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
64
864
        return std::make_shared<DataTypeInt32>();
65
864
    }
66
67
8
    DataTypes get_variadic_argument_types_impl() const override {
68
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
69
8
                std::make_shared<DataTypeInt32>()};
70
8
    }
71
72
865
    bool is_variadic() const override { return true; }
73
74
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
75
650
                        uint32_t result, size_t input_rows_count) const override {
76
650
        if (arguments.size() != 3) {
77
0
            return Status::InvalidArgument("Function {} requires 3 arguments, but got {}",
78
0
                                           get_name(), arguments.size());
79
0
        }
80
650
        bool col_const[3];
81
650
        ColumnPtr argument_columns[3];
82
2.59k
        for (int i = 0; i < 3; ++i) {
83
1.94k
            std::tie(argument_columns[i], col_const[i]) =
84
1.94k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
85
1.94k
        }
86
87
650
        const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get());
88
650
        const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get());
89
650
        const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get());
90
91
650
        ColumnInt32::MutablePtr col_res = ColumnInt32::create();
92
650
        auto& vec_res = col_res->get_data();
93
650
        vec_res.resize(block.rows());
94
95
650
        const bool is_ascii = col_left->is_ascii() && col_right->is_ascii();
96
97
650
        if (col_const[0]) {
98
250
            std::visit(
99
250
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
250
                        scalar_search<is_ascii, str_const, pos_const>(
101
250
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
250
                                input_rows_count);
103
250
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
99
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
22
                        scalar_search<is_ascii, str_const, pos_const>(
101
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
22
                                input_rows_count);
103
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
99
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
22
                        scalar_search<is_ascii, str_const, pos_const>(
101
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
22
                                input_rows_count);
103
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
99
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
22
                        scalar_search<is_ascii, str_const, pos_const>(
101
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
22
                                input_rows_count);
103
22
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
99
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
60
                        scalar_search<is_ascii, str_const, pos_const>(
101
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
60
                                input_rows_count);
103
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
99
64
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
64
                        scalar_search<is_ascii, str_const, pos_const>(
101
64
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
64
                                input_rows_count);
103
64
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
99
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
100
60
                        scalar_search<is_ascii, str_const, pos_const>(
101
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
102
60
                                input_rows_count);
103
60
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
104
250
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
105
250
                    make_bool_variant(col_const[2]));
106
107
400
        } else {
108
400
            std::visit(
109
401
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
401
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
401
                                                                      col_pos->get_data(), vec_res,
112
401
                                                                      input_rows_count);
113
401
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
109
39
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
39
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
39
                                                                      col_pos->get_data(), vec_res,
112
39
                                                                      input_rows_count);
113
39
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
109
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
22
                                                                      col_pos->get_data(), vec_res,
112
22
                                                                      input_rows_count);
113
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
109
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
22
                                                                      col_pos->get_data(), vec_res,
112
22
                                                                      input_rows_count);
113
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
109
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
22
                                                                      col_pos->get_data(), vec_res,
112
22
                                                                      input_rows_count);
113
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
109
116
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
116
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
116
                                                                      col_pos->get_data(), vec_res,
112
116
                                                                      input_rows_count);
113
116
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
109
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
60
                                                                      col_pos->get_data(), vec_res,
112
60
                                                                      input_rows_count);
113
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
109
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
60
                                                                      col_pos->get_data(), vec_res,
112
60
                                                                      input_rows_count);
113
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
109
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
110
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
111
60
                                                                      col_pos->get_data(), vec_res,
112
60
                                                                      input_rows_count);
113
60
                    },
114
400
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
115
400
                    make_bool_variant(col_const[2]));
116
400
        }
117
650
        block.replace_by_position(result, std::move(col_res));
118
650
        return Status::OK();
119
650
    }
120
121
private:
122
    template <bool is_ascii, bool str_const, bool pos_const>
123
    void scalar_search(const StringRef& ldata, const ColumnString* col_right,
124
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
125
250
                       size_t size) const {
126
250
        res.resize(size);
127
250
        StringRef substr(ldata.data, ldata.size);
128
250
        StringSearch search {&substr};
129
130
521
        for (int i = 0; i < size; ++i) {
131
271
            res[i] = locate_pos<is_ascii>(substr,
132
271
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
271
                                          search, posdata[index_check_const<pos_const>(i)]);
134
271
        }
135
250
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
22
                       size_t size) const {
126
22
        res.resize(size);
127
22
        StringRef substr(ldata.data, ldata.size);
128
22
        StringSearch search {&substr};
129
130
44
        for (int i = 0; i < size; ++i) {
131
22
            res[i] = locate_pos<is_ascii>(substr,
132
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
22
                                          search, posdata[index_check_const<pos_const>(i)]);
134
22
        }
135
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
22
                       size_t size) const {
126
22
        res.resize(size);
127
22
        StringRef substr(ldata.data, ldata.size);
128
22
        StringSearch search {&substr};
129
130
44
        for (int i = 0; i < size; ++i) {
131
22
            res[i] = locate_pos<is_ascii>(substr,
132
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
22
                                          search, posdata[index_check_const<pos_const>(i)]);
134
22
        }
135
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
22
                       size_t size) const {
126
22
        res.resize(size);
127
22
        StringRef substr(ldata.data, ldata.size);
128
22
        StringSearch search {&substr};
129
130
44
        for (int i = 0; i < size; ++i) {
131
22
            res[i] = locate_pos<is_ascii>(substr,
132
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
22
                                          search, posdata[index_check_const<pos_const>(i)]);
134
22
        }
135
22
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
60
                       size_t size) const {
126
60
        res.resize(size);
127
60
        StringRef substr(ldata.data, ldata.size);
128
60
        StringSearch search {&substr};
129
130
120
        for (int i = 0; i < size; ++i) {
131
60
            res[i] = locate_pos<is_ascii>(substr,
132
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
60
                                          search, posdata[index_check_const<pos_const>(i)]);
134
60
        }
135
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
64
                       size_t size) const {
126
64
        res.resize(size);
127
64
        StringRef substr(ldata.data, ldata.size);
128
64
        StringSearch search {&substr};
129
130
149
        for (int i = 0; i < size; ++i) {
131
85
            res[i] = locate_pos<is_ascii>(substr,
132
85
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
85
                                          search, posdata[index_check_const<pos_const>(i)]);
134
85
        }
135
64
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
125
60
                       size_t size) const {
126
60
        res.resize(size);
127
60
        StringRef substr(ldata.data, ldata.size);
128
60
        StringSearch search {&substr};
129
130
120
        for (int i = 0; i < size; ++i) {
131
60
            res[i] = locate_pos<is_ascii>(substr,
132
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
133
60
                                          search, posdata[index_check_const<pos_const>(i)]);
134
60
        }
135
60
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
136
137
    template <bool is_ascii, bool str_const, bool pos_const>
138
    void vector_search(const ColumnString* col_left, const ColumnString* col_right,
139
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
140
401
                       size_t size) const {
141
401
        res.resize(size);
142
401
        StringSearch search;
143
984
        for (int i = 0; i < size; ++i) {
144
583
            StringRef substr = col_left->get_data_at(i);
145
583
            search.set_pattern(&substr);
146
583
            res[i] = locate_pos<is_ascii>(substr,
147
583
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
583
                                          search, posdata[index_check_const<pos_const>(i)]);
149
583
        }
150
401
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
39
                       size_t size) const {
141
39
        res.resize(size);
142
39
        StringSearch search;
143
103
        for (int i = 0; i < size; ++i) {
144
64
            StringRef substr = col_left->get_data_at(i);
145
64
            search.set_pattern(&substr);
146
64
            res[i] = locate_pos<is_ascii>(substr,
147
64
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
64
                                          search, posdata[index_check_const<pos_const>(i)]);
149
64
        }
150
39
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
22
                       size_t size) const {
141
22
        res.resize(size);
142
22
        StringSearch search;
143
44
        for (int i = 0; i < size; ++i) {
144
22
            StringRef substr = col_left->get_data_at(i);
145
22
            search.set_pattern(&substr);
146
22
            res[i] = locate_pos<is_ascii>(substr,
147
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
22
                                          search, posdata[index_check_const<pos_const>(i)]);
149
22
        }
150
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
22
                       size_t size) const {
141
22
        res.resize(size);
142
22
        StringSearch search;
143
44
        for (int i = 0; i < size; ++i) {
144
22
            StringRef substr = col_left->get_data_at(i);
145
22
            search.set_pattern(&substr);
146
22
            res[i] = locate_pos<is_ascii>(substr,
147
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
22
                                          search, posdata[index_check_const<pos_const>(i)]);
149
22
        }
150
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
22
                       size_t size) const {
141
22
        res.resize(size);
142
22
        StringSearch search;
143
44
        for (int i = 0; i < size; ++i) {
144
22
            StringRef substr = col_left->get_data_at(i);
145
22
            search.set_pattern(&substr);
146
22
            res[i] = locate_pos<is_ascii>(substr,
147
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
22
                                          search, posdata[index_check_const<pos_const>(i)]);
149
22
        }
150
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
116
                       size_t size) const {
141
116
        res.resize(size);
142
116
        StringSearch search;
143
389
        for (int i = 0; i < size; ++i) {
144
273
            StringRef substr = col_left->get_data_at(i);
145
273
            search.set_pattern(&substr);
146
273
            res[i] = locate_pos<is_ascii>(substr,
147
273
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
273
                                          search, posdata[index_check_const<pos_const>(i)]);
149
273
        }
150
116
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
60
                       size_t size) const {
141
60
        res.resize(size);
142
60
        StringSearch search;
143
120
        for (int i = 0; i < size; ++i) {
144
60
            StringRef substr = col_left->get_data_at(i);
145
60
            search.set_pattern(&substr);
146
60
            res[i] = locate_pos<is_ascii>(substr,
147
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
60
                                          search, posdata[index_check_const<pos_const>(i)]);
149
60
        }
150
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
60
                       size_t size) const {
141
60
        res.resize(size);
142
60
        StringSearch search;
143
120
        for (int i = 0; i < size; ++i) {
144
60
            StringRef substr = col_left->get_data_at(i);
145
60
            search.set_pattern(&substr);
146
60
            res[i] = locate_pos<is_ascii>(substr,
147
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
60
                                          search, posdata[index_check_const<pos_const>(i)]);
149
60
        }
150
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
140
60
                       size_t size) const {
141
60
        res.resize(size);
142
60
        StringSearch search;
143
120
        for (int i = 0; i < size; ++i) {
144
60
            StringRef substr = col_left->get_data_at(i);
145
60
            search.set_pattern(&substr);
146
60
            res[i] = locate_pos<is_ascii>(substr,
147
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
148
60
                                          search, posdata[index_check_const<pos_const>(i)]);
149
60
        }
150
60
    }
151
152
    template <bool is_ascii>
153
854
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
154
854
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
155
            // BEHAVIOR COMPATIBLE WITH MYSQL
156
            // locate('','')  locate('','',1) locate('','',2)
157
            // 1  1 0
158
13
            return 1;
159
13
        }
160
841
        if (is_ascii) {
161
645
            return locate_pos_ascii(substr, str, search, start_pos);
162
645
        } else {
163
196
            return locate_pos_utf8(substr, str, search, start_pos);
164
196
        }
165
841
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
153
196
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
154
196
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
155
            // BEHAVIOR COMPATIBLE WITH MYSQL
156
            // locate('','')  locate('','',1) locate('','',2)
157
            // 1  1 0
158
0
            return 1;
159
0
        }
160
196
        if (is_ascii) {
161
0
            return locate_pos_ascii(substr, str, search, start_pos);
162
196
        } else {
163
196
            return locate_pos_utf8(substr, str, search, start_pos);
164
196
        }
165
196
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
153
658
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
154
658
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
155
            // BEHAVIOR COMPATIBLE WITH MYSQL
156
            // locate('','')  locate('','',1) locate('','',2)
157
            // 1  1 0
158
13
            return 1;
159
13
        }
160
645
        if (is_ascii) {
161
645
            return locate_pos_ascii(substr, str, search, start_pos);
162
645
        } else {
163
0
            return locate_pos_utf8(substr, str, search, start_pos);
164
0
        }
165
645
    }
166
167
    int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search,
168
196
                        int start_pos) const {
169
196
        std::vector<size_t> index;
170
196
        size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index);
171
196
        if (start_pos <= 0 || start_pos > char_len) {
172
49
            return 0;
173
49
        }
174
147
        if (substr.size == 0) {
175
18
            return start_pos;
176
18
        }
177
        // Input start_pos starts from 1.
178
129
        StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]);
179
129
        int32_t match_pos = search.search(&adjusted_str);
180
129
        if (match_pos >= 0) {
181
            // Hive returns the position in the original string starting from 1.
182
111
            return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos);
183
111
        } else {
184
18
            return 0;
185
18
        }
186
129
    }
187
188
    int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search,
189
645
                         int start_pos) const {
190
645
        if (start_pos <= 0 || start_pos > str.size) {
191
412
            return 0;
192
412
        }
193
233
        if (substr.size == 0) {
194
38
            return start_pos;
195
38
        }
196
        // Input start_pos starts from 1.
197
195
        StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1);
198
195
        int32_t match_pos = search.search(&adjusted_str);
199
195
        if (match_pos >= 0) {
200
            // Hive returns the position in the original string starting from 1.
201
62
            return start_pos + match_pos;
202
133
        } else {
203
133
            return 0;
204
133
        }
205
195
    }
206
};
207
208
class FunctionSplitPart : public IFunction {
209
public:
210
    static constexpr auto name = "split_part";
211
145
    static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); }
212
1
    String get_name() const override { return name; }
213
136
    size_t get_number_of_arguments() const override { return 3; }
214
215
136
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
216
136
        return make_nullable(std::make_shared<DataTypeString>());
217
136
    }
218
219
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
220
151
                        uint32_t result, size_t input_rows_count) const override {
221
151
        DCHECK_EQ(arguments.size(), 3);
222
223
151
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
224
        // Create a zero column to simply implement
225
151
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
226
151
        auto res = ColumnString::create();
227
228
151
        auto& null_map_data = null_map->get_data();
229
151
        auto& res_offsets = res->get_offsets();
230
151
        auto& res_chars = res->get_chars();
231
151
        res_offsets.resize(input_rows_count);
232
233
151
        const size_t argument_size = arguments.size();
234
151
        std::vector<ColumnPtr> argument_columns(argument_size);
235
604
        for (size_t i = 0; i < argument_size; ++i) {
236
453
            argument_columns[i] =
237
453
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
238
453
            if (const auto* nullable =
239
453
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
240
                // Danger: Here must dispose the null map data first! Because
241
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
242
                // of column nullable mem of null map
243
0
                VectorizedUtils::update_null_map(null_map->get_data(),
244
0
                                                 nullable->get_null_map_data());
245
0
                argument_columns[i] = nullable->get_nested_column_ptr();
246
0
            }
247
453
        }
248
249
151
        const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get());
250
251
151
        const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get());
252
253
151
        const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get());
254
151
        const auto& part_num_col_data = part_num_col->get_data();
255
256
396
        for (size_t i = 0; i < input_rows_count; ++i) {
257
245
            if (part_num_col_data[i] == 0) {
258
11
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
259
11
                continue;
260
11
            }
261
262
234
            auto delimiter = delimiter_col->get_data_at(i);
263
234
            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
264
234
            auto part_number = part_num_col_data[i];
265
234
            auto str = str_col->get_data_at(i);
266
234
            if (delimiter.size == 0) {
267
9
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
9
                continue;
269
9
            }
270
271
225
            if (part_number > 0) {
272
186
                if (delimiter.size == 1) {
273
                    // If delimiter is a char, use memchr to split
274
153
                    int32_t pre_offset = -1;
275
153
                    int32_t offset = -1;
276
153
                    int32_t num = 0;
277
258
                    while (num < part_number) {
278
215
                        pre_offset = offset;
279
215
                        size_t n = str.size - offset - 1;
280
215
                        const char* pos = reinterpret_cast<const char*>(
281
215
                                memchr(str.data + offset + 1, delimiter_str[0], n));
282
215
                        if (pos != nullptr) {
283
105
                            offset = pos - str.data;
284
105
                            num++;
285
110
                        } else {
286
110
                            offset = str.size;
287
110
                            num = (num == 0) ? 0 : num + 1;
288
110
                            break;
289
110
                        }
290
215
                    }
291
292
153
                    if (num == part_number) {
293
70
                        StringOP::push_value_string(
294
70
                                std::string_view {
295
70
                                        reinterpret_cast<const char*>(str.data + pre_offset + 1),
296
70
                                        (size_t)offset - pre_offset - 1},
297
70
                                i, res_chars, res_offsets);
298
83
                    } else {
299
83
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
300
83
                    }
301
153
                } else {
302
                    // If delimiter is a string, use memmem to split
303
33
                    int32_t pre_offset = -delimiter.size;
304
33
                    int32_t offset = -delimiter.size;
305
33
                    int32_t num = 0;
306
68
                    while (num < part_number) {
307
54
                        pre_offset = offset;
308
54
                        size_t n = str.size - offset - delimiter.size;
309
54
                        char* pos =
310
54
                                reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size,
311
54
                                                               n, delimiter.data, delimiter.size));
312
54
                        if (pos != nullptr) {
313
35
                            offset = pos - str.data;
314
35
                            num++;
315
35
                        } else {
316
19
                            offset = str.size;
317
19
                            num = (num == 0) ? 0 : num + 1;
318
19
                            break;
319
19
                        }
320
54
                    }
321
322
33
                    if (num == part_number) {
323
27
                        StringOP::push_value_string(
324
27
                                std::string_view {reinterpret_cast<const char*>(
325
27
                                                          str.data + pre_offset + delimiter.size),
326
27
                                                  (size_t)offset - pre_offset - delimiter.size},
327
27
                                i, res_chars, res_offsets);
328
27
                    } else {
329
6
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
330
6
                    }
331
33
                }
332
186
            } else {
333
39
                part_number = -part_number;
334
39
                auto str_str = str.to_string();
335
39
                int32_t offset = str.size;
336
39
                int32_t pre_offset = offset;
337
39
                int32_t num = 0;
338
39
                auto substr = str_str;
339
83
                while (num <= part_number && offset >= 0) {
340
83
                    offset = (int)substr.rfind(delimiter, offset);
341
83
                    if (offset != -1) {
342
62
                        if (++num == part_number) {
343
18
                            break;
344
18
                        }
345
44
                        pre_offset = offset;
346
44
                        offset = offset - 1;
347
44
                        substr = str_str.substr(0, pre_offset);
348
44
                    } else {
349
21
                        break;
350
21
                    }
351
83
                }
352
39
                num = (offset == -1 && num != 0) ? num + 1 : num;
353
354
39
                if (num == part_number) {
355
24
                    if (offset == -1) {
356
6
                        StringOP::push_value_string(std::string_view {str.data, (size_t)pre_offset},
357
6
                                                    i, res_chars, res_offsets);
358
18
                    } else {
359
18
                        StringOP::push_value_string(
360
18
                                std::string_view {str_str.substr(
361
18
                                        offset + delimiter.size,
362
18
                                        (size_t)pre_offset - offset - delimiter.size)},
363
18
                                i, res_chars, res_offsets);
364
18
                    }
365
24
                } else {
366
15
                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
367
15
                }
368
39
            }
369
225
        }
370
371
151
        block.get_by_position(result).column =
372
151
                ColumnNullable::create(std::move(res), std::move(null_map));
373
151
        return Status::OK();
374
151
    }
375
};
376
377
class FunctionSubstringIndex : public IFunction {
378
public:
379
    static constexpr auto name = "substring_index";
380
112
    static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); }
381
1
    String get_name() const override { return name; }
382
103
    size_t get_number_of_arguments() const override { return 3; }
383
384
103
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
385
103
        return std::make_shared<DataTypeString>();
386
103
    }
387
388
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
389
107
                        uint32_t result, size_t input_rows_count) const override {
390
107
        DCHECK_EQ(arguments.size(), 3);
391
392
        // Create a zero column to simply implement
393
107
        auto res = ColumnString::create();
394
395
107
        auto& res_offsets = res->get_offsets();
396
107
        auto& res_chars = res->get_chars();
397
107
        res_offsets.resize(input_rows_count);
398
107
        ColumnPtr content_column;
399
107
        bool content_const = false;
400
107
        std::tie(content_column, content_const) =
401
107
                unpack_if_const(block.get_by_position(arguments[0]).column);
402
403
107
        const auto* str_col = assert_cast<const ColumnString*>(content_column.get());
404
405
        // Handle both constant and non-constant delimiter parameters
406
107
        ColumnPtr delimiter_column_ptr;
407
107
        bool delimiter_const = false;
408
107
        std::tie(delimiter_column_ptr, delimiter_const) =
409
107
                unpack_if_const(block.get_by_position(arguments[1]).column);
410
107
        const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get());
411
412
107
        ColumnPtr part_num_column_ptr;
413
107
        bool part_num_const = false;
414
107
        std::tie(part_num_column_ptr, part_num_const) =
415
107
                unpack_if_const(block.get_by_position(arguments[2]).column);
416
107
        const ColumnInt32* part_num_col =
417
107
                assert_cast<const ColumnInt32*>(part_num_column_ptr.get());
418
419
        // For constant multi-character delimiters, create StringRef and StringSearch only once
420
107
        std::optional<StringRef> const_delimiter_ref;
421
107
        std::optional<StringSearch> const_search;
422
107
        if (delimiter_const && delimiter_col->get_data_at(0).size > 1) {
423
0
            const_delimiter_ref.emplace(delimiter_col->get_data_at(0));
424
0
            const_search.emplace(&const_delimiter_ref.value());
425
0
        }
426
427
297
        for (size_t i = 0; i < input_rows_count; ++i) {
428
190
            auto str = str_col->get_data_at(content_const ? 0 : i);
429
190
            auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i);
430
190
            int32_t delimiter_size = delimiter.size;
431
432
190
            auto part_number = part_num_col->get_element(part_num_const ? 0 : i);
433
434
190
            if (part_number == 0 || delimiter_size == 0) {
435
7
                StringOP::push_empty_string(i, res_chars, res_offsets);
436
7
                continue;
437
7
            }
438
439
183
            if (part_number > 0) {
440
128
                if (delimiter_size == 1) {
441
85
                    int32_t offset = -1;
442
85
                    int32_t num = 0;
443
137
                    while (num < part_number) {
444
117
                        size_t n = str.size - offset - 1;
445
117
                        const char* pos = reinterpret_cast<const char*>(
446
117
                                memchr(str.data + offset + 1, delimiter.data[0], n));
447
117
                        if (pos != nullptr) {
448
52
                            offset = pos - str.data;
449
52
                            num++;
450
65
                        } else {
451
65
                            offset = str.size;
452
65
                            num = (num == 0) ? 0 : num + 1;
453
65
                            break;
454
65
                        }
455
117
                    }
456
457
85
                    if (num == part_number) {
458
25
                        StringOP::push_value_string(std::string_view {str.data, (size_t)offset}, i,
459
25
                                                    res_chars, res_offsets);
460
60
                    } else {
461
60
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
462
60
                                                    res_chars, res_offsets);
463
60
                    }
464
85
                } else {
465
                    // For multi-character delimiters
466
                    // Use pre-created StringRef and StringSearch for constant delimiters
467
43
                    StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value()
468
43
                                                                  : StringRef(delimiter);
469
43
                    const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr;
470
43
                    StringSearch local_search(&delimiter_ref);
471
43
                    if (!search_ptr) {
472
43
                        search_ptr = &local_search;
473
43
                    }
474
475
43
                    int32_t offset = -delimiter_size;
476
43
                    int32_t num = 0;
477
86
                    while (num < part_number) {
478
59
                        size_t n = str.size - offset - delimiter_size;
479
                        // search first match delimter_ref index from src string among str_offset to end
480
59
                        const char* pos = search_ptr->search(str.data + offset + delimiter_size, n);
481
59
                        if (pos < str.data + str.size) {
482
43
                            offset = pos - str.data;
483
43
                            num++;
484
43
                        } else {
485
16
                            offset = str.size;
486
16
                            num = (num == 0) ? 0 : num + 1;
487
16
                            break;
488
16
                        }
489
59
                    }
490
491
43
                    if (num == part_number) {
492
40
                        StringOP::push_value_string(std::string_view {str.data, (size_t)offset}, i,
493
40
                                                    res_chars, res_offsets);
494
40
                    } else {
495
3
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
496
3
                                                    res_chars, res_offsets);
497
3
                    }
498
43
                }
499
128
            } else {
500
55
                int neg_part_number = -part_number;
501
55
                auto str_str = str.to_string();
502
55
                int32_t offset = str.size;
503
55
                int32_t pre_offset = offset;
504
55
                int32_t num = 0;
505
55
                auto substr = str_str;
506
507
                // Use pre-created StringRef for constant delimiters
508
55
                StringRef delimiter_str = const_delimiter_ref
509
55
                                                  ? const_delimiter_ref.value()
510
55
                                                  : StringRef(delimiter.data, delimiter.size);
511
512
79
                while (num <= neg_part_number && offset >= 0) {
513
79
                    offset = (int)substr.rfind(delimiter_str, offset);
514
79
                    if (offset != -1) {
515
63
                        if (++num == neg_part_number) {
516
39
                            break;
517
39
                        }
518
24
                        pre_offset = offset;
519
24
                        offset = offset - 1;
520
24
                        substr = str_str.substr(0, pre_offset);
521
24
                    } else {
522
16
                        break;
523
16
                    }
524
79
                }
525
55
                num = (offset == -1 && num != 0) ? num + 1 : num;
526
527
55
                if (num == neg_part_number) {
528
43
                    if (offset == -1) {
529
4
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
530
4
                                                    res_chars, res_offsets);
531
39
                    } else {
532
39
                        StringOP::push_value_string(
533
39
                                std::string_view {str.data + offset + delimiter_size,
534
39
                                                  str.size - offset - delimiter_size},
535
39
                                i, res_chars, res_offsets);
536
39
                    }
537
43
                } else {
538
12
                    StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars,
539
12
                                                res_offsets);
540
12
                }
541
55
            }
542
183
        }
543
544
107
        block.get_by_position(result).column = std::move(res);
545
107
        return Status::OK();
546
107
    }
547
};
548
549
class FunctionSplitByString : public IFunction {
550
public:
551
    static constexpr auto name = "split_by_string";
552
553
135
    // Factory: returns a shared pointer to a newly constructed FunctionSplitByString.
    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
554
    using NullMapType = PaddedPODArray<UInt8>;
555
556
1
    // Returns the function name declared in `name` ("split_by_string").
    String get_name() const override { return name; }
557
558
127
    // Not variadic: always reports false (the argument count is fixed, see
    // get_number_of_arguments()).
    bool is_variadic() const override { return false; }
559
560
126
    // The function takes exactly two arguments; execute_impl reads them as the source
    // string (index 0) and the delimiter (index 1).
    size_t get_number_of_arguments() const override { return 2; }
561
562
126
    /// Result type: Array(Nullable(T)) where T is the type of the first argument.
    /// Both arguments are checked (DCHECK) to be string types.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
                << "first argument for function: " << name << " should be string"
                << " and arguments[0] is " << arguments[0]->get_name();
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
                << "second argument for function: " << name << " should be string"
                << " and arguments[1] is " << arguments[1]->get_name();

        // Array elements are wrapped as nullable versions of the source string type.
        auto element_type = make_nullable(arguments[0]);
        return std::make_shared<DataTypeArray>(element_type);
    }
571
572
    /// Splits every row of argument 0 by the delimiter in argument 1 and stores the
    /// pieces as an Array(Nullable(String)) column at position `result`.
    ///
    /// Either argument may be a constant column; the constness of each one is turned
    /// into a compile-time flag so `_execute` is instantiated once per combination.
    /// Always returns Status::OK().
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        DCHECK_EQ(arguments.size(), 2);

        const auto& [src_column, left_const] =
                unpack_if_const(block.get_by_position(arguments[0]).column);
        const auto& [right_column, right_const] =
                unpack_if_const(block.get_by_position(arguments[1]).column);

        // The array's element type is the nullable version of the source column type.
        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
        auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(),
                                                   ColumnArray::ColumnOffsets::create());

        dest_column_ptr->resize(0);
        auto& dest_offsets = dest_column_ptr->get_offsets();

        auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data());
        auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get();

        const auto* col_str = assert_cast<const ColumnString*>(src_column.get());

        const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get());

        // Dispatch on (source is const, delimiter is const) at compile time.
        std::visit(
                [&](auto src_const, auto delimiter_const) {
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
                                                         *dest_nested_column, dest_offsets,
                                                         input_rows_count);
                },
                make_bool_variant(left_const), make_bool_variant(right_const));

        // all elements in dest_nested_column are not null
        dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(),
                                                                       false);
        block.replace_by_position(result, std::move(dest_column_ptr));

        return Status::OK();
    }
611
612
private:
613
    template <bool src_const, bool delimiter_const>
614
    void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column,
615
                  IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets,
616
166
                  size_t size) const {
617
166
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
618
166
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
619
166
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
620
166
        column_string_chars.reserve(0);
621
622
166
        ColumnArray::Offset64 string_pos = 0;
623
166
        ColumnArray::Offset64 dest_pos = 0;
624
625
166
        StringSearch search;
626
166
        StringRef delimiter_ref_for_search;
627
628
166
        if constexpr (delimiter_const) {
629
103
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
630
103
            search.set_pattern(&delimiter_ref_for_search);
631
103
        }
632
633
1.25k
        for (size_t i = 0; i < size; i++) {
634
1.08k
            const StringRef str_ref =
635
1.08k
                    src_column_string.get_data_at(index_check_const<src_const>(i));
636
1.08k
            const StringRef delimiter_ref =
637
1.08k
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
638
639
1.08k
            if (str_ref.size == 0) {
640
164
                dest_offsets.push_back(dest_pos);
641
164
                continue;
642
164
            }
643
920
            if (delimiter_ref.size == 0) {
644
27
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
645
27
                                      string_pos, dest_pos);
646
893
            } else {
647
893
                if constexpr (!delimiter_const) {
648
51
                    search.set_pattern(&delimiter_ref);
649
51
                }
650
51.7k
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
651
50.8k
                    const size_t str_offset = str_pos;
652
50.8k
                    const size_t old_size = column_string_chars.size();
653
                    // search first match delimter_ref index from src string among str_offset to end
654
50.8k
                    const char* result_start =
655
50.8k
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
656
                    // compute split part size
657
50.8k
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
658
                    // save dist string split part
659
50.8k
                    if (split_part_size > 0) {
660
50.2k
                        const size_t new_size = old_size + split_part_size;
661
50.2k
                        column_string_chars.resize(new_size);
662
50.2k
                        memcpy_small_allow_read_write_overflow15(
663
50.2k
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
664
50.2k
                                split_part_size);
665
                        // add dist string offset
666
50.2k
                        string_pos += split_part_size;
667
50.2k
                    }
668
50.8k
                    column_string_offsets.push_back(string_pos);
669
                    // array offset + 1
670
50.8k
                    dest_pos++;
671
                    // add src string str_pos to next search start
672
50.8k
                    str_pos += split_part_size + delimiter_ref.size;
673
50.8k
                }
674
893
            }
675
920
            dest_offsets.push_back(dest_pos);
676
920
        }
677
166
    }
_ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
616
55
                  size_t size) const {
617
55
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
618
55
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
619
55
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
620
55
        column_string_chars.reserve(0);
621
622
55
        ColumnArray::Offset64 string_pos = 0;
623
55
        ColumnArray::Offset64 dest_pos = 0;
624
625
55
        StringSearch search;
626
55
        StringRef delimiter_ref_for_search;
627
628
        if constexpr (delimiter_const) {
629
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
630
            search.set_pattern(&delimiter_ref_for_search);
631
        }
632
633
130
        for (size_t i = 0; i < size; i++) {
634
75
            const StringRef str_ref =
635
75
                    src_column_string.get_data_at(index_check_const<src_const>(i));
636
75
            const StringRef delimiter_ref =
637
75
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
638
639
75
            if (str_ref.size == 0) {
640
13
                dest_offsets.push_back(dest_pos);
641
13
                continue;
642
13
            }
643
62
            if (delimiter_ref.size == 0) {
644
11
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
645
11
                                      string_pos, dest_pos);
646
51
            } else {
647
51
                if constexpr (!delimiter_const) {
648
51
                    search.set_pattern(&delimiter_ref);
649
51
                }
650
214
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
651
163
                    const size_t str_offset = str_pos;
652
163
                    const size_t old_size = column_string_chars.size();
653
                    // search first match delimter_ref index from src string among str_offset to end
654
163
                    const char* result_start =
655
163
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
656
                    // compute split part size
657
163
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
658
                    // save dist string split part
659
163
                    if (split_part_size > 0) {
660
122
                        const size_t new_size = old_size + split_part_size;
661
122
                        column_string_chars.resize(new_size);
662
122
                        memcpy_small_allow_read_write_overflow15(
663
122
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
664
122
                                split_part_size);
665
                        // add dist string offset
666
122
                        string_pos += split_part_size;
667
122
                    }
668
163
                    column_string_offsets.push_back(string_pos);
669
                    // array offset + 1
670
163
                    dest_pos++;
671
                    // add src string str_pos to next search start
672
163
                    str_pos += split_part_size + delimiter_ref.size;
673
163
                }
674
51
            }
675
62
            dest_offsets.push_back(dest_pos);
676
62
        }
677
55
    }
_ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
616
103
                  size_t size) const {
617
103
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
618
103
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
619
103
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
620
103
        column_string_chars.reserve(0);
621
622
103
        ColumnArray::Offset64 string_pos = 0;
623
103
        ColumnArray::Offset64 dest_pos = 0;
624
625
103
        StringSearch search;
626
103
        StringRef delimiter_ref_for_search;
627
628
103
        if constexpr (delimiter_const) {
629
103
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
630
103
            search.set_pattern(&delimiter_ref_for_search);
631
103
        }
632
633
1.08k
        for (size_t i = 0; i < size; i++) {
634
985
            const StringRef str_ref =
635
985
                    src_column_string.get_data_at(index_check_const<src_const>(i));
636
985
            const StringRef delimiter_ref =
637
985
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
638
639
985
            if (str_ref.size == 0) {
640
135
                dest_offsets.push_back(dest_pos);
641
135
                continue;
642
135
            }
643
850
            if (delimiter_ref.size == 0) {
644
8
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
645
8
                                      string_pos, dest_pos);
646
842
            } else {
647
                if constexpr (!delimiter_const) {
648
                    search.set_pattern(&delimiter_ref);
649
                }
650
51.5k
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
651
50.7k
                    const size_t str_offset = str_pos;
652
50.7k
                    const size_t old_size = column_string_chars.size();
653
                    // search first match delimter_ref index from src string among str_offset to end
654
50.7k
                    const char* result_start =
655
50.7k
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
656
                    // compute split part size
657
50.7k
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
658
                    // save dist string split part
659
50.7k
                    if (split_part_size > 0) {
660
50.1k
                        const size_t new_size = old_size + split_part_size;
661
50.1k
                        column_string_chars.resize(new_size);
662
50.1k
                        memcpy_small_allow_read_write_overflow15(
663
50.1k
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
664
50.1k
                                split_part_size);
665
                        // add dist string offset
666
50.1k
                        string_pos += split_part_size;
667
50.1k
                    }
668
50.7k
                    column_string_offsets.push_back(string_pos);
669
                    // array offset + 1
670
50.7k
                    dest_pos++;
671
                    // add src string str_pos to next search start
672
50.7k
                    str_pos += split_part_size + delimiter_ref.size;
673
50.7k
                }
674
842
            }
675
850
            dest_offsets.push_back(dest_pos);
676
850
        }
677
103
    }
_ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
616
8
                  size_t size) const {
617
8
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
618
8
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
619
8
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
620
8
        column_string_chars.reserve(0);
621
622
8
        ColumnArray::Offset64 string_pos = 0;
623
8
        ColumnArray::Offset64 dest_pos = 0;
624
625
8
        StringSearch search;
626
8
        StringRef delimiter_ref_for_search;
627
628
        if constexpr (delimiter_const) {
629
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
630
            search.set_pattern(&delimiter_ref_for_search);
631
        }
632
633
32
        for (size_t i = 0; i < size; i++) {
634
24
            const StringRef str_ref =
635
24
                    src_column_string.get_data_at(index_check_const<src_const>(i));
636
24
            const StringRef delimiter_ref =
637
24
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
638
639
24
            if (str_ref.size == 0) {
640
16
                dest_offsets.push_back(dest_pos);
641
16
                continue;
642
16
            }
643
8
            if (delimiter_ref.size == 0) {
644
8
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
645
8
                                      string_pos, dest_pos);
646
8
            } else {
647
0
                if constexpr (!delimiter_const) {
648
0
                    search.set_pattern(&delimiter_ref);
649
0
                }
650
0
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
651
0
                    const size_t str_offset = str_pos;
652
0
                    const size_t old_size = column_string_chars.size();
653
                    // search first match delimter_ref index from src string among str_offset to end
654
0
                    const char* result_start =
655
0
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
656
                    // compute split part size
657
0
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
658
                    // save dist string split part
659
0
                    if (split_part_size > 0) {
660
0
                        const size_t new_size = old_size + split_part_size;
661
0
                        column_string_chars.resize(new_size);
662
0
                        memcpy_small_allow_read_write_overflow15(
663
0
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
664
0
                                split_part_size);
665
                        // add dist string offset
666
0
                        string_pos += split_part_size;
667
0
                    }
668
0
                    column_string_offsets.push_back(string_pos);
669
                    // array offset + 1
670
0
                    dest_pos++;
671
                    // add src string str_pos to next search start
672
0
                    str_pos += split_part_size + delimiter_ref.size;
673
0
                }
674
0
            }
675
8
            dest_offsets.push_back(dest_pos);
676
8
        }
677
8
    }
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
678
679
    void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars,
680
                               ColumnString::Offsets& column_string_offsets,
681
                               ColumnArray::Offset64& string_pos,
682
27
                               ColumnArray::Offset64& dest_pos) const {
683
27
        const size_t old_size = column_string_chars.size();
684
27
        const size_t new_size = old_size + str_ref.size;
685
27
        column_string_chars.resize(new_size);
686
27
        memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size);
687
27
        if (simd::VStringFunctions::is_ascii(str_ref)) {
688
24
            const auto size = str_ref.size;
689
690
24
            const auto nested_old_size = column_string_offsets.size();
691
24
            const auto nested_new_size = nested_old_size + size;
692
24
            column_string_offsets.resize(nested_new_size);
693
24
            std::iota(column_string_offsets.data() + nested_old_size,
694
24
                      column_string_offsets.data() + nested_new_size, string_pos + 1);
695
696
24
            string_pos += size;
697
24
            dest_pos += size;
698
            // The above code is equivalent to the code in the following comment.
699
            // for (size_t i = 0; i < str_ref.size; i++) {
700
            //     string_pos++;
701
            //     column_string_offsets.push_back(string_pos);
702
            //     (*dest_nested_null_map).push_back(false);
703
            //     dest_pos++;
704
            // }
705
24
        } else {
706
22
            for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) {
707
19
                utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]];
708
709
19
                string_pos += utf8_char_len;
710
19
                column_string_offsets.push_back(string_pos);
711
19
                dest_pos++;
712
19
            }
713
3
        }
714
27
    }
715
};
716
717
enum class FunctionCountSubStringType { TWO_ARGUMENTS, THREE_ARGUMENTS };
718
719
template <FunctionCountSubStringType type>
720
class FunctionCountSubString : public IFunction {
721
public:
722
    static constexpr auto name = "count_substrings";
723
    static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3;
724
725
283
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv
Line
Count
Source
725
77
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv
Line
Count
Source
725
206
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
726
    using NullMapType = PaddedPODArray<UInt8>;
727
728
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev
729
730
0
    size_t get_number_of_arguments() const override { return arg_count; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv
731
732
265
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
733
265
        return std::make_shared<DataTypeInt32>();
734
265
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
732
68
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
733
68
        return std::make_shared<DataTypeInt32>();
734
68
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
732
197
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
733
197
        return std::make_shared<DataTypeInt32>();
734
197
    }
735
736
16
    DataTypes get_variadic_argument_types_impl() const override {
737
16
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
738
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
739
8
        } else {
740
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
741
8
                    std::make_shared<DataTypeInt32>()};
742
8
        }
743
16
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv
Line
Count
Source
736
8
    DataTypes get_variadic_argument_types_impl() const override {
737
8
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
738
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
739
        } else {
740
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
741
                    std::make_shared<DataTypeInt32>()};
742
        }
743
8
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv
Line
Count
Source
736
8
    DataTypes get_variadic_argument_types_impl() const override {
737
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
738
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
739
8
        } else {
740
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
741
8
                    std::make_shared<DataTypeInt32>()};
742
8
        }
743
8
    }
744
745
267
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv
Line
Count
Source
745
69
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv
Line
Count
Source
745
198
    bool is_variadic() const override { return true; }
746
747
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
748
234
                        uint32_t result, size_t input_rows_count) const override {
749
234
        DCHECK(arg_count);
750
234
        bool col_const[arg_count];
751
234
        ColumnPtr argument_columns[arg_count];
752
878
        for (int i = 0; i < arg_count; ++i) {
753
644
            std::tie(argument_columns[i], col_const[i]) =
754
644
                    unpack_if_const(block.get_by_position(arguments[i]).column);
755
644
        }
756
757
234
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
758
234
        auto& dest_column_data = dest_column_ptr->get_data();
759
760
234
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
761
58
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
762
58
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
763
58
            std::visit(
764
58
                    [&](auto str_const, auto pattern_const) {
765
58
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
766
58
                                                           dest_column_data, input_rows_count);
767
58
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_
Line
Count
Source
764
32
                    [&](auto str_const, auto pattern_const) {
765
32
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
766
32
                                                           dest_column_data, input_rows_count);
767
32
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_
Line
Count
Source
764
13
                    [&](auto str_const, auto pattern_const) {
765
13
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
766
13
                                                           dest_column_data, input_rows_count);
767
13
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_
Line
Count
Source
764
13
                    [&](auto str_const, auto pattern_const) {
765
13
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
766
13
                                                           dest_column_data, input_rows_count);
767
13
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_
768
58
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
769
176
        } else {
770
176
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
771
176
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
772
176
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
773
176
            std::visit(
774
176
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
176
                        _execute<str_const, pattern_const, start_pos_const>(
776
176
                                src_column_string, pattern_column, start_pos_column,
777
176
                                dest_column_data, input_rows_count);
778
176
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
774
36
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
36
                        _execute<str_const, pattern_const, start_pos_const>(
776
36
                                src_column_string, pattern_column, start_pos_column,
777
36
                                dest_column_data, input_rows_count);
778
36
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
774
29
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
29
                        _execute<str_const, pattern_const, start_pos_const>(
776
29
                                src_column_string, pattern_column, start_pos_column,
777
29
                                dest_column_data, input_rows_count);
778
29
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
774
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
22
                        _execute<str_const, pattern_const, start_pos_const>(
776
22
                                src_column_string, pattern_column, start_pos_column,
777
22
                                dest_column_data, input_rows_count);
778
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
774
23
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
23
                        _execute<str_const, pattern_const, start_pos_const>(
776
23
                                src_column_string, pattern_column, start_pos_column,
777
23
                                dest_column_data, input_rows_count);
778
23
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
774
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
22
                        _execute<str_const, pattern_const, start_pos_const>(
776
22
                                src_column_string, pattern_column, start_pos_column,
777
22
                                dest_column_data, input_rows_count);
778
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
774
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
22
                        _execute<str_const, pattern_const, start_pos_const>(
776
22
                                src_column_string, pattern_column, start_pos_column,
777
22
                                dest_column_data, input_rows_count);
778
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
774
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
22
                        _execute<str_const, pattern_const, start_pos_const>(
776
22
                                src_column_string, pattern_column, start_pos_column,
777
22
                                dest_column_data, input_rows_count);
778
22
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
779
176
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
780
176
                    make_bool_variant(col_const[2]));
781
176
        }
782
783
234
        block.replace_by_position(result, std::move(dest_column_ptr));
784
234
        return Status::OK();
785
234
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
748
58
                        uint32_t result, size_t input_rows_count) const override {
749
58
        DCHECK(arg_count);
750
58
        bool col_const[arg_count];
751
58
        ColumnPtr argument_columns[arg_count];
752
174
        for (int i = 0; i < arg_count; ++i) {
753
116
            std::tie(argument_columns[i], col_const[i]) =
754
116
                    unpack_if_const(block.get_by_position(arguments[i]).column);
755
116
        }
756
757
58
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
758
58
        auto& dest_column_data = dest_column_ptr->get_data();
759
760
58
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
761
58
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
762
58
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
763
58
            std::visit(
764
58
                    [&](auto str_const, auto pattern_const) {
765
58
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
766
58
                                                           dest_column_data, input_rows_count);
767
58
                    },
768
58
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
769
        } else {
770
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
771
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
772
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
773
            std::visit(
774
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
                        _execute<str_const, pattern_const, start_pos_const>(
776
                                src_column_string, pattern_column, start_pos_column,
777
                                dest_column_data, input_rows_count);
778
                    },
779
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
780
                    make_bool_variant(col_const[2]));
781
        }
782
783
58
        block.replace_by_position(result, std::move(dest_column_ptr));
784
58
        return Status::OK();
785
58
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
748
176
                        uint32_t result, size_t input_rows_count) const override {
749
176
        DCHECK(arg_count);
750
176
        bool col_const[arg_count];
751
176
        ColumnPtr argument_columns[arg_count];
752
704
        for (int i = 0; i < arg_count; ++i) {
753
528
            std::tie(argument_columns[i], col_const[i]) =
754
528
                    unpack_if_const(block.get_by_position(arguments[i]).column);
755
528
        }
756
757
176
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
758
176
        auto& dest_column_data = dest_column_ptr->get_data();
759
760
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
761
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
762
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
763
            std::visit(
764
                    [&](auto str_const, auto pattern_const) {
765
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
766
                                                           dest_column_data, input_rows_count);
767
                    },
768
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
769
176
        } else {
770
176
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
771
176
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
772
176
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
773
176
            std::visit(
774
176
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
775
176
                        _execute<str_const, pattern_const, start_pos_const>(
776
176
                                src_column_string, pattern_column, start_pos_column,
777
176
                                dest_column_data, input_rows_count);
778
176
                    },
779
176
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
780
176
                    make_bool_variant(col_const[2]));
781
176
        }
782
783
176
        block.replace_by_position(result, std::move(dest_column_ptr));
784
176
        return Status::OK();
785
176
    }
786
787
private:
788
    template <bool src_const, bool pattern_const>
789
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
790
58
                  ColumnInt32::Container& dest_column_data, size_t size) const {
791
241
        for (size_t i = 0; i < size; i++) {
792
183
            const StringRef str_ref =
793
183
                    src_column_string.get_data_at(index_check_const<src_const>(i));
794
795
183
            const StringRef pattern_ref =
796
183
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
797
183
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
798
183
        }
799
58
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
790
32
                  ColumnInt32::Container& dest_column_data, size_t size) const {
791
133
        for (size_t i = 0; i < size; i++) {
792
101
            const StringRef str_ref =
793
101
                    src_column_string.get_data_at(index_check_const<src_const>(i));
794
795
101
            const StringRef pattern_ref =
796
101
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
797
101
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
798
101
        }
799
32
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
790
13
                  ColumnInt32::Container& dest_column_data, size_t size) const {
791
54
        for (size_t i = 0; i < size; i++) {
792
41
            const StringRef str_ref =
793
41
                    src_column_string.get_data_at(index_check_const<src_const>(i));
794
795
41
            const StringRef pattern_ref =
796
41
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
797
41
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
798
41
        }
799
13
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
790
13
                  ColumnInt32::Container& dest_column_data, size_t size) const {
791
54
        for (size_t i = 0; i < size; i++) {
792
41
            const StringRef str_ref =
793
41
                    src_column_string.get_data_at(index_check_const<src_const>(i));
794
795
41
            const StringRef pattern_ref =
796
41
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
797
41
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
798
41
        }
799
13
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
800
801
    template <bool src_const, bool pattern_const, bool start_pos_const>
802
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
803
                  const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data,
804
176
                  size_t size) const {
805
411
        for (size_t i = 0; i < size; i++) {
806
235
            const StringRef str_ref =
807
235
                    src_column_string.get_data_at(index_check_const<src_const>(i));
808
235
            const StringRef pattern_ref =
809
235
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
810
            // 1-based index
811
235
            int32_t start_pos =
812
235
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
813
814
235
            const char* p = str_ref.begin();
815
235
            const char* end = str_ref.end();
816
235
            int char_size = 0;
817
1.47k
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
818
1.24k
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
819
1.24k
            }
820
235
            const auto start_byte_len = p - str_ref.begin();
821
822
235
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
823
134
                dest_column_data[i] = 0;
824
134
            } else {
825
101
                dest_column_data[i] =
826
101
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
827
101
            }
828
235
        }
829
176
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
804
36
                  size_t size) const {
805
97
        for (size_t i = 0; i < size; i++) {
806
61
            const StringRef str_ref =
807
61
                    src_column_string.get_data_at(index_check_const<src_const>(i));
808
61
            const StringRef pattern_ref =
809
61
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
810
            // 1-based index
811
61
            int32_t start_pos =
812
61
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
813
814
61
            const char* p = str_ref.begin();
815
61
            const char* end = str_ref.end();
816
61
            int char_size = 0;
817
456
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
818
395
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
819
395
            }
820
61
            const auto start_byte_len = p - str_ref.begin();
821
822
61
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
823
38
                dest_column_data[i] = 0;
824
38
            } else {
825
23
                dest_column_data[i] =
826
23
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
827
23
            }
828
61
        }
829
36
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
804
29
                  size_t size) const {
805
78
        for (size_t i = 0; i < size; i++) {
806
49
            const StringRef str_ref =
807
49
                    src_column_string.get_data_at(index_check_const<src_const>(i));
808
49
            const StringRef pattern_ref =
809
49
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
810
            // 1-based index
811
49
            int32_t start_pos =
812
49
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
813
814
49
            const char* p = str_ref.begin();
815
49
            const char* end = str_ref.end();
816
49
            int char_size = 0;
817
242
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
818
193
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
819
193
            }
820
49
            const auto start_byte_len = p - str_ref.begin();
821
822
49
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
823
22
                dest_column_data[i] = 0;
824
27
            } else {
825
27
                dest_column_data[i] =
826
27
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
827
27
            }
828
49
        }
829
29
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
804
22
                  size_t size) const {
805
44
        for (size_t i = 0; i < size; i++) {
806
22
            const StringRef str_ref =
807
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
808
22
            const StringRef pattern_ref =
809
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
810
            // 1-based index
811
22
            int32_t start_pos =
812
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
813
814
22
            const char* p = str_ref.begin();
815
22
            const char* end = str_ref.end();
816
22
            int char_size = 0;
817
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
818
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
819
129
            }
820
22
            const auto start_byte_len = p - str_ref.begin();
821
822
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
823
14
                dest_column_data[i] = 0;
824
14
            } else {
825
8
                dest_column_data[i] =
826
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
827
8
            }
828
22
        }
829
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
804
23
                  size_t size) const {
805
60
        for (size_t i = 0; i < size; i++) {
806
37
            const StringRef str_ref =
807
37
                    src_column_string.get_data_at(index_check_const<src_const>(i));
808
37
            const StringRef pattern_ref =
809
37
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
810
            // 1-based index
811
37
            int32_t start_pos =
812
37
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
813
814
37
            const char* p = str_ref.begin();
815
37
            const char* end = str_ref.end();
816
37
            int char_size = 0;
817
177
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
818
140
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
819
140
            }
820
37
            const auto start_byte_len = p - str_ref.begin();
821
822
37
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
823
18
                dest_column_data[i] = 0;
824
19
            } else {
825
19
                dest_column_data[i] =
826
19
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
827
19
            }
828
37
        }
829
23
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
804
22
                  size_t size) const {
805
44
        for (size_t i = 0; i < size; i++) {
806
22
            const StringRef str_ref =
807
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
808
22
            const StringRef pattern_ref =
809
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
810
            // 1-based index
811
22
            int32_t start_pos =
812
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
813
814
22
            const char* p = str_ref.begin();
815
22
            const char* end = str_ref.end();
816
22
            int char_size = 0;
817
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
818
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
819
129
            }
820
22
            const auto start_byte_len = p - str_ref.begin();
821
822
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
823
14
                dest_column_data[i] = 0;
824
14
            } else {
825
8
                dest_column_data[i] =
826
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
827
8
            }
828
22
        }
829
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
804
22
                  size_t size) const {
805
44
        for (size_t i = 0; i < size; i++) {
806
22
            const StringRef str_ref =
807
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
808
22
            const StringRef pattern_ref =
809
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
810
            // 1-based index
811
22
            int32_t start_pos =
812
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
813
814
22
            const char* p = str_ref.begin();
815
22
            const char* end = str_ref.end();
816
22
            int char_size = 0;
817
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
818
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
819
129
            }
820
22
            const auto start_byte_len = p - str_ref.begin();
821
822
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
823
14
                dest_column_data[i] = 0;
824
14
            } else {
825
8
                dest_column_data[i] =
826
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
827
8
            }
828
22
        }
829
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
804
22
                  size_t size) const {
805
44
        for (size_t i = 0; i < size; i++) {
806
22
            const StringRef str_ref =
807
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
808
22
            const StringRef pattern_ref =
809
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
810
            // 1-based index
811
22
            int32_t start_pos =
812
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
813
814
22
            const char* p = str_ref.begin();
815
22
            const char* end = str_ref.end();
816
22
            int char_size = 0;
817
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
818
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
819
129
            }
820
22
            const auto start_byte_len = p - str_ref.begin();
821
822
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
823
14
                dest_column_data[i] = 0;
824
14
            } else {
825
8
                dest_column_data[i] =
826
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
827
8
            }
828
22
        }
829
22
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
830
831
529
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
832
529
        size_t old_size = pos;
833
529
        size_t str_size = str_ref.size;
834
2.20k
        while (pos < str_size &&
835
2.20k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
836
1.98k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
837
1.67k
            pos++;
838
1.67k
        }
839
529
        return pos - old_size;
840
529
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_
Line
Count
Source
831
291
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
832
291
        size_t old_size = pos;
833
291
        size_t str_size = str_ref.size;
834
1.05k
        while (pos < str_size &&
835
1.05k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
836
933
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
837
763
            pos++;
838
763
        }
839
291
        return pos - old_size;
840
291
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_
Line
Count
Source
831
238
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
832
238
        size_t old_size = pos;
833
238
        size_t str_size = str_ref.size;
834
1.15k
        while (pos < str_size &&
835
1.15k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
836
1.05k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
837
914
            pos++;
838
914
        }
839
238
        return pos - old_size;
840
238
    }
841
842
284
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
843
284
        int count = 0;
844
284
        if (str_ref.size == 0 || pattern_ref.size == 0) {
845
64
            return 0;
846
220
        } else {
847
529
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
848
529
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
849
529
                if (res_pos == (str_ref.size - str_pos)) {
850
220
                    break; // not find
851
220
                }
852
309
                count++;
853
309
                str_pos = str_pos + res_pos + pattern_ref.size;
854
309
            }
855
220
        }
856
220
        return count;
857
284
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_
Line
Count
Source
842
183
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
843
183
        int count = 0;
844
183
        if (str_ref.size == 0 || pattern_ref.size == 0) {
845
62
            return 0;
846
121
        } else {
847
291
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
848
291
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
849
291
                if (res_pos == (str_ref.size - str_pos)) {
850
121
                    break; // not find
851
121
                }
852
170
                count++;
853
170
                str_pos = str_pos + res_pos + pattern_ref.size;
854
170
            }
855
121
        }
856
121
        return count;
857
183
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_
Line
Count
Source
842
101
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
843
101
        int count = 0;
844
101
        if (str_ref.size == 0 || pattern_ref.size == 0) {
845
2
            return 0;
846
99
        } else {
847
238
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
848
238
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
849
238
                if (res_pos == (str_ref.size - str_pos)) {
850
99
                    break; // not find
851
99
                }
852
139
                count++;
853
139
                str_pos = str_pos + res_pos + pattern_ref.size;
854
139
            }
855
99
        }
856
99
        return count;
857
101
    }
858
};
859
860
8
void register_function_string_search(SimpleFunctionFactory& factory) {
861
8
    factory.register_function<FunctionStringLocatePos>();
862
8
    factory.register_function<FunctionSplitPart>();
863
8
    factory.register_function<FunctionSplitByString>();
864
8
    factory.register_function<FunctionCountSubString<FunctionCountSubStringType::TWO_ARGUMENTS>>();
865
8
    factory.register_function<
866
8
            FunctionCountSubString<FunctionCountSubStringType::THREE_ARGUMENTS>>();
867
8
    factory.register_function<FunctionSubstringIndex>();
868
869
8
    factory.register_alias(FunctionStringLocatePos::name, "position");
870
8
}
871
872
#include "common/compile_check_avoid_end.h"
873
} // namespace doris