Coverage Report

Created: 2026-06-05 03:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string_search.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <algorithm>
19
#include <cstddef>
20
#include <cstring>
21
#include <numeric>
22
#include <string>
23
#include <string_view>
24
#include <vector>
25
26
#include "common/status.h"
27
#include "core/assert_cast.h"
28
#include "core/block/block.h"
29
#include "core/block/column_numbers.h"
30
#include "core/column/column_array.h"
31
#include "core/column/column_const.h"
32
#include "core/column/column_nullable.h"
33
#include "core/column/column_string.h"
34
#include "core/column/column_vector.h"
35
#include "core/data_type/data_type_array.h"
36
#include "core/data_type/data_type_nullable.h"
37
#include "core/data_type/data_type_number.h"
38
#include "core/data_type/data_type_string.h"
39
#include "core/data_type/define_primitive_type.h"
40
#include "core/memcmp_small.h"
41
#include "core/memcpy_small.h"
42
#include "core/pod_array_fwd.h"
43
#include "core/string_ref.h"
44
#include "exec/common/string_searcher.h"
45
#include "exec/common/stringop_substring.h"
46
#include "exec/common/template_helpers.hpp"
47
#include "exec/common/util.hpp"
48
#include "exprs/function/function.h"
49
#include "exprs/function/function_helpers.h"
50
#include "exprs/function/simple_function_factory.h"
51
#include "exprs/function_context.h"
52
#include "util/simd/vstring_function.h"
53
#include "util/string_search.hpp"
54
55
namespace doris {
56
#include "common/compile_check_avoid_begin.h"
57
58
class FunctionStringLocatePos : public IFunction {
59
public:
60
    static constexpr auto name = "locate";
61
868
    static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); }
62
0
    String get_name() const override { return name; }
63
0
    size_t get_number_of_arguments() const override { return 3; }
64
65
862
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
66
862
        return std::make_shared<DataTypeInt32>();
67
862
    }
68
69
5
    DataTypes get_variadic_argument_types_impl() const override {
70
5
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
71
5
                std::make_shared<DataTypeInt32>()};
72
5
    }
73
74
863
    bool is_variadic() const override { return true; }
75
76
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
77
648
                        uint32_t result, size_t input_rows_count) const override {
78
648
        if (arguments.size() != 3) {
79
0
            return Status::InvalidArgument("Function {} requires 3 arguments, but got {}",
80
0
                                           get_name(), arguments.size());
81
0
        }
82
648
        bool col_const[3];
83
648
        ColumnPtr argument_columns[3];
84
2.59k
        for (int i = 0; i < 3; ++i) {
85
1.94k
            std::tie(argument_columns[i], col_const[i]) =
86
1.94k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
87
1.94k
        }
88
89
648
        const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get());
90
648
        const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get());
91
648
        const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get());
92
93
648
        ColumnInt32::MutablePtr col_res = ColumnInt32::create();
94
648
        auto& vec_res = col_res->get_data();
95
648
        vec_res.resize(block.rows());
96
97
648
        const bool is_ascii = col_left->is_ascii() && col_right->is_ascii();
98
99
648
        if (col_const[0]) {
100
248
            std::visit(
101
248
                    [&](auto is_ascii, auto str_const, auto pos_const) {
102
248
                        scalar_search<is_ascii, str_const, pos_const>(
103
248
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
104
248
                                input_rows_count);
105
248
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
101
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
102
22
                        scalar_search<is_ascii, str_const, pos_const>(
103
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
104
22
                                input_rows_count);
105
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
101
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
102
22
                        scalar_search<is_ascii, str_const, pos_const>(
103
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
104
22
                                input_rows_count);
105
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
101
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
102
22
                        scalar_search<is_ascii, str_const, pos_const>(
103
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
104
22
                                input_rows_count);
105
22
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
101
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
102
60
                        scalar_search<is_ascii, str_const, pos_const>(
103
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
104
60
                                input_rows_count);
105
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
101
62
                    [&](auto is_ascii, auto str_const, auto pos_const) {
102
62
                        scalar_search<is_ascii, str_const, pos_const>(
103
62
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
104
62
                                input_rows_count);
105
62
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
101
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
102
60
                        scalar_search<is_ascii, str_const, pos_const>(
103
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
104
60
                                input_rows_count);
105
60
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
106
248
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
107
248
                    make_bool_variant(col_const[2]));
108
109
400
        } else {
110
400
            std::visit(
111
401
                    [&](auto is_ascii, auto str_const, auto pos_const) {
112
401
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
113
401
                                                                      col_pos->get_data(), vec_res,
114
401
                                                                      input_rows_count);
115
401
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
111
39
                    [&](auto is_ascii, auto str_const, auto pos_const) {
112
39
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
113
39
                                                                      col_pos->get_data(), vec_res,
114
39
                                                                      input_rows_count);
115
39
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
111
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
112
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
113
22
                                                                      col_pos->get_data(), vec_res,
114
22
                                                                      input_rows_count);
115
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
111
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
112
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
113
22
                                                                      col_pos->get_data(), vec_res,
114
22
                                                                      input_rows_count);
115
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
111
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
112
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
113
22
                                                                      col_pos->get_data(), vec_res,
114
22
                                                                      input_rows_count);
115
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
111
116
                    [&](auto is_ascii, auto str_const, auto pos_const) {
112
116
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
113
116
                                                                      col_pos->get_data(), vec_res,
114
116
                                                                      input_rows_count);
115
116
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
111
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
112
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
113
60
                                                                      col_pos->get_data(), vec_res,
114
60
                                                                      input_rows_count);
115
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
111
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
112
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
113
60
                                                                      col_pos->get_data(), vec_res,
114
60
                                                                      input_rows_count);
115
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
111
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
112
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
113
60
                                                                      col_pos->get_data(), vec_res,
114
60
                                                                      input_rows_count);
115
60
                    },
116
400
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
117
400
                    make_bool_variant(col_const[2]));
118
400
        }
119
648
        block.replace_by_position(result, std::move(col_res));
120
648
        return Status::OK();
121
648
    }
122
123
private:
124
    template <bool is_ascii, bool str_const, bool pos_const>
125
    void scalar_search(const StringRef& ldata, const ColumnString* col_right,
126
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
127
248
                       size_t size) const {
128
248
        res.resize(size);
129
248
        StringRef substr(ldata.data, ldata.size);
130
248
        StringSearch search {&substr};
131
132
499
        for (int i = 0; i < size; ++i) {
133
251
            res[i] = locate_pos<is_ascii>(substr,
134
251
                                          col_right->get_data_at(index_check_const<str_const>(i)),
135
251
                                          search, posdata[index_check_const<pos_const>(i)]);
136
251
        }
137
248
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
127
22
                       size_t size) const {
128
22
        res.resize(size);
129
22
        StringRef substr(ldata.data, ldata.size);
130
22
        StringSearch search {&substr};
131
132
44
        for (int i = 0; i < size; ++i) {
133
22
            res[i] = locate_pos<is_ascii>(substr,
134
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
135
22
                                          search, posdata[index_check_const<pos_const>(i)]);
136
22
        }
137
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
127
22
                       size_t size) const {
128
22
        res.resize(size);
129
22
        StringRef substr(ldata.data, ldata.size);
130
22
        StringSearch search {&substr};
131
132
44
        for (int i = 0; i < size; ++i) {
133
22
            res[i] = locate_pos<is_ascii>(substr,
134
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
135
22
                                          search, posdata[index_check_const<pos_const>(i)]);
136
22
        }
137
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
127
22
                       size_t size) const {
128
22
        res.resize(size);
129
22
        StringRef substr(ldata.data, ldata.size);
130
22
        StringSearch search {&substr};
131
132
44
        for (int i = 0; i < size; ++i) {
133
22
            res[i] = locate_pos<is_ascii>(substr,
134
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
135
22
                                          search, posdata[index_check_const<pos_const>(i)]);
136
22
        }
137
22
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
127
60
                       size_t size) const {
128
60
        res.resize(size);
129
60
        StringRef substr(ldata.data, ldata.size);
130
60
        StringSearch search {&substr};
131
132
120
        for (int i = 0; i < size; ++i) {
133
60
            res[i] = locate_pos<is_ascii>(substr,
134
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
135
60
                                          search, posdata[index_check_const<pos_const>(i)]);
136
60
        }
137
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
127
62
                       size_t size) const {
128
62
        res.resize(size);
129
62
        StringRef substr(ldata.data, ldata.size);
130
62
        StringSearch search {&substr};
131
132
127
        for (int i = 0; i < size; ++i) {
133
65
            res[i] = locate_pos<is_ascii>(substr,
134
65
                                          col_right->get_data_at(index_check_const<str_const>(i)),
135
65
                                          search, posdata[index_check_const<pos_const>(i)]);
136
65
        }
137
62
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
127
60
                       size_t size) const {
128
60
        res.resize(size);
129
60
        StringRef substr(ldata.data, ldata.size);
130
60
        StringSearch search {&substr};
131
132
120
        for (int i = 0; i < size; ++i) {
133
60
            res[i] = locate_pos<is_ascii>(substr,
134
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
135
60
                                          search, posdata[index_check_const<pos_const>(i)]);
136
60
        }
137
60
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
138
139
    template <bool is_ascii, bool str_const, bool pos_const>
140
    void vector_search(const ColumnString* col_left, const ColumnString* col_right,
141
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
142
401
                       size_t size) const {
143
401
        res.resize(size);
144
401
        StringSearch search;
145
984
        for (int i = 0; i < size; ++i) {
146
583
            StringRef substr = col_left->get_data_at(i);
147
583
            search.set_pattern(&substr);
148
583
            res[i] = locate_pos<is_ascii>(substr,
149
583
                                          col_right->get_data_at(index_check_const<str_const>(i)),
150
583
                                          search, posdata[index_check_const<pos_const>(i)]);
151
583
        }
152
401
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
142
39
                       size_t size) const {
143
39
        res.resize(size);
144
39
        StringSearch search;
145
103
        for (int i = 0; i < size; ++i) {
146
64
            StringRef substr = col_left->get_data_at(i);
147
64
            search.set_pattern(&substr);
148
64
            res[i] = locate_pos<is_ascii>(substr,
149
64
                                          col_right->get_data_at(index_check_const<str_const>(i)),
150
64
                                          search, posdata[index_check_const<pos_const>(i)]);
151
64
        }
152
39
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
142
22
                       size_t size) const {
143
22
        res.resize(size);
144
22
        StringSearch search;
145
44
        for (int i = 0; i < size; ++i) {
146
22
            StringRef substr = col_left->get_data_at(i);
147
22
            search.set_pattern(&substr);
148
22
            res[i] = locate_pos<is_ascii>(substr,
149
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
150
22
                                          search, posdata[index_check_const<pos_const>(i)]);
151
22
        }
152
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
142
22
                       size_t size) const {
143
22
        res.resize(size);
144
22
        StringSearch search;
145
44
        for (int i = 0; i < size; ++i) {
146
22
            StringRef substr = col_left->get_data_at(i);
147
22
            search.set_pattern(&substr);
148
22
            res[i] = locate_pos<is_ascii>(substr,
149
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
150
22
                                          search, posdata[index_check_const<pos_const>(i)]);
151
22
        }
152
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
142
22
                       size_t size) const {
143
22
        res.resize(size);
144
22
        StringSearch search;
145
44
        for (int i = 0; i < size; ++i) {
146
22
            StringRef substr = col_left->get_data_at(i);
147
22
            search.set_pattern(&substr);
148
22
            res[i] = locate_pos<is_ascii>(substr,
149
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
150
22
                                          search, posdata[index_check_const<pos_const>(i)]);
151
22
        }
152
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
142
116
                       size_t size) const {
143
116
        res.resize(size);
144
116
        StringSearch search;
145
389
        for (int i = 0; i < size; ++i) {
146
273
            StringRef substr = col_left->get_data_at(i);
147
273
            search.set_pattern(&substr);
148
273
            res[i] = locate_pos<is_ascii>(substr,
149
273
                                          col_right->get_data_at(index_check_const<str_const>(i)),
150
273
                                          search, posdata[index_check_const<pos_const>(i)]);
151
273
        }
152
116
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
142
60
                       size_t size) const {
143
60
        res.resize(size);
144
60
        StringSearch search;
145
120
        for (int i = 0; i < size; ++i) {
146
60
            StringRef substr = col_left->get_data_at(i);
147
60
            search.set_pattern(&substr);
148
60
            res[i] = locate_pos<is_ascii>(substr,
149
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
150
60
                                          search, posdata[index_check_const<pos_const>(i)]);
151
60
        }
152
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
142
60
                       size_t size) const {
143
60
        res.resize(size);
144
60
        StringSearch search;
145
120
        for (int i = 0; i < size; ++i) {
146
60
            StringRef substr = col_left->get_data_at(i);
147
60
            search.set_pattern(&substr);
148
60
            res[i] = locate_pos<is_ascii>(substr,
149
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
150
60
                                          search, posdata[index_check_const<pos_const>(i)]);
151
60
        }
152
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
142
60
                       size_t size) const {
143
60
        res.resize(size);
144
60
        StringSearch search;
145
120
        for (int i = 0; i < size; ++i) {
146
60
            StringRef substr = col_left->get_data_at(i);
147
60
            search.set_pattern(&substr);
148
60
            res[i] = locate_pos<is_ascii>(substr,
149
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
150
60
                                          search, posdata[index_check_const<pos_const>(i)]);
151
60
        }
152
60
    }
153
154
    template <bool is_ascii>
155
834
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
156
834
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
157
            // BEHAVIOR COMPATIBLE WITH MYSQL
158
            // locate('','')  locate('','',1) locate('','',2)
159
            // 1  1 0
160
13
            return 1;
161
13
        }
162
821
        if (is_ascii) {
163
625
            return locate_pos_ascii(substr, str, search, start_pos);
164
625
        } else {
165
196
            return locate_pos_utf8(substr, str, search, start_pos);
166
196
        }
167
821
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
155
196
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
156
196
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
157
            // BEHAVIOR COMPATIBLE WITH MYSQL
158
            // locate('','')  locate('','',1) locate('','',2)
159
            // 1  1 0
160
0
            return 1;
161
0
        }
162
196
        if (is_ascii) {
163
0
            return locate_pos_ascii(substr, str, search, start_pos);
164
196
        } else {
165
196
            return locate_pos_utf8(substr, str, search, start_pos);
166
196
        }
167
196
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
155
638
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
156
638
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
157
            // BEHAVIOR COMPATIBLE WITH MYSQL
158
            // locate('','')  locate('','',1) locate('','',2)
159
            // 1  1 0
160
13
            return 1;
161
13
        }
162
625
        if (is_ascii) {
163
625
            return locate_pos_ascii(substr, str, search, start_pos);
164
625
        } else {
165
0
            return locate_pos_utf8(substr, str, search, start_pos);
166
0
        }
167
625
    }
168
169
    int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search,
170
196
                        int start_pos) const {
171
196
        std::vector<size_t> index;
172
196
        size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index);
173
196
        if (start_pos <= 0 || start_pos > char_len) {
174
49
            return 0;
175
49
        }
176
147
        if (substr.size == 0) {
177
18
            return start_pos;
178
18
        }
179
        // Input start_pos starts from 1.
180
129
        StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]);
181
129
        int32_t match_pos = search.search(&adjusted_str);
182
129
        if (match_pos >= 0) {
183
            // Hive returns the position in the original string starting from 1.
184
111
            return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos);
185
111
        } else {
186
18
            return 0;
187
18
        }
188
129
    }
189
190
    int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search,
191
625
                         int start_pos) const {
192
625
        if (start_pos <= 0 || start_pos > str.size) {
193
412
            return 0;
194
412
        }
195
213
        if (substr.size == 0) {
196
38
            return start_pos;
197
38
        }
198
        // Input start_pos starts from 1.
199
175
        StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1);
200
175
        int32_t match_pos = search.search(&adjusted_str);
201
175
        if (match_pos >= 0) {
202
            // Hive returns the position in the original string starting from 1.
203
60
            return start_pos + match_pos;
204
115
        } else {
205
115
            return 0;
206
115
        }
207
175
    }
208
};
209
210
class FunctionSplitPart : public IFunction {
211
public:
212
    static constexpr auto name = "split_part";
213
142
    static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); }
214
1
    String get_name() const override { return name; }
215
136
    size_t get_number_of_arguments() const override { return 3; }
216
217
136
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
218
136
        return make_nullable(std::make_shared<DataTypeString>());
219
136
    }
220
221
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
222
153
                        uint32_t result, size_t input_rows_count) const override {
223
153
        DCHECK_EQ(arguments.size(), 3);
224
225
153
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
226
        // Create a zero column to simply implement
227
153
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
228
153
        auto res = ColumnString::create();
229
230
153
        auto& null_map_data = null_map->get_data();
231
153
        auto& res_offsets = res->get_offsets();
232
153
        auto& res_chars = res->get_chars();
233
153
        res_offsets.resize(input_rows_count);
234
235
153
        const size_t argument_size = arguments.size();
236
153
        std::vector<ColumnPtr> argument_columns(argument_size);
237
612
        for (size_t i = 0; i < argument_size; ++i) {
238
459
            argument_columns[i] =
239
459
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
240
459
            if (const auto* nullable =
241
459
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
242
                // Danger: Here must dispose the null map data first! Because
243
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
244
                // of column nullable mem of null map
245
0
                VectorizedUtils::update_null_map(null_map->get_data(),
246
0
                                                 nullable->get_null_map_data());
247
0
                argument_columns[i] = nullable->get_nested_column_ptr();
248
0
            }
249
459
        }
250
251
153
        const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get());
252
253
153
        const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get());
254
255
153
        const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get());
256
153
        const auto& part_num_col_data = part_num_col->get_data();
257
258
400
        for (size_t i = 0; i < input_rows_count; ++i) {
259
247
            if (part_num_col_data[i] == 0) {
260
11
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
261
11
                continue;
262
11
            }
263
264
236
            auto delimiter = delimiter_col->get_data_at(i);
265
236
            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
266
236
            auto part_number = part_num_col_data[i];
267
236
            auto str = str_col->get_data_at(i);
268
236
            if (delimiter.size == 0) {
269
9
                StringOP::push_empty_string(i, res_chars, res_offsets);
270
9
                continue;
271
9
            }
272
273
227
            if (part_number > 0) {
274
188
                if (delimiter.size == 1) {
275
                    // If delimiter is a char, use memchr to split
276
155
                    int32_t pre_offset = -1;
277
155
                    int32_t offset = -1;
278
155
                    int32_t num = 0;
279
261
                    while (num < part_number) {
280
217
                        pre_offset = offset;
281
217
                        size_t n = str.size - offset - 1;
282
217
                        const char* pos = reinterpret_cast<const char*>(
283
217
                                memchr(str.data + offset + 1, delimiter_str[0], n));
284
217
                        if (pos != nullptr) {
285
106
                            offset = pos - str.data;
286
106
                            num++;
287
111
                        } else {
288
111
                            offset = str.size;
289
111
                            num = (num == 0) ? 0 : num + 1;
290
111
                            break;
291
111
                        }
292
217
                    }
293
294
155
                    if (num == part_number) {
295
71
                        StringOP::push_value_string(
296
71
                                std::string_view {
297
71
                                        reinterpret_cast<const char*>(str.data + pre_offset + 1),
298
71
                                        (size_t)offset - pre_offset - 1},
299
71
                                i, res_chars, res_offsets);
300
84
                    } else {
301
84
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
302
84
                    }
303
155
                } else {
304
                    // If delimiter is a string, use memmem to split
305
33
                    int32_t pre_offset = -delimiter.size;
306
33
                    int32_t offset = -delimiter.size;
307
33
                    int32_t num = 0;
308
68
                    while (num < part_number) {
309
54
                        pre_offset = offset;
310
54
                        size_t n = str.size - offset - delimiter.size;
311
54
                        char* pos =
312
54
                                reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size,
313
54
                                                               n, delimiter.data, delimiter.size));
314
54
                        if (pos != nullptr) {
315
35
                            offset = pos - str.data;
316
35
                            num++;
317
35
                        } else {
318
19
                            offset = str.size;
319
19
                            num = (num == 0) ? 0 : num + 1;
320
19
                            break;
321
19
                        }
322
54
                    }
323
324
33
                    if (num == part_number) {
325
27
                        StringOP::push_value_string(
326
27
                                std::string_view {reinterpret_cast<const char*>(
327
27
                                                          str.data + pre_offset + delimiter.size),
328
27
                                                  (size_t)offset - pre_offset - delimiter.size},
329
27
                                i, res_chars, res_offsets);
330
27
                    } else {
331
6
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
332
6
                    }
333
33
                }
334
188
            } else {
335
39
                part_number = -part_number;
336
39
                auto str_str = str.to_string();
337
39
                int32_t offset = str.size;
338
39
                int32_t pre_offset = offset;
339
39
                int32_t num = 0;
340
39
                auto substr = str_str;
341
83
                while (num <= part_number && offset >= 0) {
342
83
                    offset = (int)substr.rfind(delimiter, offset);
343
83
                    if (offset != -1) {
344
62
                        if (++num == part_number) {
345
18
                            break;
346
18
                        }
347
44
                        pre_offset = offset;
348
44
                        offset = offset - 1;
349
44
                        substr = str_str.substr(0, pre_offset);
350
44
                    } else {
351
21
                        break;
352
21
                    }
353
83
                }
354
39
                num = (offset == -1 && num != 0) ? num + 1 : num;
355
356
39
                if (num == part_number) {
357
24
                    if (offset == -1) {
358
6
                        StringOP::push_value_string(
359
6
                                std::string_view {reinterpret_cast<const char*>(str.data),
360
6
                                                  (size_t)pre_offset},
361
6
                                i, res_chars, res_offsets);
362
18
                    } else {
363
18
                        StringOP::push_value_string(
364
18
                                std::string_view {str_str.substr(
365
18
                                        offset + delimiter.size,
366
18
                                        (size_t)pre_offset - offset - delimiter.size)},
367
18
                                i, res_chars, res_offsets);
368
18
                    }
369
24
                } else {
370
15
                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
371
15
                }
372
39
            }
373
227
        }
374
375
153
        block.get_by_position(result).column =
376
153
                ColumnNullable::create(std::move(res), std::move(null_map));
377
153
        return Status::OK();
378
153
    }
379
};
380
381
class FunctionSubstringIndex : public IFunction {
382
public:
383
    static constexpr auto name = "substring_index";
384
109
    static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); }
385
1
    String get_name() const override { return name; }
386
103
    size_t get_number_of_arguments() const override { return 3; }
387
388
103
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
389
103
        return std::make_shared<DataTypeString>();
390
103
    }
391
392
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
393
107
                        uint32_t result, size_t input_rows_count) const override {
394
107
        DCHECK_EQ(arguments.size(), 3);
395
396
        // Create a zero column to simply implement
397
107
        auto res = ColumnString::create();
398
399
107
        auto& res_offsets = res->get_offsets();
400
107
        auto& res_chars = res->get_chars();
401
107
        res_offsets.resize(input_rows_count);
402
107
        ColumnPtr content_column;
403
107
        bool content_const = false;
404
107
        std::tie(content_column, content_const) =
405
107
                unpack_if_const(block.get_by_position(arguments[0]).column);
406
407
107
        const auto* str_col = assert_cast<const ColumnString*>(content_column.get());
408
409
        // Handle both constant and non-constant delimiter parameters
410
107
        ColumnPtr delimiter_column_ptr;
411
107
        bool delimiter_const = false;
412
107
        std::tie(delimiter_column_ptr, delimiter_const) =
413
107
                unpack_if_const(block.get_by_position(arguments[1]).column);
414
107
        const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get());
415
416
107
        ColumnPtr part_num_column_ptr;
417
107
        bool part_num_const = false;
418
107
        std::tie(part_num_column_ptr, part_num_const) =
419
107
                unpack_if_const(block.get_by_position(arguments[2]).column);
420
107
        const ColumnInt32* part_num_col =
421
107
                assert_cast<const ColumnInt32*>(part_num_column_ptr.get());
422
423
        // For constant multi-character delimiters, create StringRef and StringSearch only once
424
107
        std::optional<StringRef> const_delimiter_ref;
425
107
        std::optional<StringSearch> const_search;
426
107
        if (delimiter_const && delimiter_col->get_data_at(0).size > 1) {
427
0
            const_delimiter_ref.emplace(delimiter_col->get_data_at(0));
428
0
            const_search.emplace(&const_delimiter_ref.value());
429
0
        }
430
431
297
        for (size_t i = 0; i < input_rows_count; ++i) {
432
190
            auto str = str_col->get_data_at(content_const ? 0 : i);
433
190
            auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i);
434
190
            int32_t delimiter_size = delimiter.size;
435
436
190
            auto part_number = part_num_col->get_element(part_num_const ? 0 : i);
437
438
190
            if (part_number == 0 || delimiter_size == 0) {
439
7
                StringOP::push_empty_string(i, res_chars, res_offsets);
440
7
                continue;
441
7
            }
442
443
183
            if (part_number > 0) {
444
128
                if (delimiter_size == 1) {
445
85
                    int32_t offset = -1;
446
85
                    int32_t num = 0;
447
137
                    while (num < part_number) {
448
117
                        size_t n = str.size - offset - 1;
449
117
                        const char* pos = reinterpret_cast<const char*>(
450
117
                                memchr(str.data + offset + 1, delimiter.data[0], n));
451
117
                        if (pos != nullptr) {
452
52
                            offset = pos - str.data;
453
52
                            num++;
454
65
                        } else {
455
65
                            offset = str.size;
456
65
                            num = (num == 0) ? 0 : num + 1;
457
65
                            break;
458
65
                        }
459
117
                    }
460
461
85
                    if (num == part_number) {
462
25
                        StringOP::push_value_string(
463
25
                                std::string_view {reinterpret_cast<const char*>(str.data),
464
25
                                                  (size_t)offset},
465
25
                                i, res_chars, res_offsets);
466
60
                    } else {
467
60
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
468
60
                                                    res_chars, res_offsets);
469
60
                    }
470
85
                } else {
471
                    // For multi-character delimiters
472
                    // Use pre-created StringRef and StringSearch for constant delimiters
473
43
                    StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value()
474
43
                                                                  : StringRef(delimiter);
475
43
                    const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr;
476
43
                    StringSearch local_search(&delimiter_ref);
477
43
                    if (!search_ptr) {
478
43
                        search_ptr = &local_search;
479
43
                    }
480
481
43
                    int32_t offset = -delimiter_size;
482
43
                    int32_t num = 0;
483
86
                    while (num < part_number) {
484
59
                        size_t n = str.size - offset - delimiter_size;
485
                        // search first match delimter_ref index from src string among str_offset to end
486
59
                        const char* pos = search_ptr->search(str.data + offset + delimiter_size, n);
487
59
                        if (pos < str.data + str.size) {
488
43
                            offset = pos - str.data;
489
43
                            num++;
490
43
                        } else {
491
16
                            offset = str.size;
492
16
                            num = (num == 0) ? 0 : num + 1;
493
16
                            break;
494
16
                        }
495
59
                    }
496
497
43
                    if (num == part_number) {
498
40
                        StringOP::push_value_string(
499
40
                                std::string_view {reinterpret_cast<const char*>(str.data),
500
40
                                                  (size_t)offset},
501
40
                                i, res_chars, res_offsets);
502
40
                    } else {
503
3
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
504
3
                                                    res_chars, res_offsets);
505
3
                    }
506
43
                }
507
128
            } else {
508
55
                int neg_part_number = -part_number;
509
55
                auto str_str = str.to_string();
510
55
                int32_t offset = str.size;
511
55
                int32_t pre_offset = offset;
512
55
                int32_t num = 0;
513
55
                auto substr = str_str;
514
515
                // Use pre-created StringRef for constant delimiters
516
55
                StringRef delimiter_str =
517
55
                        const_delimiter_ref
518
55
                                ? const_delimiter_ref.value()
519
55
                                : StringRef(reinterpret_cast<const char*>(delimiter.data),
520
55
                                            delimiter.size);
521
522
79
                while (num <= neg_part_number && offset >= 0) {
523
79
                    offset = (int)substr.rfind(delimiter_str, offset);
524
79
                    if (offset != -1) {
525
63
                        if (++num == neg_part_number) {
526
39
                            break;
527
39
                        }
528
24
                        pre_offset = offset;
529
24
                        offset = offset - 1;
530
24
                        substr = str_str.substr(0, pre_offset);
531
24
                    } else {
532
16
                        break;
533
16
                    }
534
79
                }
535
55
                num = (offset == -1 && num != 0) ? num + 1 : num;
536
537
55
                if (num == neg_part_number) {
538
43
                    if (offset == -1) {
539
4
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
540
4
                                                    res_chars, res_offsets);
541
39
                    } else {
542
39
                        StringOP::push_value_string(
543
39
                                std::string_view {str.data + offset + delimiter_size,
544
39
                                                  str.size - offset - delimiter_size},
545
39
                                i, res_chars, res_offsets);
546
39
                    }
547
43
                } else {
548
12
                    StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars,
549
12
                                                res_offsets);
550
12
                }
551
55
            }
552
183
        }
553
554
107
        block.get_by_position(result).column = std::move(res);
555
107
        return Status::OK();
556
107
    }
557
};
558
559
class FunctionSplitByString : public IFunction {
560
public:
561
    static constexpr auto name = "split_by_string";
562
563
124
    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
564
    using NullMapType = PaddedPODArray<UInt8>;
565
566
1
    String get_name() const override { return name; }
567
568
119
    bool is_variadic() const override { return false; }
569
570
118
    size_t get_number_of_arguments() const override { return 2; }
571
572
118
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
573
118
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
574
0
                << "first argument for function: " << name << " should be string"
575
0
                << " and arguments[0] is " << arguments[0]->get_name();
576
118
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
577
0
                << "second argument for function: " << name << " should be string"
578
0
                << " and arguments[1] is " << arguments[1]->get_name();
579
118
        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
580
118
    }
581
582
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
583
154
                        uint32_t result, size_t input_rows_count) const override {
584
154
        DCHECK_EQ(arguments.size(), 2);
585
586
154
        const auto& [src_column, left_const] =
587
154
                unpack_if_const(block.get_by_position(arguments[0]).column);
588
154
        const auto& [right_column, right_const] =
589
154
                unpack_if_const(block.get_by_position(arguments[1]).column);
590
591
154
        DataTypePtr right_column_type = block.get_by_position(arguments[1]).type;
592
154
        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
593
154
        auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(),
594
154
                                                   ColumnArray::ColumnOffsets::create());
595
596
154
        dest_column_ptr->resize(0);
597
154
        auto& dest_offsets = dest_column_ptr->get_offsets();
598
599
154
        auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data());
600
154
        auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get();
601
602
154
        const auto* col_str = assert_cast<const ColumnString*>(src_column.get());
603
604
154
        const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get());
605
606
154
        std::visit(
607
154
                [&](auto src_const, auto delimiter_const) {
608
154
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
609
154
                                                         *dest_nested_column, dest_offsets,
610
154
                                                         input_rows_count);
611
154
                },
_ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESF_EEDaSA_SB_
Line
Count
Source
607
55
                [&](auto src_const, auto delimiter_const) {
608
55
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
609
55
                                                         *dest_nested_column, dest_offsets,
610
55
                                                         input_rows_count);
611
55
                },
_ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESE_IbLb1EEEEDaSA_SB_
Line
Count
Source
607
91
                [&](auto src_const, auto delimiter_const) {
608
91
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
609
91
                                                         *dest_nested_column, dest_offsets,
610
91
                                                         input_rows_count);
611
91
                },
_ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESE_IbLb0EEEEDaSA_SB_
Line
Count
Source
607
8
                [&](auto src_const, auto delimiter_const) {
608
8
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
609
8
                                                         *dest_nested_column, dest_offsets,
610
8
                                                         input_rows_count);
611
8
                },
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESF_EEDaSA_SB_
612
154
                make_bool_variant(left_const), make_bool_variant(right_const));
613
614
        // all elements in dest_nested_column are not null
615
154
        dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(),
616
154
                                                                       false);
617
154
        block.replace_by_position(result, std::move(dest_column_ptr));
618
619
154
        return Status::OK();
620
154
    }
621
622
private:
623
    template <bool src_const, bool delimiter_const>
624
    void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column,
625
                  IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets,
626
154
                  size_t size) const {
627
154
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
628
154
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
629
154
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
630
154
        column_string_chars.reserve(0);
631
632
154
        ColumnArray::Offset64 string_pos = 0;
633
154
        ColumnArray::Offset64 dest_pos = 0;
634
635
154
        StringSearch search;
636
154
        StringRef delimiter_ref_for_search;
637
638
154
        if constexpr (delimiter_const) {
639
91
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
640
91
            search.set_pattern(&delimiter_ref_for_search);
641
91
        }
642
643
1.22k
        for (size_t i = 0; i < size; i++) {
644
1.07k
            const StringRef str_ref =
645
1.07k
                    src_column_string.get_data_at(index_check_const<src_const>(i));
646
1.07k
            const StringRef delimiter_ref =
647
1.07k
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
648
649
1.07k
            if (str_ref.size == 0) {
650
164
                dest_offsets.push_back(dest_pos);
651
164
                continue;
652
164
            }
653
908
            if (delimiter_ref.size == 0) {
654
27
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
655
27
                                      string_pos, dest_pos);
656
881
            } else {
657
881
                if constexpr (!delimiter_const) {
658
51
                    search.set_pattern(&delimiter_ref);
659
51
                }
660
51.7k
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
661
50.8k
                    const size_t str_offset = str_pos;
662
50.8k
                    const size_t old_size = column_string_chars.size();
663
                    // search first match delimter_ref index from src string among str_offset to end
664
50.8k
                    const char* result_start =
665
50.8k
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
666
                    // compute split part size
667
50.8k
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
668
                    // save dist string split part
669
50.8k
                    if (split_part_size > 0) {
670
50.2k
                        const size_t new_size = old_size + split_part_size;
671
50.2k
                        column_string_chars.resize(new_size);
672
50.2k
                        memcpy_small_allow_read_write_overflow15(
673
50.2k
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
674
50.2k
                                split_part_size);
675
                        // add dist string offset
676
50.2k
                        string_pos += split_part_size;
677
50.2k
                    }
678
50.8k
                    column_string_offsets.push_back(string_pos);
679
                    // array offset + 1
680
50.8k
                    dest_pos++;
681
                    // add src string str_pos to next search start
682
50.8k
                    str_pos += split_part_size + delimiter_ref.size;
683
50.8k
                }
684
881
            }
685
908
            dest_offsets.push_back(dest_pos);
686
908
        }
687
154
    }
_ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
626
55
                  size_t size) const {
627
55
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
628
55
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
629
55
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
630
55
        column_string_chars.reserve(0);
631
632
55
        ColumnArray::Offset64 string_pos = 0;
633
55
        ColumnArray::Offset64 dest_pos = 0;
634
635
55
        StringSearch search;
636
55
        StringRef delimiter_ref_for_search;
637
638
        if constexpr (delimiter_const) {
639
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
640
            search.set_pattern(&delimiter_ref_for_search);
641
        }
642
643
130
        for (size_t i = 0; i < size; i++) {
644
75
            const StringRef str_ref =
645
75
                    src_column_string.get_data_at(index_check_const<src_const>(i));
646
75
            const StringRef delimiter_ref =
647
75
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
648
649
75
            if (str_ref.size == 0) {
650
13
                dest_offsets.push_back(dest_pos);
651
13
                continue;
652
13
            }
653
62
            if (delimiter_ref.size == 0) {
654
11
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
655
11
                                      string_pos, dest_pos);
656
51
            } else {
657
51
                if constexpr (!delimiter_const) {
658
51
                    search.set_pattern(&delimiter_ref);
659
51
                }
660
214
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
661
163
                    const size_t str_offset = str_pos;
662
163
                    const size_t old_size = column_string_chars.size();
663
                    // search first match delimter_ref index from src string among str_offset to end
664
163
                    const char* result_start =
665
163
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
666
                    // compute split part size
667
163
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
668
                    // save dist string split part
669
163
                    if (split_part_size > 0) {
670
122
                        const size_t new_size = old_size + split_part_size;
671
122
                        column_string_chars.resize(new_size);
672
122
                        memcpy_small_allow_read_write_overflow15(
673
122
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
674
122
                                split_part_size);
675
                        // add dist string offset
676
122
                        string_pos += split_part_size;
677
122
                    }
678
163
                    column_string_offsets.push_back(string_pos);
679
                    // array offset + 1
680
163
                    dest_pos++;
681
                    // add src string str_pos to next search start
682
163
                    str_pos += split_part_size + delimiter_ref.size;
683
163
                }
684
51
            }
685
62
            dest_offsets.push_back(dest_pos);
686
62
        }
687
55
    }
_ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
626
91
                  size_t size) const {
627
91
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
628
91
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
629
91
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
630
91
        column_string_chars.reserve(0);
631
632
91
        ColumnArray::Offset64 string_pos = 0;
633
91
        ColumnArray::Offset64 dest_pos = 0;
634
635
91
        StringSearch search;
636
91
        StringRef delimiter_ref_for_search;
637
638
91
        if constexpr (delimiter_const) {
639
91
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
640
91
            search.set_pattern(&delimiter_ref_for_search);
641
91
        }
642
643
1.06k
        for (size_t i = 0; i < size; i++) {
644
973
            const StringRef str_ref =
645
973
                    src_column_string.get_data_at(index_check_const<src_const>(i));
646
973
            const StringRef delimiter_ref =
647
973
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
648
649
973
            if (str_ref.size == 0) {
650
135
                dest_offsets.push_back(dest_pos);
651
135
                continue;
652
135
            }
653
838
            if (delimiter_ref.size == 0) {
654
8
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
655
8
                                      string_pos, dest_pos);
656
830
            } else {
657
                if constexpr (!delimiter_const) {
658
                    search.set_pattern(&delimiter_ref);
659
                }
660
51.5k
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
661
50.7k
                    const size_t str_offset = str_pos;
662
50.7k
                    const size_t old_size = column_string_chars.size();
663
                    // search first match delimter_ref index from src string among str_offset to end
664
50.7k
                    const char* result_start =
665
50.7k
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
666
                    // compute split part size
667
50.7k
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
668
                    // save dist string split part
669
50.7k
                    if (split_part_size > 0) {
670
50.1k
                        const size_t new_size = old_size + split_part_size;
671
50.1k
                        column_string_chars.resize(new_size);
672
50.1k
                        memcpy_small_allow_read_write_overflow15(
673
50.1k
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
674
50.1k
                                split_part_size);
675
                        // add dist string offset
676
50.1k
                        string_pos += split_part_size;
677
50.1k
                    }
678
50.7k
                    column_string_offsets.push_back(string_pos);
679
                    // array offset + 1
680
50.7k
                    dest_pos++;
681
                    // add src string str_pos to next search start
682
50.7k
                    str_pos += split_part_size + delimiter_ref.size;
683
50.7k
                }
684
830
            }
685
838
            dest_offsets.push_back(dest_pos);
686
838
        }
687
91
    }
_ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
626
8
                  size_t size) const {
627
8
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
628
8
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
629
8
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
630
8
        column_string_chars.reserve(0);
631
632
8
        ColumnArray::Offset64 string_pos = 0;
633
8
        ColumnArray::Offset64 dest_pos = 0;
634
635
8
        StringSearch search;
636
8
        StringRef delimiter_ref_for_search;
637
638
        if constexpr (delimiter_const) {
639
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
640
            search.set_pattern(&delimiter_ref_for_search);
641
        }
642
643
32
        for (size_t i = 0; i < size; i++) {
644
24
            const StringRef str_ref =
645
24
                    src_column_string.get_data_at(index_check_const<src_const>(i));
646
24
            const StringRef delimiter_ref =
647
24
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
648
649
24
            if (str_ref.size == 0) {
650
16
                dest_offsets.push_back(dest_pos);
651
16
                continue;
652
16
            }
653
8
            if (delimiter_ref.size == 0) {
654
8
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
655
8
                                      string_pos, dest_pos);
656
8
            } else {
657
0
                if constexpr (!delimiter_const) {
658
0
                    search.set_pattern(&delimiter_ref);
659
0
                }
660
0
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
661
0
                    const size_t str_offset = str_pos;
662
0
                    const size_t old_size = column_string_chars.size();
663
                    // search first match delimter_ref index from src string among str_offset to end
664
0
                    const char* result_start =
665
0
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
666
                    // compute split part size
667
0
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
668
                    // save dist string split part
669
0
                    if (split_part_size > 0) {
670
0
                        const size_t new_size = old_size + split_part_size;
671
0
                        column_string_chars.resize(new_size);
672
0
                        memcpy_small_allow_read_write_overflow15(
673
0
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
674
0
                                split_part_size);
675
                        // add dist string offset
676
0
                        string_pos += split_part_size;
677
0
                    }
678
0
                    column_string_offsets.push_back(string_pos);
679
                    // array offset + 1
680
0
                    dest_pos++;
681
                    // add src string str_pos to next search start
682
0
                    str_pos += split_part_size + delimiter_ref.size;
683
0
                }
684
0
            }
685
8
            dest_offsets.push_back(dest_pos);
686
8
        }
687
8
    }
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
688
689
    void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars,
690
                               ColumnString::Offsets& column_string_offsets,
691
                               ColumnArray::Offset64& string_pos,
692
27
                               ColumnArray::Offset64& dest_pos) const {
693
27
        const size_t old_size = column_string_chars.size();
694
27
        const size_t new_size = old_size + str_ref.size;
695
27
        column_string_chars.resize(new_size);
696
27
        memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size);
697
27
        if (simd::VStringFunctions::is_ascii(str_ref)) {
698
24
            const auto size = str_ref.size;
699
700
24
            const auto nested_old_size = column_string_offsets.size();
701
24
            const auto nested_new_size = nested_old_size + size;
702
24
            column_string_offsets.resize(nested_new_size);
703
24
            std::iota(column_string_offsets.data() + nested_old_size,
704
24
                      column_string_offsets.data() + nested_new_size, string_pos + 1);
705
706
24
            string_pos += size;
707
24
            dest_pos += size;
708
            // The above code is equivalent to the code in the following comment.
709
            // for (size_t i = 0; i < str_ref.size; i++) {
710
            //     string_pos++;
711
            //     column_string_offsets.push_back(string_pos);
712
            //     (*dest_nested_null_map).push_back(false);
713
            //     dest_pos++;
714
            // }
715
24
        } else {
716
22
            for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) {
717
19
                utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]];
718
719
19
                string_pos += utf8_char_len;
720
19
                column_string_offsets.push_back(string_pos);
721
19
                dest_pos++;
722
19
            }
723
3
        }
724
27
    }
725
};
726
727
enum class FunctionCountSubStringType { TWO_ARGUMENTS, THREE_ARGUMENTS };
728
729
template <FunctionCountSubStringType type>
730
class FunctionCountSubString : public IFunction {
731
public:
732
    static constexpr auto name = "count_substrings";
733
    static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3;
734
735
277
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv
Line
Count
Source
735
74
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv
Line
Count
Source
735
203
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
736
    using NullMapType = PaddedPODArray<UInt8>;
737
738
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev
739
740
0
    size_t get_number_of_arguments() const override { return arg_count; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv
741
742
265
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
743
265
        return std::make_shared<DataTypeInt32>();
744
265
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
742
68
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
743
68
        return std::make_shared<DataTypeInt32>();
744
68
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
742
197
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
743
197
        return std::make_shared<DataTypeInt32>();
744
197
    }
745
746
10
    DataTypes get_variadic_argument_types_impl() const override {
747
10
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
748
5
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
749
5
        } else {
750
5
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
751
5
                    std::make_shared<DataTypeInt32>()};
752
5
        }
753
10
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv
Line
Count
Source
746
5
    DataTypes get_variadic_argument_types_impl() const override {
747
5
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
748
5
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
749
        } else {
750
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
751
                    std::make_shared<DataTypeInt32>()};
752
        }
753
5
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv
Line
Count
Source
746
5
    DataTypes get_variadic_argument_types_impl() const override {
747
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
748
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
749
5
        } else {
750
5
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
751
5
                    std::make_shared<DataTypeInt32>()};
752
5
        }
753
5
    }
754
755
267
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv
Line
Count
Source
755
69
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv
Line
Count
Source
755
198
    bool is_variadic() const override { return true; }
756
757
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
758
234
                        uint32_t result, size_t input_rows_count) const override {
759
234
        DCHECK(arg_count);
760
234
        bool col_const[arg_count];
761
234
        ColumnPtr argument_columns[arg_count];
762
878
        for (int i = 0; i < arg_count; ++i) {
763
644
            std::tie(argument_columns[i], col_const[i]) =
764
644
                    unpack_if_const(block.get_by_position(arguments[i]).column);
765
644
        }
766
767
234
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
768
234
        auto& dest_column_data = dest_column_ptr->get_data();
769
770
234
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
771
58
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
772
58
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
773
58
            std::visit(
774
58
                    [&](auto str_const, auto pattern_const) {
775
58
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
776
58
                                                           dest_column_data, input_rows_count);
777
58
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_
Line
Count
Source
774
32
                    [&](auto str_const, auto pattern_const) {
775
32
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
776
32
                                                           dest_column_data, input_rows_count);
777
32
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_
Line
Count
Source
774
13
                    [&](auto str_const, auto pattern_const) {
775
13
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
776
13
                                                           dest_column_data, input_rows_count);
777
13
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_
Line
Count
Source
774
13
                    [&](auto str_const, auto pattern_const) {
775
13
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
776
13
                                                           dest_column_data, input_rows_count);
777
13
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_
778
58
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
779
176
        } else {
780
176
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
781
176
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
782
176
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
783
176
            std::visit(
784
176
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
176
                        _execute<str_const, pattern_const, start_pos_const>(
786
176
                                src_column_string, pattern_column, start_pos_column,
787
176
                                dest_column_data, input_rows_count);
788
176
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
784
36
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
36
                        _execute<str_const, pattern_const, start_pos_const>(
786
36
                                src_column_string, pattern_column, start_pos_column,
787
36
                                dest_column_data, input_rows_count);
788
36
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
784
29
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
29
                        _execute<str_const, pattern_const, start_pos_const>(
786
29
                                src_column_string, pattern_column, start_pos_column,
787
29
                                dest_column_data, input_rows_count);
788
29
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
784
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
22
                        _execute<str_const, pattern_const, start_pos_const>(
786
22
                                src_column_string, pattern_column, start_pos_column,
787
22
                                dest_column_data, input_rows_count);
788
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
784
23
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
23
                        _execute<str_const, pattern_const, start_pos_const>(
786
23
                                src_column_string, pattern_column, start_pos_column,
787
23
                                dest_column_data, input_rows_count);
788
23
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
784
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
22
                        _execute<str_const, pattern_const, start_pos_const>(
786
22
                                src_column_string, pattern_column, start_pos_column,
787
22
                                dest_column_data, input_rows_count);
788
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
784
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
22
                        _execute<str_const, pattern_const, start_pos_const>(
786
22
                                src_column_string, pattern_column, start_pos_column,
787
22
                                dest_column_data, input_rows_count);
788
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
784
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
22
                        _execute<str_const, pattern_const, start_pos_const>(
786
22
                                src_column_string, pattern_column, start_pos_column,
787
22
                                dest_column_data, input_rows_count);
788
22
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
789
176
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
790
176
                    make_bool_variant(col_const[2]));
791
176
        }
792
793
234
        block.replace_by_position(result, std::move(dest_column_ptr));
794
234
        return Status::OK();
795
234
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
758
58
                        uint32_t result, size_t input_rows_count) const override {
759
58
        DCHECK(arg_count);
760
58
        bool col_const[arg_count];
761
58
        ColumnPtr argument_columns[arg_count];
762
174
        for (int i = 0; i < arg_count; ++i) {
763
116
            std::tie(argument_columns[i], col_const[i]) =
764
116
                    unpack_if_const(block.get_by_position(arguments[i]).column);
765
116
        }
766
767
58
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
768
58
        auto& dest_column_data = dest_column_ptr->get_data();
769
770
58
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
771
58
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
772
58
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
773
58
            std::visit(
774
58
                    [&](auto str_const, auto pattern_const) {
775
58
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
776
58
                                                           dest_column_data, input_rows_count);
777
58
                    },
778
58
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
779
        } else {
780
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
781
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
782
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
783
            std::visit(
784
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
                        _execute<str_const, pattern_const, start_pos_const>(
786
                                src_column_string, pattern_column, start_pos_column,
787
                                dest_column_data, input_rows_count);
788
                    },
789
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
790
                    make_bool_variant(col_const[2]));
791
        }
792
793
58
        block.replace_by_position(result, std::move(dest_column_ptr));
794
58
        return Status::OK();
795
58
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
758
176
                        uint32_t result, size_t input_rows_count) const override {
759
176
        DCHECK(arg_count);
760
176
        bool col_const[arg_count];
761
176
        ColumnPtr argument_columns[arg_count];
762
704
        for (int i = 0; i < arg_count; ++i) {
763
528
            std::tie(argument_columns[i], col_const[i]) =
764
528
                    unpack_if_const(block.get_by_position(arguments[i]).column);
765
528
        }
766
767
176
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
768
176
        auto& dest_column_data = dest_column_ptr->get_data();
769
770
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
771
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
772
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
773
            std::visit(
774
                    [&](auto str_const, auto pattern_const) {
775
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
776
                                                           dest_column_data, input_rows_count);
777
                    },
778
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
779
176
        } else {
780
176
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
781
176
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
782
176
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
783
176
            std::visit(
784
176
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
785
176
                        _execute<str_const, pattern_const, start_pos_const>(
786
176
                                src_column_string, pattern_column, start_pos_column,
787
176
                                dest_column_data, input_rows_count);
788
176
                    },
789
176
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
790
176
                    make_bool_variant(col_const[2]));
791
176
        }
792
793
176
        block.replace_by_position(result, std::move(dest_column_ptr));
794
176
        return Status::OK();
795
176
    }
796
797
private:
798
    template <bool src_const, bool pattern_const>
799
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
800
58
                  ColumnInt32::Container& dest_column_data, size_t size) const {
801
58
        if constexpr (pattern_const) {
802
13
            const StringRef pattern_ref = pattern_column.get_data_at(0);
803
13
            if (pattern_ref.size == 0) {
804
3
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
805
3
                return;
806
3
            }
807
808
10
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
809
48
            for (size_t i = 0; i < size; i++) {
810
38
                const StringRef str_ref =
811
38
                        src_column_string.get_data_at(index_check_const<src_const>(i));
812
38
                dest_column_data[i] = find_str_count_with_searcher(str_ref, pattern_ref, searcher);
813
38
            }
814
10
            return;
815
13
        }
816
817
200
        for (size_t i = 0; i < size; i++) {
818
142
            const StringRef str_ref =
819
142
                    src_column_string.get_data_at(index_check_const<src_const>(i));
820
821
142
            const StringRef pattern_ref =
822
142
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
823
142
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
824
142
        }
825
58
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
800
32
                  ColumnInt32::Container& dest_column_data, size_t size) const {
801
        if constexpr (pattern_const) {
802
            const StringRef pattern_ref = pattern_column.get_data_at(0);
803
            if (pattern_ref.size == 0) {
804
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
805
                return;
806
            }
807
808
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
809
            for (size_t i = 0; i < size; i++) {
810
                const StringRef str_ref =
811
                        src_column_string.get_data_at(index_check_const<src_const>(i));
812
                dest_column_data[i] = find_str_count_with_searcher(str_ref, pattern_ref, searcher);
813
            }
814
            return;
815
        }
816
817
133
        for (size_t i = 0; i < size; i++) {
818
101
            const StringRef str_ref =
819
101
                    src_column_string.get_data_at(index_check_const<src_const>(i));
820
821
101
            const StringRef pattern_ref =
822
101
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
823
101
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
824
101
        }
825
32
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
800
13
                  ColumnInt32::Container& dest_column_data, size_t size) const {
801
13
        if constexpr (pattern_const) {
802
13
            const StringRef pattern_ref = pattern_column.get_data_at(0);
803
13
            if (pattern_ref.size == 0) {
804
3
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
805
3
                return;
806
3
            }
807
808
10
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
809
48
            for (size_t i = 0; i < size; i++) {
810
38
                const StringRef str_ref =
811
38
                        src_column_string.get_data_at(index_check_const<src_const>(i));
812
38
                dest_column_data[i] = find_str_count_with_searcher(str_ref, pattern_ref, searcher);
813
38
            }
814
10
            return;
815
13
        }
816
817
13
        for (size_t i = 0; i < size; i++) {
818
0
            const StringRef str_ref =
819
0
                    src_column_string.get_data_at(index_check_const<src_const>(i));
820
821
0
            const StringRef pattern_ref =
822
0
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
823
0
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
824
0
        }
825
13
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
800
13
                  ColumnInt32::Container& dest_column_data, size_t size) const {
801
        if constexpr (pattern_const) {
802
            const StringRef pattern_ref = pattern_column.get_data_at(0);
803
            if (pattern_ref.size == 0) {
804
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
805
                return;
806
            }
807
808
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
809
            for (size_t i = 0; i < size; i++) {
810
                const StringRef str_ref =
811
                        src_column_string.get_data_at(index_check_const<src_const>(i));
812
                dest_column_data[i] = find_str_count_with_searcher(str_ref, pattern_ref, searcher);
813
            }
814
            return;
815
        }
816
817
54
        for (size_t i = 0; i < size; i++) {
818
41
            const StringRef str_ref =
819
41
                    src_column_string.get_data_at(index_check_const<src_const>(i));
820
821
41
            const StringRef pattern_ref =
822
41
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
823
41
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
824
41
        }
825
13
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
826
827
    template <bool src_const, bool pattern_const, bool start_pos_const>
828
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
829
                  const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data,
830
176
                  size_t size) const {
831
176
        if constexpr (pattern_const) {
832
67
            const StringRef pattern_ref = pattern_column.get_data_at(0);
833
67
            if (pattern_ref.size == 0) {
834
15
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
835
15
                return;
836
15
            }
837
838
52
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
839
118
            for (size_t i = 0; i < size; i++) {
840
66
                const StringRef str_ref =
841
66
                        src_column_string.get_data_at(index_check_const<src_const>(i));
842
66
                const int32_t start_pos =
843
66
                        start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
844
66
                const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
845
846
66
                if (start_pos < 0 || start_byte_len >= str_ref.size) {
847
31
                    dest_column_data[i] = 0;
848
35
                } else {
849
35
                    dest_column_data[i] = find_str_count_with_searcher(
850
35
                            str_ref.substring(start_byte_len), pattern_ref, searcher);
851
35
                }
852
66
            }
853
52
            return;
854
67
        }
855
856
330
        for (size_t i = 0; i < size; i++) {
857
154
            const StringRef str_ref =
858
154
                    src_column_string.get_data_at(index_check_const<src_const>(i));
859
154
            const StringRef pattern_ref =
860
154
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
861
            // 1-based index
862
154
            int32_t start_pos =
863
154
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
864
865
154
            const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
866
867
154
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
868
88
                dest_column_data[i] = 0;
869
88
            } else {
870
66
                dest_column_data[i] =
871
66
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
872
66
            }
873
154
        }
874
176
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
830
36
                  size_t size) const {
831
        if constexpr (pattern_const) {
832
            const StringRef pattern_ref = pattern_column.get_data_at(0);
833
            if (pattern_ref.size == 0) {
834
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
835
                return;
836
            }
837
838
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
839
            for (size_t i = 0; i < size; i++) {
840
                const StringRef str_ref =
841
                        src_column_string.get_data_at(index_check_const<src_const>(i));
842
                const int32_t start_pos =
843
                        start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
844
                const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
845
846
                if (start_pos < 0 || start_byte_len >= str_ref.size) {
847
                    dest_column_data[i] = 0;
848
                } else {
849
                    dest_column_data[i] = find_str_count_with_searcher(
850
                            str_ref.substring(start_byte_len), pattern_ref, searcher);
851
                }
852
            }
853
            return;
854
        }
855
856
97
        for (size_t i = 0; i < size; i++) {
857
61
            const StringRef str_ref =
858
61
                    src_column_string.get_data_at(index_check_const<src_const>(i));
859
61
            const StringRef pattern_ref =
860
61
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
861
            // 1-based index
862
61
            int32_t start_pos =
863
61
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
864
865
61
            const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
866
867
61
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
868
38
                dest_column_data[i] = 0;
869
38
            } else {
870
23
                dest_column_data[i] =
871
23
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
872
23
            }
873
61
        }
874
36
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
830
29
                  size_t size) const {
831
        if constexpr (pattern_const) {
832
            const StringRef pattern_ref = pattern_column.get_data_at(0);
833
            if (pattern_ref.size == 0) {
834
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
835
                return;
836
            }
837
838
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
839
            for (size_t i = 0; i < size; i++) {
840
                const StringRef str_ref =
841
                        src_column_string.get_data_at(index_check_const<src_const>(i));
842
                const int32_t start_pos =
843
                        start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
844
                const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
845
846
                if (start_pos < 0 || start_byte_len >= str_ref.size) {
847
                    dest_column_data[i] = 0;
848
                } else {
849
                    dest_column_data[i] = find_str_count_with_searcher(
850
                            str_ref.substring(start_byte_len), pattern_ref, searcher);
851
                }
852
            }
853
            return;
854
        }
855
856
78
        for (size_t i = 0; i < size; i++) {
857
49
            const StringRef str_ref =
858
49
                    src_column_string.get_data_at(index_check_const<src_const>(i));
859
49
            const StringRef pattern_ref =
860
49
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
861
            // 1-based index
862
49
            int32_t start_pos =
863
49
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
864
865
49
            const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
866
867
49
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
868
22
                dest_column_data[i] = 0;
869
27
            } else {
870
27
                dest_column_data[i] =
871
27
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
872
27
            }
873
49
        }
874
29
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
830
22
                  size_t size) const {
831
22
        if constexpr (pattern_const) {
832
22
            const StringRef pattern_ref = pattern_column.get_data_at(0);
833
22
            if (pattern_ref.size == 0) {
834
5
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
835
5
                return;
836
5
            }
837
838
17
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
839
34
            for (size_t i = 0; i < size; i++) {
840
17
                const StringRef str_ref =
841
17
                        src_column_string.get_data_at(index_check_const<src_const>(i));
842
17
                const int32_t start_pos =
843
17
                        start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
844
17
                const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
845
846
17
                if (start_pos < 0 || start_byte_len >= str_ref.size) {
847
9
                    dest_column_data[i] = 0;
848
9
                } else {
849
8
                    dest_column_data[i] = find_str_count_with_searcher(
850
8
                            str_ref.substring(start_byte_len), pattern_ref, searcher);
851
8
                }
852
17
            }
853
17
            return;
854
22
        }
855
856
22
        for (size_t i = 0; i < size; i++) {
857
0
            const StringRef str_ref =
858
0
                    src_column_string.get_data_at(index_check_const<src_const>(i));
859
0
            const StringRef pattern_ref =
860
0
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
861
            // 1-based index
862
0
            int32_t start_pos =
863
0
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
864
865
0
            const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
866
867
0
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
868
0
                dest_column_data[i] = 0;
869
0
            } else {
870
0
                dest_column_data[i] =
871
0
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
872
0
            }
873
0
        }
874
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
830
23
                  size_t size) const {
831
23
        if constexpr (pattern_const) {
832
23
            const StringRef pattern_ref = pattern_column.get_data_at(0);
833
23
            if (pattern_ref.size == 0) {
834
5
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
835
5
                return;
836
5
            }
837
838
18
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
839
50
            for (size_t i = 0; i < size; i++) {
840
32
                const StringRef str_ref =
841
32
                        src_column_string.get_data_at(index_check_const<src_const>(i));
842
32
                const int32_t start_pos =
843
32
                        start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
844
32
                const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
845
846
32
                if (start_pos < 0 || start_byte_len >= str_ref.size) {
847
13
                    dest_column_data[i] = 0;
848
19
                } else {
849
19
                    dest_column_data[i] = find_str_count_with_searcher(
850
19
                            str_ref.substring(start_byte_len), pattern_ref, searcher);
851
19
                }
852
32
            }
853
18
            return;
854
23
        }
855
856
23
        for (size_t i = 0; i < size; i++) {
857
0
            const StringRef str_ref =
858
0
                    src_column_string.get_data_at(index_check_const<src_const>(i));
859
0
            const StringRef pattern_ref =
860
0
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
861
            // 1-based index
862
0
            int32_t start_pos =
863
0
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
864
865
0
            const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
866
867
0
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
868
0
                dest_column_data[i] = 0;
869
0
            } else {
870
0
                dest_column_data[i] =
871
0
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
872
0
            }
873
0
        }
874
23
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
830
22
                  size_t size) const {
831
        if constexpr (pattern_const) {
832
            const StringRef pattern_ref = pattern_column.get_data_at(0);
833
            if (pattern_ref.size == 0) {
834
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
835
                return;
836
            }
837
838
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
839
            for (size_t i = 0; i < size; i++) {
840
                const StringRef str_ref =
841
                        src_column_string.get_data_at(index_check_const<src_const>(i));
842
                const int32_t start_pos =
843
                        start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
844
                const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
845
846
                if (start_pos < 0 || start_byte_len >= str_ref.size) {
847
                    dest_column_data[i] = 0;
848
                } else {
849
                    dest_column_data[i] = find_str_count_with_searcher(
850
                            str_ref.substring(start_byte_len), pattern_ref, searcher);
851
                }
852
            }
853
            return;
854
        }
855
856
44
        for (size_t i = 0; i < size; i++) {
857
22
            const StringRef str_ref =
858
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
859
22
            const StringRef pattern_ref =
860
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
861
            // 1-based index
862
22
            int32_t start_pos =
863
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
864
865
22
            const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
866
867
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
868
14
                dest_column_data[i] = 0;
869
14
            } else {
870
8
                dest_column_data[i] =
871
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
872
8
            }
873
22
        }
874
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
830
22
                  size_t size) const {
831
        if constexpr (pattern_const) {
832
            const StringRef pattern_ref = pattern_column.get_data_at(0);
833
            if (pattern_ref.size == 0) {
834
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
835
                return;
836
            }
837
838
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
839
            for (size_t i = 0; i < size; i++) {
840
                const StringRef str_ref =
841
                        src_column_string.get_data_at(index_check_const<src_const>(i));
842
                const int32_t start_pos =
843
                        start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
844
                const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
845
846
                if (start_pos < 0 || start_byte_len >= str_ref.size) {
847
                    dest_column_data[i] = 0;
848
                } else {
849
                    dest_column_data[i] = find_str_count_with_searcher(
850
                            str_ref.substring(start_byte_len), pattern_ref, searcher);
851
                }
852
            }
853
            return;
854
        }
855
856
44
        for (size_t i = 0; i < size; i++) {
857
22
            const StringRef str_ref =
858
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
859
22
            const StringRef pattern_ref =
860
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
861
            // 1-based index
862
22
            int32_t start_pos =
863
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
864
865
22
            const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
866
867
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
868
14
                dest_column_data[i] = 0;
869
14
            } else {
870
8
                dest_column_data[i] =
871
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
872
8
            }
873
22
        }
874
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
830
22
                  size_t size) const {
831
22
        if constexpr (pattern_const) {
832
22
            const StringRef pattern_ref = pattern_column.get_data_at(0);
833
22
            if (pattern_ref.size == 0) {
834
5
                std::fill(dest_column_data.begin(), dest_column_data.end(), 0);
835
5
                return;
836
5
            }
837
838
17
            const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size);
839
34
            for (size_t i = 0; i < size; i++) {
840
17
                const StringRef str_ref =
841
17
                        src_column_string.get_data_at(index_check_const<src_const>(i));
842
17
                const int32_t start_pos =
843
17
                        start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
844
17
                const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
845
846
17
                if (start_pos < 0 || start_byte_len >= str_ref.size) {
847
9
                    dest_column_data[i] = 0;
848
9
                } else {
849
8
                    dest_column_data[i] = find_str_count_with_searcher(
850
8
                            str_ref.substring(start_byte_len), pattern_ref, searcher);
851
8
                }
852
17
            }
853
17
            return;
854
22
        }
855
856
22
        for (size_t i = 0; i < size; i++) {
857
0
            const StringRef str_ref =
858
0
                    src_column_string.get_data_at(index_check_const<src_const>(i));
859
0
            const StringRef pattern_ref =
860
0
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
861
            // 1-based index
862
0
            int32_t start_pos =
863
0
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
864
865
0
            const auto start_byte_len = get_start_byte_len(str_ref, start_pos);
866
867
0
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
868
0
                dest_column_data[i] = 0;
869
0
            } else {
870
0
                dest_column_data[i] =
871
0
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
872
0
            }
873
0
        }
874
22
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
875
876
220
    size_t get_start_byte_len(const StringRef str_ref, int32_t start_pos) const {
877
220
        const char* p = str_ref.begin();
878
220
        const char* end = str_ref.end();
879
220
        int char_size = 0;
880
1.34k
        for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
881
1.12k
            char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
882
1.12k
        }
883
220
        return p - str_ref.begin();
884
220
    }
885
886
387
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
887
387
        size_t old_size = pos;
888
387
        size_t str_size = str_ref.size;
889
1.51k
        while (pos < str_size &&
890
1.51k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
891
1.36k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
892
1.13k
            pos++;
893
1.13k
        }
894
387
        return pos - old_size;
895
387
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_
Line
Count
Source
886
223
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
887
223
        size_t old_size = pos;
888
223
        size_t str_size = str_ref.size;
889
753
        while (pos < str_size &&
890
753
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
891
661
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
892
530
            pos++;
893
530
        }
894
223
        return pos - old_size;
895
223
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_
Line
Count
Source
886
164
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
887
164
        size_t old_size = pos;
888
164
        size_t str_size = str_ref.size;
889
764
        while (pos < str_size &&
890
764
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
891
700
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
892
600
            pos++;
893
600
        }
894
164
        return pos - old_size;
895
164
    }
896
897
208
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
898
208
        int count = 0;
899
208
        if (str_ref.size == 0 || pattern_ref.size == 0) {
900
52
            return 0;
901
156
        } else {
902
387
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
903
387
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
904
387
                if (res_pos == (str_ref.size - str_pos)) {
905
156
                    break; // not find
906
156
                }
907
231
                count++;
908
231
                str_pos = str_pos + res_pos + pattern_ref.size;
909
231
            }
910
156
        }
911
156
        return count;
912
208
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_
Line
Count
Source
897
142
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
898
142
        int count = 0;
899
142
        if (str_ref.size == 0 || pattern_ref.size == 0) {
900
50
            return 0;
901
92
        } else {
902
223
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
903
223
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
904
223
                if (res_pos == (str_ref.size - str_pos)) {
905
92
                    break; // not find
906
92
                }
907
131
                count++;
908
131
                str_pos = str_pos + res_pos + pattern_ref.size;
909
131
            }
910
92
        }
911
92
        return count;
912
142
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_
Line
Count
Source
897
66
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
898
66
        int count = 0;
899
66
        if (str_ref.size == 0 || pattern_ref.size == 0) {
900
2
            return 0;
901
64
        } else {
902
164
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
903
164
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
904
164
                if (res_pos == (str_ref.size - str_pos)) {
905
64
                    break; // not find
906
64
                }
907
100
                count++;
908
100
                str_pos = str_pos + res_pos + pattern_ref.size;
909
100
            }
910
64
        }
911
64
        return count;
912
66
    }
913
914
    int find_str_count_with_searcher(const StringRef str_ref, StringRef pattern_ref,
915
73
                                     const ASCIICaseSensitiveStringSearcher& searcher) const {
916
73
        if (str_ref.size == 0 || pattern_ref.size == 0) {
917
9
            return 0;
918
9
        }
919
920
64
        int count = 0;
921
64
        const char* pos = str_ref.data;
922
64
        const char* const end = str_ref.data + str_ref.size;
923
142
        while (pos < end) {
924
142
            const char* match = searcher.search(pos, end);
925
142
            if (match == end) {
926
64
                break;
927
64
            }
928
78
            ++count;
929
78
            pos = match + pattern_ref.size;
930
78
        }
931
64
        return count;
932
73
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE28find_str_count_with_searcherENS_9StringRefES3_RKNS_14StringSearcherILb1ELb1EEE
Line
Count
Source
915
38
                                     const ASCIICaseSensitiveStringSearcher& searcher) const {
916
38
        if (str_ref.size == 0 || pattern_ref.size == 0) {
917
9
            return 0;
918
9
        }
919
920
29
        int count = 0;
921
29
        const char* pos = str_ref.data;
922
29
        const char* const end = str_ref.data + str_ref.size;
923
68
        while (pos < end) {
924
68
            const char* match = searcher.search(pos, end);
925
68
            if (match == end) {
926
29
                break;
927
29
            }
928
39
            ++count;
929
39
            pos = match + pattern_ref.size;
930
39
        }
931
29
        return count;
932
38
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE28find_str_count_with_searcherENS_9StringRefES3_RKNS_14StringSearcherILb1ELb1EEE
Line
Count
Source
915
35
                                     const ASCIICaseSensitiveStringSearcher& searcher) const {
916
35
        if (str_ref.size == 0 || pattern_ref.size == 0) {
917
0
            return 0;
918
0
        }
919
920
35
        int count = 0;
921
35
        const char* pos = str_ref.data;
922
35
        const char* const end = str_ref.data + str_ref.size;
923
74
        while (pos < end) {
924
74
            const char* match = searcher.search(pos, end);
925
74
            if (match == end) {
926
35
                break;
927
35
            }
928
39
            ++count;
929
39
            pos = match + pattern_ref.size;
930
39
        }
931
35
        return count;
932
35
    }
933
};
934
935
5
void register_function_string_search(SimpleFunctionFactory& factory) {
936
5
    factory.register_function<FunctionStringLocatePos>();
937
5
    factory.register_function<FunctionSplitPart>();
938
5
    factory.register_function<FunctionSplitByString>();
939
5
    factory.register_function<FunctionCountSubString<FunctionCountSubStringType::TWO_ARGUMENTS>>();
940
5
    factory.register_function<
941
5
            FunctionCountSubString<FunctionCountSubStringType::THREE_ARGUMENTS>>();
942
5
    factory.register_function<FunctionSubstringIndex>();
943
944
5
    factory.register_alias(FunctionStringLocatePos::name, "position");
945
5
}
946
947
#include "common/compile_check_avoid_end.h"
948
} // namespace doris