be/src/exprs/function/function_string_search.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <algorithm> |
19 | | #include <cstddef> |
20 | | #include <cstring> |
21 | | #include <numeric> |
22 | | #include <string> |
23 | | #include <string_view> |
24 | | #include <vector> |
25 | | |
26 | | #include "common/status.h" |
27 | | #include "core/assert_cast.h" |
28 | | #include "core/block/block.h" |
29 | | #include "core/block/column_numbers.h" |
30 | | #include "core/column/column_array.h" |
31 | | #include "core/column/column_const.h" |
32 | | #include "core/column/column_nullable.h" |
33 | | #include "core/column/column_string.h" |
34 | | #include "core/column/column_vector.h" |
35 | | #include "core/data_type/data_type_array.h" |
36 | | #include "core/data_type/data_type_nullable.h" |
37 | | #include "core/data_type/data_type_number.h" |
38 | | #include "core/data_type/data_type_string.h" |
39 | | #include "core/data_type/define_primitive_type.h" |
40 | | #include "core/memcmp_small.h" |
41 | | #include "core/memcpy_small.h" |
42 | | #include "core/pod_array_fwd.h" |
43 | | #include "core/string_ref.h" |
44 | | #include "exec/common/string_searcher.h" |
45 | | #include "exec/common/stringop_substring.h" |
46 | | #include "exec/common/template_helpers.hpp" |
47 | | #include "exec/common/util.hpp" |
48 | | #include "exprs/function/function.h" |
49 | | #include "exprs/function/function_helpers.h" |
50 | | #include "exprs/function/simple_function_factory.h" |
51 | | #include "exprs/function_context.h" |
52 | | #include "util/simd/vstring_function.h" |
53 | | #include "util/string_search.hpp" |
54 | | |
55 | | namespace doris { |
56 | | #include "common/compile_check_avoid_begin.h" |
57 | | |
58 | | class FunctionStringLocatePos : public IFunction { |
59 | | public: |
60 | | static constexpr auto name = "locate"; |
61 | 868 | static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); } |
62 | 0 | String get_name() const override { return name; } |
63 | 0 | size_t get_number_of_arguments() const override { return 3; } |
64 | | |
65 | 862 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
66 | 862 | return std::make_shared<DataTypeInt32>(); |
67 | 862 | } |
68 | | |
69 | 5 | DataTypes get_variadic_argument_types_impl() const override { |
70 | 5 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
71 | 5 | std::make_shared<DataTypeInt32>()}; |
72 | 5 | } |
73 | | |
74 | 863 | bool is_variadic() const override { return true; } |
75 | | |
76 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
77 | 648 | uint32_t result, size_t input_rows_count) const override { |
78 | 648 | if (arguments.size() != 3) { |
79 | 0 | return Status::InvalidArgument("Function {} requires 3 arguments, but got {}", |
80 | 0 | get_name(), arguments.size()); |
81 | 0 | } |
82 | 648 | bool col_const[3]; |
83 | 648 | ColumnPtr argument_columns[3]; |
84 | 2.59k | for (int i = 0; i < 3; ++i) { |
85 | 1.94k | std::tie(argument_columns[i], col_const[i]) = |
86 | 1.94k | unpack_if_const(block.get_by_position(arguments[i]).column); |
87 | 1.94k | } |
88 | | |
89 | 648 | const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get()); |
90 | 648 | const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get()); |
91 | 648 | const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
92 | | |
93 | 648 | ColumnInt32::MutablePtr col_res = ColumnInt32::create(); |
94 | 648 | auto& vec_res = col_res->get_data(); |
95 | 648 | vec_res.resize(block.rows()); |
96 | | |
97 | 648 | const bool is_ascii = col_left->is_ascii() && col_right->is_ascii(); |
98 | | |
99 | 648 | if (col_const[0]) { |
100 | 248 | std::visit( |
101 | 248 | [&](auto is_ascii, auto str_const, auto pos_const) { |
102 | 248 | scalar_search<is_ascii, str_const, pos_const>( |
103 | 248 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, |
104 | 248 | input_rows_count); |
105 | 248 | }, _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 101 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 102 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 103 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 104 | 22 | input_rows_count); | 105 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 101 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 102 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 103 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 104 | 22 | input_rows_count); | 105 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 101 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 102 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 103 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 104 | 22 | input_rows_count); | 105 | 22 | }, |
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 101 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 102 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 103 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 104 | 60 | input_rows_count); | 105 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 101 | 62 | [&](auto is_ascii, auto str_const, auto pos_const) { | 102 | 62 | scalar_search<is_ascii, str_const, pos_const>( | 103 | 62 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 104 | 62 | input_rows_count); | 105 | 62 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 101 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 102 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 103 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 104 | 60 | input_rows_count); | 105 | 60 | }, |
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
106 | 248 | make_bool_variant(is_ascii), make_bool_variant(col_const[1]), |
107 | 248 | make_bool_variant(col_const[2])); |
108 | | |
109 | 400 | } else { |
110 | 400 | std::visit( |
111 | 401 | [&](auto is_ascii, auto str_const, auto pos_const) { |
112 | 401 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, |
113 | 401 | col_pos->get_data(), vec_res, |
114 | 401 | input_rows_count); |
115 | 401 | }, _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 111 | 39 | [&](auto is_ascii, auto str_const, auto pos_const) { | 112 | 39 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 113 | 39 | col_pos->get_data(), vec_res, | 114 | 39 | input_rows_count); | 115 | 39 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 111 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 112 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 113 | 22 | col_pos->get_data(), vec_res, | 114 | 22 | input_rows_count); | 115 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 111 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 112 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 113 | 22 | col_pos->get_data(), vec_res, | 114 | 22 | input_rows_count); | 115 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ Line | Count | Source | 111 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 112 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 113 | 22 | col_pos->get_data(), vec_res, | 114 | 22 | input_rows_count); | 115 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 111 | 116 | [&](auto is_ascii, auto str_const, auto pos_const) { | 112 | 116 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 113 | 116 | col_pos->get_data(), vec_res, | 114 | 116 | input_rows_count); | 115 | 116 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 111 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 112 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 113 | 60 | col_pos->get_data(), vec_res, | 114 | 60 | input_rows_count); | 115 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 111 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 112 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 113 | 60 | col_pos->get_data(), vec_res, | 114 | 60 | input_rows_count); | 115 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 111 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 112 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 113 | 60 | col_pos->get_data(), vec_res, | 114 | 60 | input_rows_count); | 115 | 60 | }, |
|
116 | 400 | make_bool_variant(is_ascii), make_bool_variant(col_const[1]), |
117 | 400 | make_bool_variant(col_const[2])); |
118 | 400 | } |
119 | 648 | block.replace_by_position(result, std::move(col_res)); |
120 | 648 | return Status::OK(); |
121 | 648 | } |
122 | | |
123 | | private: |
124 | | template <bool is_ascii, bool str_const, bool pos_const> |
125 | | void scalar_search(const StringRef& ldata, const ColumnString* col_right, |
126 | | const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res, |
127 | 248 | size_t size) const { |
128 | 248 | res.resize(size); |
129 | 248 | StringRef substr(ldata.data, ldata.size); |
130 | 248 | StringSearch search {&substr}; |
131 | | |
132 | 499 | for (int i = 0; i < size; ++i) { |
133 | 251 | res[i] = locate_pos<is_ascii>(substr, |
134 | 251 | col_right->get_data_at(index_check_const<str_const>(i)), |
135 | 251 | search, posdata[index_check_const<pos_const>(i)]); |
136 | 251 | } |
137 | 248 | } _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 127 | 22 | size_t size) const { | 128 | 22 | res.resize(size); | 129 | 22 | StringRef substr(ldata.data, ldata.size); | 130 | 22 | StringSearch search {&substr}; | 131 | | | 132 | 44 | for (int i = 0; i < size; ++i) { | 133 | 22 | res[i] = locate_pos<is_ascii>(substr, | 134 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 135 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 136 | 22 | } | 137 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 127 | 22 | size_t size) const { | 128 | 22 | res.resize(size); | 129 | 22 | StringRef substr(ldata.data, ldata.size); | 130 | 22 | StringSearch search {&substr}; | 131 | | | 132 | 44 | for (int i = 0; i < size; ++i) { | 133 | 22 | res[i] = locate_pos<is_ascii>(substr, | 134 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 135 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 136 | 22 | } | 137 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 127 | 22 | size_t size) const { | 128 | 22 | res.resize(size); | 129 | 22 | StringRef substr(ldata.data, ldata.size); | 130 | 22 | StringSearch search {&substr}; | 131 | | | 132 | 44 | for (int i = 0; i < size; ++i) { | 133 | 22 | res[i] = locate_pos<is_ascii>(substr, | 134 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 135 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 136 | 22 | } | 137 | 22 | } |
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 127 | 60 | size_t size) const { | 128 | 60 | res.resize(size); | 129 | 60 | StringRef substr(ldata.data, ldata.size); | 130 | 60 | StringSearch search {&substr}; | 131 | | | 132 | 120 | for (int i = 0; i < size; ++i) { | 133 | 60 | res[i] = locate_pos<is_ascii>(substr, | 134 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 135 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 136 | 60 | } | 137 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 127 | 62 | size_t size) const { | 128 | 62 | res.resize(size); | 129 | 62 | StringRef substr(ldata.data, ldata.size); | 130 | 62 | StringSearch search {&substr}; | 131 | | | 132 | 127 | for (int i = 0; i < size; ++i) { | 133 | 65 | res[i] = locate_pos<is_ascii>(substr, | 134 | 65 | col_right->get_data_at(index_check_const<str_const>(i)), | 135 | 65 | search, posdata[index_check_const<pos_const>(i)]); | 136 | 65 | } | 137 | 62 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 127 | 60 | size_t size) const { | 128 | 60 | res.resize(size); | 129 | 60 | StringRef substr(ldata.data, ldata.size); | 130 | 60 | StringSearch search {&substr}; | 131 | | | 132 | 120 | for (int i = 0; i < size; ++i) { | 133 | 60 | res[i] = locate_pos<is_ascii>(substr, | 134 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 135 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 136 | 60 | } | 137 | 60 | } |
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m |
138 | | |
139 | | template <bool is_ascii, bool str_const, bool pos_const> |
140 | | void vector_search(const ColumnString* col_left, const ColumnString* col_right, |
141 | | const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res, |
142 | 401 | size_t size) const { |
143 | 401 | res.resize(size); |
144 | 401 | StringSearch search; |
145 | 984 | for (int i = 0; i < size; ++i) { |
146 | 583 | StringRef substr = col_left->get_data_at(i); |
147 | 583 | search.set_pattern(&substr); |
148 | 583 | res[i] = locate_pos<is_ascii>(substr, |
149 | 583 | col_right->get_data_at(index_check_const<str_const>(i)), |
150 | 583 | search, posdata[index_check_const<pos_const>(i)]); |
151 | 583 | } |
152 | 401 | } _ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 142 | 39 | size_t size) const { | 143 | 39 | res.resize(size); | 144 | 39 | StringSearch search; | 145 | 103 | for (int i = 0; i < size; ++i) { | 146 | 64 | StringRef substr = col_left->get_data_at(i); | 147 | 64 | search.set_pattern(&substr); | 148 | 64 | res[i] = locate_pos<is_ascii>(substr, | 149 | 64 | col_right->get_data_at(index_check_const<str_const>(i)), | 150 | 64 | search, posdata[index_check_const<pos_const>(i)]); | 151 | 64 | } | 152 | 39 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 142 | 22 | size_t size) const { | 143 | 22 | res.resize(size); | 144 | 22 | StringSearch search; | 145 | 44 | for (int i = 0; i < size; ++i) { | 146 | 22 | StringRef substr = col_left->get_data_at(i); | 147 | 22 | search.set_pattern(&substr); | 148 | 22 | res[i] = locate_pos<is_ascii>(substr, | 149 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 150 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 151 | 22 | } | 152 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 142 | 22 | size_t size) const { | 143 | 22 | res.resize(size); | 144 | 22 | StringSearch search; | 145 | 44 | for (int i = 0; i < size; ++i) { | 146 | 22 | StringRef substr = col_left->get_data_at(i); | 147 | 22 | search.set_pattern(&substr); | 148 | 22 | res[i] = locate_pos<is_ascii>(substr, | 149 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 150 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 151 | 22 | } | 152 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 142 | 22 | size_t size) const { | 143 | 22 | res.resize(size); | 144 | 22 | StringSearch search; | 145 | 44 | for (int i = 0; i < size; ++i) { | 146 | 22 | StringRef substr = col_left->get_data_at(i); | 147 | 22 | search.set_pattern(&substr); | 148 | 22 | res[i] = locate_pos<is_ascii>(substr, | 149 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 150 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 151 | 22 | } | 152 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 142 | 116 | size_t size) const { | 143 | 116 | res.resize(size); | 144 | 116 | StringSearch search; | 145 | 389 | for (int i = 0; i < size; ++i) { | 146 | 273 | StringRef substr = col_left->get_data_at(i); | 147 | 273 | search.set_pattern(&substr); | 148 | 273 | res[i] = locate_pos<is_ascii>(substr, | 149 | 273 | col_right->get_data_at(index_check_const<str_const>(i)), | 150 | 273 | search, posdata[index_check_const<pos_const>(i)]); | 151 | 273 | } | 152 | 116 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 142 | 60 | size_t size) const { | 143 | 60 | res.resize(size); | 144 | 60 | StringSearch search; | 145 | 120 | for (int i = 0; i < size; ++i) { | 146 | 60 | StringRef substr = col_left->get_data_at(i); | 147 | 60 | search.set_pattern(&substr); | 148 | 60 | res[i] = locate_pos<is_ascii>(substr, | 149 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 150 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 151 | 60 | } | 152 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 142 | 60 | size_t size) const { | 143 | 60 | res.resize(size); | 144 | 60 | StringSearch search; | 145 | 120 | for (int i = 0; i < size; ++i) { | 146 | 60 | StringRef substr = col_left->get_data_at(i); | 147 | 60 | search.set_pattern(&substr); | 148 | 60 | res[i] = locate_pos<is_ascii>(substr, | 149 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 150 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 151 | 60 | } | 152 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 142 | 60 | size_t size) const { | 143 | 60 | res.resize(size); | 144 | 60 | StringSearch search; | 145 | 120 | for (int i = 0; i < size; ++i) { | 146 | 60 | StringRef substr = col_left->get_data_at(i); | 147 | 60 | search.set_pattern(&substr); | 148 | 60 | res[i] = locate_pos<is_ascii>(substr, | 149 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 150 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 151 | 60 | } | 152 | 60 | } |
|
153 | | |
154 | | template <bool is_ascii> |
155 | 834 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { |
156 | 834 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { |
157 | | // BEHAVIOR COMPATIBLE WITH MYSQL |
158 | | // locate('','') locate('','',1) locate('','',2) |
159 | | // 1 1 0 |
160 | 13 | return 1; |
161 | 13 | } |
162 | 821 | if (is_ascii) { |
163 | 625 | return locate_pos_ascii(substr, str, search, start_pos); |
164 | 625 | } else { |
165 | 196 | return locate_pos_utf8(substr, str, search, start_pos); |
166 | 196 | } |
167 | 821 | } _ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi Line | Count | Source | 155 | 196 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { | 156 | 196 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { | 157 | | // BEHAVIOR COMPATIBLE WITH MYSQL | 158 | | // locate('','') locate('','',1) locate('','',2) | 159 | | // 1 1 0 | 160 | 0 | return 1; | 161 | 0 | } | 162 | 196 | if (is_ascii) { | 163 | 0 | return locate_pos_ascii(substr, str, search, start_pos); | 164 | 196 | } else { | 165 | 196 | return locate_pos_utf8(substr, str, search, start_pos); | 166 | 196 | } | 167 | 196 | } |
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi Line | Count | Source | 155 | 638 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { | 156 | 638 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { | 157 | | // BEHAVIOR COMPATIBLE WITH MYSQL | 158 | | // locate('','') locate('','',1) locate('','',2) | 159 | | // 1 1 0 | 160 | 13 | return 1; | 161 | 13 | } | 162 | 625 | if (is_ascii) { | 163 | 625 | return locate_pos_ascii(substr, str, search, start_pos); | 164 | 625 | } else { | 165 | 0 | return locate_pos_utf8(substr, str, search, start_pos); | 166 | 0 | } | 167 | 625 | } |
|
168 | | |
169 | | int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search, |
170 | 196 | int start_pos) const { |
171 | 196 | std::vector<size_t> index; |
172 | 196 | size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index); |
173 | 196 | if (start_pos <= 0 || start_pos > char_len) { |
174 | 49 | return 0; |
175 | 49 | } |
176 | 147 | if (substr.size == 0) { |
177 | 18 | return start_pos; |
178 | 18 | } |
179 | | // Input start_pos starts from 1. |
180 | 129 | StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]); |
181 | 129 | int32_t match_pos = search.search(&adjusted_str); |
182 | 129 | if (match_pos >= 0) { |
183 | | // Hive returns the position in the original string starting from 1. |
184 | 111 | return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos); |
185 | 111 | } else { |
186 | 18 | return 0; |
187 | 18 | } |
188 | 129 | } |
189 | | |
190 | | int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search, |
191 | 625 | int start_pos) const { |
192 | 625 | if (start_pos <= 0 || start_pos > str.size) { |
193 | 412 | return 0; |
194 | 412 | } |
195 | 213 | if (substr.size == 0) { |
196 | 38 | return start_pos; |
197 | 38 | } |
198 | | // Input start_pos starts from 1. |
199 | 175 | StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1); |
200 | 175 | int32_t match_pos = search.search(&adjusted_str); |
201 | 175 | if (match_pos >= 0) { |
202 | | // Hive returns the position in the original string starting from 1. |
203 | 60 | return start_pos + match_pos; |
204 | 115 | } else { |
205 | 115 | return 0; |
206 | 115 | } |
207 | 175 | } |
208 | | }; |
209 | | |
210 | | class FunctionSplitPart : public IFunction { |
211 | | public: |
212 | | static constexpr auto name = "split_part"; |
213 | 142 | static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); } |
214 | 1 | String get_name() const override { return name; } |
215 | 136 | size_t get_number_of_arguments() const override { return 3; } |
216 | | |
217 | 136 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
218 | 136 | return make_nullable(std::make_shared<DataTypeString>()); |
219 | 136 | } |
220 | | |
221 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
222 | 153 | uint32_t result, size_t input_rows_count) const override { |
223 | 153 | DCHECK_EQ(arguments.size(), 3); |
224 | | |
225 | 153 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
226 | | // Create a zero column to simply implement |
227 | 153 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); |
228 | 153 | auto res = ColumnString::create(); |
229 | | |
230 | 153 | auto& null_map_data = null_map->get_data(); |
231 | 153 | auto& res_offsets = res->get_offsets(); |
232 | 153 | auto& res_chars = res->get_chars(); |
233 | 153 | res_offsets.resize(input_rows_count); |
234 | | |
235 | 153 | const size_t argument_size = arguments.size(); |
236 | 153 | std::vector<ColumnPtr> argument_columns(argument_size); |
237 | 612 | for (size_t i = 0; i < argument_size; ++i) { |
238 | 459 | argument_columns[i] = |
239 | 459 | block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); |
240 | 459 | if (const auto* nullable = |
241 | 459 | check_and_get_column<const ColumnNullable>(*argument_columns[i])) { |
242 | | // Danger: Here must dispose the null map data first! Because |
243 | | // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem |
244 | | // of column nullable mem of null map |
245 | 0 | VectorizedUtils::update_null_map(null_map->get_data(), |
246 | 0 | nullable->get_null_map_data()); |
247 | 0 | argument_columns[i] = nullable->get_nested_column_ptr(); |
248 | 0 | } |
249 | 459 | } |
250 | | |
251 | 153 | const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get()); |
252 | | |
253 | 153 | const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get()); |
254 | | |
255 | 153 | const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
256 | 153 | const auto& part_num_col_data = part_num_col->get_data(); |
257 | | |
258 | 400 | for (size_t i = 0; i < input_rows_count; ++i) { |
259 | 247 | if (part_num_col_data[i] == 0) { |
260 | 11 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
261 | 11 | continue; |
262 | 11 | } |
263 | | |
264 | 236 | auto delimiter = delimiter_col->get_data_at(i); |
265 | 236 | auto delimiter_str = delimiter_col->get_data_at(i).to_string(); |
266 | 236 | auto part_number = part_num_col_data[i]; |
267 | 236 | auto str = str_col->get_data_at(i); |
268 | 236 | if (delimiter.size == 0) { |
269 | 9 | StringOP::push_empty_string(i, res_chars, res_offsets); |
270 | 9 | continue; |
271 | 9 | } |
272 | | |
273 | 227 | if (part_number > 0) { |
274 | 188 | if (delimiter.size == 1) { |
275 | | // If delimiter is a char, use memchr to split |
276 | 155 | int32_t pre_offset = -1; |
277 | 155 | int32_t offset = -1; |
278 | 155 | int32_t num = 0; |
279 | 261 | while (num < part_number) { |
280 | 217 | pre_offset = offset; |
281 | 217 | size_t n = str.size - offset - 1; |
282 | 217 | const char* pos = reinterpret_cast<const char*>( |
283 | 217 | memchr(str.data + offset + 1, delimiter_str[0], n)); |
284 | 217 | if (pos != nullptr) { |
285 | 106 | offset = pos - str.data; |
286 | 106 | num++; |
287 | 111 | } else { |
288 | 111 | offset = str.size; |
289 | 111 | num = (num == 0) ? 0 : num + 1; |
290 | 111 | break; |
291 | 111 | } |
292 | 217 | } |
293 | | |
294 | 155 | if (num == part_number) { |
295 | 71 | StringOP::push_value_string( |
296 | 71 | std::string_view { |
297 | 71 | reinterpret_cast<const char*>(str.data + pre_offset + 1), |
298 | 71 | (size_t)offset - pre_offset - 1}, |
299 | 71 | i, res_chars, res_offsets); |
300 | 84 | } else { |
301 | 84 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
302 | 84 | } |
303 | 155 | } else { |
304 | | // If delimiter is a string, use memmem to split |
305 | 33 | int32_t pre_offset = -delimiter.size; |
306 | 33 | int32_t offset = -delimiter.size; |
307 | 33 | int32_t num = 0; |
308 | 68 | while (num < part_number) { |
309 | 54 | pre_offset = offset; |
310 | 54 | size_t n = str.size - offset - delimiter.size; |
311 | 54 | char* pos = |
312 | 54 | reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size, |
313 | 54 | n, delimiter.data, delimiter.size)); |
314 | 54 | if (pos != nullptr) { |
315 | 35 | offset = pos - str.data; |
316 | 35 | num++; |
317 | 35 | } else { |
318 | 19 | offset = str.size; |
319 | 19 | num = (num == 0) ? 0 : num + 1; |
320 | 19 | break; |
321 | 19 | } |
322 | 54 | } |
323 | | |
324 | 33 | if (num == part_number) { |
325 | 27 | StringOP::push_value_string( |
326 | 27 | std::string_view {reinterpret_cast<const char*>( |
327 | 27 | str.data + pre_offset + delimiter.size), |
328 | 27 | (size_t)offset - pre_offset - delimiter.size}, |
329 | 27 | i, res_chars, res_offsets); |
330 | 27 | } else { |
331 | 6 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
332 | 6 | } |
333 | 33 | } |
334 | 188 | } else { |
335 | 39 | part_number = -part_number; |
336 | 39 | auto str_str = str.to_string(); |
337 | 39 | int32_t offset = str.size; |
338 | 39 | int32_t pre_offset = offset; |
339 | 39 | int32_t num = 0; |
340 | 39 | auto substr = str_str; |
341 | 83 | while (num <= part_number && offset >= 0) { |
342 | 83 | offset = (int)substr.rfind(delimiter, offset); |
343 | 83 | if (offset != -1) { |
344 | 62 | if (++num == part_number) { |
345 | 18 | break; |
346 | 18 | } |
347 | 44 | pre_offset = offset; |
348 | 44 | offset = offset - 1; |
349 | 44 | substr = str_str.substr(0, pre_offset); |
350 | 44 | } else { |
351 | 21 | break; |
352 | 21 | } |
353 | 83 | } |
354 | 39 | num = (offset == -1 && num != 0) ? num + 1 : num; |
355 | | |
356 | 39 | if (num == part_number) { |
357 | 24 | if (offset == -1) { |
358 | 6 | StringOP::push_value_string( |
359 | 6 | std::string_view {reinterpret_cast<const char*>(str.data), |
360 | 6 | (size_t)pre_offset}, |
361 | 6 | i, res_chars, res_offsets); |
362 | 18 | } else { |
363 | 18 | StringOP::push_value_string( |
364 | 18 | std::string_view {str_str.substr( |
365 | 18 | offset + delimiter.size, |
366 | 18 | (size_t)pre_offset - offset - delimiter.size)}, |
367 | 18 | i, res_chars, res_offsets); |
368 | 18 | } |
369 | 24 | } else { |
370 | 15 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
371 | 15 | } |
372 | 39 | } |
373 | 227 | } |
374 | | |
375 | 153 | block.get_by_position(result).column = |
376 | 153 | ColumnNullable::create(std::move(res), std::move(null_map)); |
377 | 153 | return Status::OK(); |
378 | 153 | } |
379 | | }; |
380 | | |
381 | | class FunctionSubstringIndex : public IFunction { |
382 | | public: |
383 | | static constexpr auto name = "substring_index"; |
384 | 109 | static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); } |
385 | 1 | String get_name() const override { return name; } |
386 | 103 | size_t get_number_of_arguments() const override { return 3; } |
387 | | |
388 | 103 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
389 | 103 | return std::make_shared<DataTypeString>(); |
390 | 103 | } |
391 | | |
392 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
393 | 107 | uint32_t result, size_t input_rows_count) const override { |
394 | 107 | DCHECK_EQ(arguments.size(), 3); |
395 | | |
396 | | // Create a zero column to simply implement |
397 | 107 | auto res = ColumnString::create(); |
398 | | |
399 | 107 | auto& res_offsets = res->get_offsets(); |
400 | 107 | auto& res_chars = res->get_chars(); |
401 | 107 | res_offsets.resize(input_rows_count); |
402 | 107 | ColumnPtr content_column; |
403 | 107 | bool content_const = false; |
404 | 107 | std::tie(content_column, content_const) = |
405 | 107 | unpack_if_const(block.get_by_position(arguments[0]).column); |
406 | | |
407 | 107 | const auto* str_col = assert_cast<const ColumnString*>(content_column.get()); |
408 | | |
409 | | // Handle both constant and non-constant delimiter parameters |
410 | 107 | ColumnPtr delimiter_column_ptr; |
411 | 107 | bool delimiter_const = false; |
412 | 107 | std::tie(delimiter_column_ptr, delimiter_const) = |
413 | 107 | unpack_if_const(block.get_by_position(arguments[1]).column); |
414 | 107 | const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get()); |
415 | | |
416 | 107 | ColumnPtr part_num_column_ptr; |
417 | 107 | bool part_num_const = false; |
418 | 107 | std::tie(part_num_column_ptr, part_num_const) = |
419 | 107 | unpack_if_const(block.get_by_position(arguments[2]).column); |
420 | 107 | const ColumnInt32* part_num_col = |
421 | 107 | assert_cast<const ColumnInt32*>(part_num_column_ptr.get()); |
422 | | |
423 | | // For constant multi-character delimiters, create StringRef and StringSearch only once |
424 | 107 | std::optional<StringRef> const_delimiter_ref; |
425 | 107 | std::optional<StringSearch> const_search; |
426 | 107 | if (delimiter_const && delimiter_col->get_data_at(0).size > 1) { |
427 | 0 | const_delimiter_ref.emplace(delimiter_col->get_data_at(0)); |
428 | 0 | const_search.emplace(&const_delimiter_ref.value()); |
429 | 0 | } |
430 | | |
431 | 297 | for (size_t i = 0; i < input_rows_count; ++i) { |
432 | 190 | auto str = str_col->get_data_at(content_const ? 0 : i); |
433 | 190 | auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i); |
434 | 190 | int32_t delimiter_size = delimiter.size; |
435 | | |
436 | 190 | auto part_number = part_num_col->get_element(part_num_const ? 0 : i); |
437 | | |
438 | 190 | if (part_number == 0 || delimiter_size == 0) { |
439 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); |
440 | 7 | continue; |
441 | 7 | } |
442 | | |
443 | 183 | if (part_number > 0) { |
444 | 128 | if (delimiter_size == 1) { |
445 | 85 | int32_t offset = -1; |
446 | 85 | int32_t num = 0; |
447 | 137 | while (num < part_number) { |
448 | 117 | size_t n = str.size - offset - 1; |
449 | 117 | const char* pos = reinterpret_cast<const char*>( |
450 | 117 | memchr(str.data + offset + 1, delimiter.data[0], n)); |
451 | 117 | if (pos != nullptr) { |
452 | 52 | offset = pos - str.data; |
453 | 52 | num++; |
454 | 65 | } else { |
455 | 65 | offset = str.size; |
456 | 65 | num = (num == 0) ? 0 : num + 1; |
457 | 65 | break; |
458 | 65 | } |
459 | 117 | } |
460 | | |
461 | 85 | if (num == part_number) { |
462 | 25 | StringOP::push_value_string( |
463 | 25 | std::string_view {reinterpret_cast<const char*>(str.data), |
464 | 25 | (size_t)offset}, |
465 | 25 | i, res_chars, res_offsets); |
466 | 60 | } else { |
467 | 60 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
468 | 60 | res_chars, res_offsets); |
469 | 60 | } |
470 | 85 | } else { |
471 | | // For multi-character delimiters |
472 | | // Use pre-created StringRef and StringSearch for constant delimiters |
473 | 43 | StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value() |
474 | 43 | : StringRef(delimiter); |
475 | 43 | const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr; |
476 | 43 | StringSearch local_search(&delimiter_ref); |
477 | 43 | if (!search_ptr) { |
478 | 43 | search_ptr = &local_search; |
479 | 43 | } |
480 | | |
481 | 43 | int32_t offset = -delimiter_size; |
482 | 43 | int32_t num = 0; |
483 | 86 | while (num < part_number) { |
484 | 59 | size_t n = str.size - offset - delimiter_size; |
485 | | // search first match delimter_ref index from src string among str_offset to end |
486 | 59 | const char* pos = search_ptr->search(str.data + offset + delimiter_size, n); |
487 | 59 | if (pos < str.data + str.size) { |
488 | 43 | offset = pos - str.data; |
489 | 43 | num++; |
490 | 43 | } else { |
491 | 16 | offset = str.size; |
492 | 16 | num = (num == 0) ? 0 : num + 1; |
493 | 16 | break; |
494 | 16 | } |
495 | 59 | } |
496 | | |
497 | 43 | if (num == part_number) { |
498 | 40 | StringOP::push_value_string( |
499 | 40 | std::string_view {reinterpret_cast<const char*>(str.data), |
500 | 40 | (size_t)offset}, |
501 | 40 | i, res_chars, res_offsets); |
502 | 40 | } else { |
503 | 3 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
504 | 3 | res_chars, res_offsets); |
505 | 3 | } |
506 | 43 | } |
507 | 128 | } else { |
508 | 55 | int neg_part_number = -part_number; |
509 | 55 | auto str_str = str.to_string(); |
510 | 55 | int32_t offset = str.size; |
511 | 55 | int32_t pre_offset = offset; |
512 | 55 | int32_t num = 0; |
513 | 55 | auto substr = str_str; |
514 | | |
515 | | // Use pre-created StringRef for constant delimiters |
516 | 55 | StringRef delimiter_str = |
517 | 55 | const_delimiter_ref |
518 | 55 | ? const_delimiter_ref.value() |
519 | 55 | : StringRef(reinterpret_cast<const char*>(delimiter.data), |
520 | 55 | delimiter.size); |
521 | | |
522 | 79 | while (num <= neg_part_number && offset >= 0) { |
523 | 79 | offset = (int)substr.rfind(delimiter_str, offset); |
524 | 79 | if (offset != -1) { |
525 | 63 | if (++num == neg_part_number) { |
526 | 39 | break; |
527 | 39 | } |
528 | 24 | pre_offset = offset; |
529 | 24 | offset = offset - 1; |
530 | 24 | substr = str_str.substr(0, pre_offset); |
531 | 24 | } else { |
532 | 16 | break; |
533 | 16 | } |
534 | 79 | } |
535 | 55 | num = (offset == -1 && num != 0) ? num + 1 : num; |
536 | | |
537 | 55 | if (num == neg_part_number) { |
538 | 43 | if (offset == -1) { |
539 | 4 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
540 | 4 | res_chars, res_offsets); |
541 | 39 | } else { |
542 | 39 | StringOP::push_value_string( |
543 | 39 | std::string_view {str.data + offset + delimiter_size, |
544 | 39 | str.size - offset - delimiter_size}, |
545 | 39 | i, res_chars, res_offsets); |
546 | 39 | } |
547 | 43 | } else { |
548 | 12 | StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars, |
549 | 12 | res_offsets); |
550 | 12 | } |
551 | 55 | } |
552 | 183 | } |
553 | | |
554 | 107 | block.get_by_position(result).column = std::move(res); |
555 | 107 | return Status::OK(); |
556 | 107 | } |
557 | | }; |
558 | | |
559 | | class FunctionSplitByString : public IFunction { |
560 | | public: |
561 | | static constexpr auto name = "split_by_string"; |
562 | | |
563 | 124 | static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); } |
564 | | using NullMapType = PaddedPODArray<UInt8>; |
565 | | |
566 | 1 | String get_name() const override { return name; } |
567 | | |
568 | 119 | bool is_variadic() const override { return false; } |
569 | | |
570 | 118 | size_t get_number_of_arguments() const override { return 2; } |
571 | | |
572 | 118 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
573 | 118 | DCHECK(is_string_type(arguments[0]->get_primitive_type())) |
574 | 0 | << "first argument for function: " << name << " should be string" |
575 | 0 | << " and arguments[0] is " << arguments[0]->get_name(); |
576 | 118 | DCHECK(is_string_type(arguments[1]->get_primitive_type())) |
577 | 0 | << "second argument for function: " << name << " should be string" |
578 | 0 | << " and arguments[1] is " << arguments[1]->get_name(); |
579 | 118 | return std::make_shared<DataTypeArray>(make_nullable(arguments[0])); |
580 | 118 | } |
581 | | |
582 | | Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, |
583 | 154 | uint32_t result, size_t input_rows_count) const override { |
584 | 154 | DCHECK_EQ(arguments.size(), 2); |
585 | | |
586 | 154 | const auto& [src_column, left_const] = |
587 | 154 | unpack_if_const(block.get_by_position(arguments[0]).column); |
588 | 154 | const auto& [right_column, right_const] = |
589 | 154 | unpack_if_const(block.get_by_position(arguments[1]).column); |
590 | | |
591 | 154 | DataTypePtr right_column_type = block.get_by_position(arguments[1]).type; |
592 | 154 | DataTypePtr src_column_type = block.get_by_position(arguments[0]).type; |
593 | 154 | auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(), |
594 | 154 | ColumnArray::ColumnOffsets::create()); |
595 | | |
596 | 154 | dest_column_ptr->resize(0); |
597 | 154 | auto& dest_offsets = dest_column_ptr->get_offsets(); |
598 | | |
599 | 154 | auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data()); |
600 | 154 | auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get(); |
601 | | |
602 | 154 | const auto* col_str = assert_cast<const ColumnString*>(src_column.get()); |
603 | | |
604 | 154 | const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get()); |
605 | | |
606 | 154 | std::visit( |
607 | 154 | [&](auto src_const, auto delimiter_const) { |
608 | 154 | _execute<src_const, delimiter_const>(*col_str, *col_delimiter, |
609 | 154 | *dest_nested_column, dest_offsets, |
610 | 154 | input_rows_count); |
611 | 154 | }, _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESF_EEDaSA_SB_ Line | Count | Source | 607 | 55 | [&](auto src_const, auto delimiter_const) { | 608 | 55 | _execute<src_const, delimiter_const>(*col_str, *col_delimiter, | 609 | 55 | *dest_nested_column, dest_offsets, | 610 | 55 | input_rows_count); | 611 | 55 | }, |
_ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESE_IbLb1EEEEDaSA_SB_ Line | Count | Source | 607 | 91 | [&](auto src_const, auto delimiter_const) { | 608 | 91 | _execute<src_const, delimiter_const>(*col_str, *col_delimiter, | 609 | 91 | *dest_nested_column, dest_offsets, | 610 | 91 | input_rows_count); | 611 | 91 | }, |
_ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESE_IbLb0EEEEDaSA_SB_ Line | Count | Source | 607 | 8 | [&](auto src_const, auto delimiter_const) { | 608 | 8 | _execute<src_const, delimiter_const>(*col_str, *col_delimiter, | 609 | 8 | *dest_nested_column, dest_offsets, | 610 | 8 | input_rows_count); | 611 | 8 | }, |
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESF_EEDaSA_SB_ |
612 | 154 | make_bool_variant(left_const), make_bool_variant(right_const)); |
613 | | |
614 | | // all elements in dest_nested_column are not null |
615 | 154 | dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(), |
616 | 154 | false); |
617 | 154 | block.replace_by_position(result, std::move(dest_column_ptr)); |
618 | | |
619 | 154 | return Status::OK(); |
620 | 154 | } |
621 | | |
622 | | private: |
623 | | template <bool src_const, bool delimiter_const> |
624 | | void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column, |
625 | | IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets, |
626 | 154 | size_t size) const { |
627 | 154 | auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column); |
628 | 154 | ColumnString::Chars& column_string_chars = dest_column_string.get_chars(); |
629 | 154 | ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); |
630 | 154 | column_string_chars.reserve(0); |
631 | | |
632 | 154 | ColumnArray::Offset64 string_pos = 0; |
633 | 154 | ColumnArray::Offset64 dest_pos = 0; |
634 | | |
635 | 154 | StringSearch search; |
636 | 154 | StringRef delimiter_ref_for_search; |
637 | | |
638 | 154 | if constexpr (delimiter_const) { |
639 | 91 | delimiter_ref_for_search = delimiter_column.get_data_at(0); |
640 | 91 | search.set_pattern(&delimiter_ref_for_search); |
641 | 91 | } |
642 | | |
643 | 1.22k | for (size_t i = 0; i < size; i++) { |
644 | 1.07k | const StringRef str_ref = |
645 | 1.07k | src_column_string.get_data_at(index_check_const<src_const>(i)); |
646 | 1.07k | const StringRef delimiter_ref = |
647 | 1.07k | delimiter_column.get_data_at(index_check_const<delimiter_const>(i)); |
648 | | |
649 | 1.07k | if (str_ref.size == 0) { |
650 | 164 | dest_offsets.push_back(dest_pos); |
651 | 164 | continue; |
652 | 164 | } |
653 | 908 | if (delimiter_ref.size == 0) { |
654 | 27 | split_empty_delimiter(str_ref, column_string_chars, column_string_offsets, |
655 | 27 | string_pos, dest_pos); |
656 | 881 | } else { |
657 | 881 | if constexpr (!delimiter_const) { |
658 | 51 | search.set_pattern(&delimiter_ref); |
659 | 51 | } |
660 | 51.7k | for (size_t str_pos = 0; str_pos <= str_ref.size;) { |
661 | 50.8k | const size_t str_offset = str_pos; |
662 | 50.8k | const size_t old_size = column_string_chars.size(); |
663 | | // search first match delimter_ref index from src string among str_offset to end |
664 | 50.8k | const char* result_start = |
665 | 50.8k | search.search(str_ref.data + str_offset, str_ref.size - str_offset); |
666 | | // compute split part size |
667 | 50.8k | const size_t split_part_size = result_start - str_ref.data - str_offset; |
668 | | // save dist string split part |
669 | 50.8k | if (split_part_size > 0) { |
670 | 50.2k | const size_t new_size = old_size + split_part_size; |
671 | 50.2k | column_string_chars.resize(new_size); |
672 | 50.2k | memcpy_small_allow_read_write_overflow15( |
673 | 50.2k | column_string_chars.data() + old_size, str_ref.data + str_offset, |
674 | 50.2k | split_part_size); |
675 | | // add dist string offset |
676 | 50.2k | string_pos += split_part_size; |
677 | 50.2k | } |
678 | 50.8k | column_string_offsets.push_back(string_pos); |
679 | | // array offset + 1 |
680 | 50.8k | dest_pos++; |
681 | | // add src string str_pos to next search start |
682 | 50.8k | str_pos += split_part_size + delimiter_ref.size; |
683 | 50.8k | } |
684 | 881 | } |
685 | 908 | dest_offsets.push_back(dest_pos); |
686 | 908 | } |
687 | 154 | } _ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 626 | 55 | size_t size) const { | 627 | 55 | auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column); | 628 | 55 | ColumnString::Chars& column_string_chars = dest_column_string.get_chars(); | 629 | 55 | ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); | 630 | 55 | column_string_chars.reserve(0); | 631 | | | 632 | 55 | ColumnArray::Offset64 string_pos = 0; | 633 | 55 | ColumnArray::Offset64 dest_pos = 0; | 634 | | | 635 | 55 | StringSearch search; | 636 | 55 | StringRef delimiter_ref_for_search; | 637 | | | 638 | | if constexpr (delimiter_const) { | 639 | | delimiter_ref_for_search = delimiter_column.get_data_at(0); | 640 | | search.set_pattern(&delimiter_ref_for_search); | 641 | | } | 642 | | | 643 | 130 | for (size_t i = 0; i < size; i++) { | 644 | 75 | const StringRef str_ref = | 645 | 75 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 646 | 75 | const StringRef delimiter_ref = | 647 | 75 | delimiter_column.get_data_at(index_check_const<delimiter_const>(i)); | 648 | | | 649 | 75 | if (str_ref.size == 0) { | 650 | 13 | dest_offsets.push_back(dest_pos); | 651 | 13 | continue; | 652 | 13 | } | 653 | 62 | if (delimiter_ref.size == 0) { | 654 | 11 | split_empty_delimiter(str_ref, column_string_chars, column_string_offsets, | 655 | 11 | string_pos, dest_pos); | 656 | 51 | } else { | 657 | 51 | if constexpr (!delimiter_const) { | 658 | 51 | search.set_pattern(&delimiter_ref); | 659 | 51 | } | 660 | 214 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 661 | 163 | const size_t str_offset = str_pos; | 662 | 163 | const size_t old_size = column_string_chars.size(); | 663 | | // search first match delimter_ref index from src string among str_offset to end | 664 | 163 | const 
char* result_start = | 665 | 163 | search.search(str_ref.data + str_offset, str_ref.size - str_offset); | 666 | | // compute split part size | 667 | 163 | const size_t split_part_size = result_start - str_ref.data - str_offset; | 668 | | // save dist string split part | 669 | 163 | if (split_part_size > 0) { | 670 | 122 | const size_t new_size = old_size + split_part_size; | 671 | 122 | column_string_chars.resize(new_size); | 672 | 122 | memcpy_small_allow_read_write_overflow15( | 673 | 122 | column_string_chars.data() + old_size, str_ref.data + str_offset, | 674 | 122 | split_part_size); | 675 | | // add dist string offset | 676 | 122 | string_pos += split_part_size; | 677 | 122 | } | 678 | 163 | column_string_offsets.push_back(string_pos); | 679 | | // array offset + 1 | 680 | 163 | dest_pos++; | 681 | | // add src string str_pos to next search start | 682 | 163 | str_pos += split_part_size + delimiter_ref.size; | 683 | 163 | } | 684 | 51 | } | 685 | 62 | dest_offsets.push_back(dest_pos); | 686 | 62 | } | 687 | 55 | } |
_ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 626 | 91 | size_t size) const { | 627 | 91 | auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column); | 628 | 91 | ColumnString::Chars& column_string_chars = dest_column_string.get_chars(); | 629 | 91 | ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); | 630 | 91 | column_string_chars.reserve(0); | 631 | | | 632 | 91 | ColumnArray::Offset64 string_pos = 0; | 633 | 91 | ColumnArray::Offset64 dest_pos = 0; | 634 | | | 635 | 91 | StringSearch search; | 636 | 91 | StringRef delimiter_ref_for_search; | 637 | | | 638 | 91 | if constexpr (delimiter_const) { | 639 | 91 | delimiter_ref_for_search = delimiter_column.get_data_at(0); | 640 | 91 | search.set_pattern(&delimiter_ref_for_search); | 641 | 91 | } | 642 | | | 643 | 1.06k | for (size_t i = 0; i < size; i++) { | 644 | 973 | const StringRef str_ref = | 645 | 973 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 646 | 973 | const StringRef delimiter_ref = | 647 | 973 | delimiter_column.get_data_at(index_check_const<delimiter_const>(i)); | 648 | | | 649 | 973 | if (str_ref.size == 0) { | 650 | 135 | dest_offsets.push_back(dest_pos); | 651 | 135 | continue; | 652 | 135 | } | 653 | 838 | if (delimiter_ref.size == 0) { | 654 | 8 | split_empty_delimiter(str_ref, column_string_chars, column_string_offsets, | 655 | 8 | string_pos, dest_pos); | 656 | 830 | } else { | 657 | | if constexpr (!delimiter_const) { | 658 | | search.set_pattern(&delimiter_ref); | 659 | | } | 660 | 51.5k | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 661 | 50.7k | const size_t str_offset = str_pos; | 662 | 50.7k | const size_t old_size = column_string_chars.size(); | 663 | | // search first match delimter_ref index from src string among str_offset to end | 664 | 50.7k | 
const char* result_start = | 665 | 50.7k | search.search(str_ref.data + str_offset, str_ref.size - str_offset); | 666 | | // compute split part size | 667 | 50.7k | const size_t split_part_size = result_start - str_ref.data - str_offset; | 668 | | // save dist string split part | 669 | 50.7k | if (split_part_size > 0) { | 670 | 50.1k | const size_t new_size = old_size + split_part_size; | 671 | 50.1k | column_string_chars.resize(new_size); | 672 | 50.1k | memcpy_small_allow_read_write_overflow15( | 673 | 50.1k | column_string_chars.data() + old_size, str_ref.data + str_offset, | 674 | 50.1k | split_part_size); | 675 | | // add dist string offset | 676 | 50.1k | string_pos += split_part_size; | 677 | 50.1k | } | 678 | 50.7k | column_string_offsets.push_back(string_pos); | 679 | | // array offset + 1 | 680 | 50.7k | dest_pos++; | 681 | | // add src string str_pos to next search start | 682 | 50.7k | str_pos += split_part_size + delimiter_ref.size; | 683 | 50.7k | } | 684 | 830 | } | 685 | 838 | dest_offsets.push_back(dest_pos); | 686 | 838 | } | 687 | 91 | } |
_ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 626 | 8 | size_t size) const { | 627 | 8 | auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column); | 628 | 8 | ColumnString::Chars& column_string_chars = dest_column_string.get_chars(); | 629 | 8 | ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); | 630 | 8 | column_string_chars.reserve(0); | 631 | | | 632 | 8 | ColumnArray::Offset64 string_pos = 0; | 633 | 8 | ColumnArray::Offset64 dest_pos = 0; | 634 | | | 635 | 8 | StringSearch search; | 636 | 8 | StringRef delimiter_ref_for_search; | 637 | | | 638 | | if constexpr (delimiter_const) { | 639 | | delimiter_ref_for_search = delimiter_column.get_data_at(0); | 640 | | search.set_pattern(&delimiter_ref_for_search); | 641 | | } | 642 | | | 643 | 32 | for (size_t i = 0; i < size; i++) { | 644 | 24 | const StringRef str_ref = | 645 | 24 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 646 | 24 | const StringRef delimiter_ref = | 647 | 24 | delimiter_column.get_data_at(index_check_const<delimiter_const>(i)); | 648 | | | 649 | 24 | if (str_ref.size == 0) { | 650 | 16 | dest_offsets.push_back(dest_pos); | 651 | 16 | continue; | 652 | 16 | } | 653 | 8 | if (delimiter_ref.size == 0) { | 654 | 8 | split_empty_delimiter(str_ref, column_string_chars, column_string_offsets, | 655 | 8 | string_pos, dest_pos); | 656 | 8 | } else { | 657 | 0 | if constexpr (!delimiter_const) { | 658 | 0 | search.set_pattern(&delimiter_ref); | 659 | 0 | } | 660 | 0 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 661 | 0 | const size_t str_offset = str_pos; | 662 | 0 | const size_t old_size = column_string_chars.size(); | 663 | | // search first match delimter_ref index from src string among str_offset to end | 664 | 0 | const char* result_start = | 665 | 0 | 
search.search(str_ref.data + str_offset, str_ref.size - str_offset); | 666 | | // compute split part size | 667 | 0 | const size_t split_part_size = result_start - str_ref.data - str_offset; | 668 | | // save dist string split part | 669 | 0 | if (split_part_size > 0) { | 670 | 0 | const size_t new_size = old_size + split_part_size; | 671 | 0 | column_string_chars.resize(new_size); | 672 | 0 | memcpy_small_allow_read_write_overflow15( | 673 | 0 | column_string_chars.data() + old_size, str_ref.data + str_offset, | 674 | 0 | split_part_size); | 675 | | // add dist string offset | 676 | 0 | string_pos += split_part_size; | 677 | 0 | } | 678 | 0 | column_string_offsets.push_back(string_pos); | 679 | | // array offset + 1 | 680 | 0 | dest_pos++; | 681 | | // add src string str_pos to next search start | 682 | 0 | str_pos += split_part_size + delimiter_ref.size; | 683 | 0 | } | 684 | 0 | } | 685 | 8 | dest_offsets.push_back(dest_pos); | 686 | 8 | } | 687 | 8 | } |
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
688 | | |
689 | | void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars, |
690 | | ColumnString::Offsets& column_string_offsets, |
691 | | ColumnArray::Offset64& string_pos, |
692 | 27 | ColumnArray::Offset64& dest_pos) const { |
693 | 27 | const size_t old_size = column_string_chars.size(); |
694 | 27 | const size_t new_size = old_size + str_ref.size; |
695 | 27 | column_string_chars.resize(new_size); |
696 | 27 | memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size); |
697 | 27 | if (simd::VStringFunctions::is_ascii(str_ref)) { |
698 | 24 | const auto size = str_ref.size; |
699 | | |
700 | 24 | const auto nested_old_size = column_string_offsets.size(); |
701 | 24 | const auto nested_new_size = nested_old_size + size; |
702 | 24 | column_string_offsets.resize(nested_new_size); |
703 | 24 | std::iota(column_string_offsets.data() + nested_old_size, |
704 | 24 | column_string_offsets.data() + nested_new_size, string_pos + 1); |
705 | | |
706 | 24 | string_pos += size; |
707 | 24 | dest_pos += size; |
708 | | // The above code is equivalent to the code in the following comment. |
709 | | // for (size_t i = 0; i < str_ref.size; i++) { |
710 | | // string_pos++; |
711 | | // column_string_offsets.push_back(string_pos); |
712 | | // (*dest_nested_null_map).push_back(false); |
713 | | // dest_pos++; |
714 | | // } |
715 | 24 | } else { |
716 | 22 | for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) { |
717 | 19 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]]; |
718 | | |
719 | 19 | string_pos += utf8_char_len; |
720 | 19 | column_string_offsets.push_back(string_pos); |
721 | 19 | dest_pos++; |
722 | 19 | } |
723 | 3 | } |
724 | 27 | } |
725 | | }; |
726 | | |
/// Selects which count_substrings overload a FunctionCountSubString instantiation provides.
enum class FunctionCountSubStringType {
    // Two String arguments.
    TWO_ARGUMENTS,
    // Two String arguments followed by an Int32 argument.
    THREE_ARGUMENTS
};
728 | | |
729 | | template <FunctionCountSubStringType type> |
730 | | class FunctionCountSubString : public IFunction { |
731 | | public: |
732 | | static constexpr auto name = "count_substrings"; |
733 | | static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3; |
734 | | |
735 | 277 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv Line | Count | Source | 735 | 74 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); } |
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv Line | Count | Source | 735 | 203 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); } |
|
736 | | using NullMapType = PaddedPODArray<UInt8>; |
737 | | |
738 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev |
739 | | |
740 | 0 | size_t get_number_of_arguments() const override { return arg_count; }Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv |
741 | | |
742 | 265 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
743 | 265 | return std::make_shared<DataTypeInt32>(); |
744 | 265 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 742 | 68 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 743 | 68 | return std::make_shared<DataTypeInt32>(); | 744 | 68 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 742 | 197 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 743 | 197 | return std::make_shared<DataTypeInt32>(); | 744 | 197 | } |
|
745 | | |
746 | 10 | DataTypes get_variadic_argument_types_impl() const override { |
747 | 10 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { |
748 | 5 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; |
749 | 5 | } else { |
750 | 5 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
751 | 5 | std::make_shared<DataTypeInt32>()}; |
752 | 5 | } |
753 | 10 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv Line | Count | Source | 746 | 5 | DataTypes get_variadic_argument_types_impl() const override { | 747 | 5 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 748 | 5 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 749 | | } else { | 750 | | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 751 | | std::make_shared<DataTypeInt32>()}; | 752 | | } | 753 | 5 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv Line | Count | Source | 746 | 5 | DataTypes get_variadic_argument_types_impl() const override { | 747 | | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 748 | | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 749 | 5 | } else { | 750 | 5 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 751 | 5 | std::make_shared<DataTypeInt32>()}; | 752 | 5 | } | 753 | 5 | } |
|
754 | | |
755 | 267 | bool is_variadic() const override { return true; }_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv Line | Count | Source | 755 | 69 | bool is_variadic() const override { return true; } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv Line | Count | Source | 755 | 198 | bool is_variadic() const override { return true; } |
|
756 | | |
757 | | Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, |
758 | 234 | uint32_t result, size_t input_rows_count) const override { |
759 | 234 | DCHECK(arg_count); |
760 | 234 | bool col_const[arg_count]; |
761 | 234 | ColumnPtr argument_columns[arg_count]; |
762 | 878 | for (int i = 0; i < arg_count; ++i) { |
763 | 644 | std::tie(argument_columns[i], col_const[i]) = |
764 | 644 | unpack_if_const(block.get_by_position(arguments[i]).column); |
765 | 644 | } |
766 | | |
767 | 234 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); |
768 | 234 | auto& dest_column_data = dest_column_ptr->get_data(); |
769 | | |
770 | 234 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { |
771 | 58 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); |
772 | 58 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); |
773 | 58 | std::visit( |
774 | 58 | [&](auto str_const, auto pattern_const) { |
775 | 58 | _execute<str_const, pattern_const>(src_column_string, pattern_column, |
776 | 58 | dest_column_data, input_rows_count); |
777 | 58 | }, _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_ Line | Count | Source | 774 | 32 | [&](auto str_const, auto pattern_const) { | 775 | 32 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 776 | 32 | dest_column_data, input_rows_count); | 777 | 32 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_ Line | Count | Source | 774 | 13 | [&](auto str_const, auto pattern_const) { | 775 | 13 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 776 | 13 | dest_column_data, input_rows_count); | 777 | 13 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_ Line | Count | Source | 774 | 13 | [&](auto str_const, auto pattern_const) { | 775 | 13 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 776 | 13 | dest_column_data, input_rows_count); | 777 | 13 | }, |
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_ |
778 | 58 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); |
779 | 176 | } else { |
780 | 176 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); |
781 | 176 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); |
782 | 176 | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); |
783 | 176 | std::visit( |
784 | 176 | [&](auto str_const, auto pattern_const, auto start_pos_const) { |
785 | 176 | _execute<str_const, pattern_const, start_pos_const>( |
786 | 176 | src_column_string, pattern_column, start_pos_column, |
787 | 176 | dest_column_data, input_rows_count); |
788 | 176 | }, _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 784 | 36 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 785 | 36 | _execute<str_const, pattern_const, start_pos_const>( | 786 | 36 | src_column_string, pattern_column, start_pos_column, | 787 | 36 | dest_column_data, input_rows_count); | 788 | 36 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 784 | 29 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 785 | 29 | _execute<str_const, pattern_const, start_pos_const>( | 786 | 29 | src_column_string, pattern_column, start_pos_column, | 787 | 29 | dest_column_data, input_rows_count); | 788 | 29 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 784 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 785 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 786 | 22 | src_column_string, pattern_column, start_pos_column, | 787 | 22 | dest_column_data, input_rows_count); | 788 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 784 | 23 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 785 | 23 | _execute<str_const, pattern_const, start_pos_const>( | 786 | 23 | src_column_string, pattern_column, start_pos_column, | 787 | 23 | dest_column_data, input_rows_count); | 788 | 23 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 784 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 785 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 786 | 22 | src_column_string, pattern_column, start_pos_column, | 787 | 22 | dest_column_data, input_rows_count); | 788 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 784 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 785 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 786 | 22 | src_column_string, pattern_column, start_pos_column, | 787 | 22 | dest_column_data, input_rows_count); | 788 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 784 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 785 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 786 | 22 | src_column_string, pattern_column, start_pos_column, | 787 | 22 | dest_column_data, input_rows_count); | 788 | 22 | }, |
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ |
789 | 176 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
790 | 176 | make_bool_variant(col_const[2])); |
791 | 176 | } |
792 | | |
793 | 234 | block.replace_by_position(result, std::move(dest_column_ptr)); |
794 | 234 | return Status::OK(); |
795 | 234 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 758 | 58 | uint32_t result, size_t input_rows_count) const override { | 759 | 58 | DCHECK(arg_count); | 760 | 58 | bool col_const[arg_count]; | 761 | 58 | ColumnPtr argument_columns[arg_count]; | 762 | 174 | for (int i = 0; i < arg_count; ++i) { | 763 | 116 | std::tie(argument_columns[i], col_const[i]) = | 764 | 116 | unpack_if_const(block.get_by_position(arguments[i]).column); | 765 | 116 | } | 766 | | | 767 | 58 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); | 768 | 58 | auto& dest_column_data = dest_column_ptr->get_data(); | 769 | | | 770 | 58 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 771 | 58 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 772 | 58 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 773 | 58 | std::visit( | 774 | 58 | [&](auto str_const, auto pattern_const) { | 775 | 58 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 776 | 58 | dest_column_data, input_rows_count); | 777 | 58 | }, | 778 | 58 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); | 779 | | } else { | 780 | | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 781 | | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 782 | | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); | 783 | | std::visit( | 784 | | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 785 | | _execute<str_const, pattern_const, start_pos_const>( | 786 | | src_column_string, pattern_column, start_pos_column, | 787 | | dest_column_data, input_rows_count); | 788 | | }, | 789 | | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 790 | 
| make_bool_variant(col_const[2])); | 791 | | } | 792 | | | 793 | 58 | block.replace_by_position(result, std::move(dest_column_ptr)); | 794 | 58 | return Status::OK(); | 795 | 58 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 758 | 176 | uint32_t result, size_t input_rows_count) const override { | 759 | 176 | DCHECK(arg_count); | 760 | 176 | bool col_const[arg_count]; | 761 | 176 | ColumnPtr argument_columns[arg_count]; | 762 | 704 | for (int i = 0; i < arg_count; ++i) { | 763 | 528 | std::tie(argument_columns[i], col_const[i]) = | 764 | 528 | unpack_if_const(block.get_by_position(arguments[i]).column); | 765 | 528 | } | 766 | | | 767 | 176 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); | 768 | 176 | auto& dest_column_data = dest_column_ptr->get_data(); | 769 | | | 770 | | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 771 | | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 772 | | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 773 | | std::visit( | 774 | | [&](auto str_const, auto pattern_const) { | 775 | | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 776 | | dest_column_data, input_rows_count); | 777 | | }, | 778 | | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); | 779 | 176 | } else { | 780 | 176 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 781 | 176 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 782 | 176 | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); | 783 | 176 | std::visit( | 784 | 176 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 785 | 176 | _execute<str_const, pattern_const, start_pos_const>( | 786 | 176 | src_column_string, pattern_column, start_pos_column, | 787 | 176 | dest_column_data, input_rows_count); | 788 | 176 | }, | 789 | 176 | make_bool_variant(col_const[0]), 
make_bool_variant(col_const[1]), | 790 | 176 | make_bool_variant(col_const[2])); | 791 | 176 | } | 792 | | | 793 | 176 | block.replace_by_position(result, std::move(dest_column_ptr)); | 794 | 176 | return Status::OK(); | 795 | 176 | } |
|
796 | | |
797 | | private: |
798 | | template <bool src_const, bool pattern_const> |
799 | | void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column, |
800 | 58 | ColumnInt32::Container& dest_column_data, size_t size) const { |
801 | 58 | if constexpr (pattern_const) { |
802 | 13 | const StringRef pattern_ref = pattern_column.get_data_at(0); |
803 | 13 | if (pattern_ref.size == 0) { |
804 | 3 | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); |
805 | 3 | return; |
806 | 3 | } |
807 | | |
808 | 10 | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); |
809 | 48 | for (size_t i = 0; i < size; i++) { |
810 | 38 | const StringRef str_ref = |
811 | 38 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
812 | 38 | dest_column_data[i] = find_str_count_with_searcher(str_ref, pattern_ref, searcher); |
813 | 38 | } |
814 | 10 | return; |
815 | 13 | } |
816 | | |
817 | 200 | for (size_t i = 0; i < size; i++) { |
818 | 142 | const StringRef str_ref = |
819 | 142 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
820 | | |
821 | 142 | const StringRef pattern_ref = |
822 | 142 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); |
823 | 142 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); |
824 | 142 | } |
825 | 58 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 800 | 32 | ColumnInt32::Container& dest_column_data, size_t size) const { | 801 | | if constexpr (pattern_const) { | 802 | | const StringRef pattern_ref = pattern_column.get_data_at(0); | 803 | | if (pattern_ref.size == 0) { | 804 | | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 805 | | return; | 806 | | } | 807 | | | 808 | | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 809 | | for (size_t i = 0; i < size; i++) { | 810 | | const StringRef str_ref = | 811 | | src_column_string.get_data_at(index_check_const<src_const>(i)); | 812 | | dest_column_data[i] = find_str_count_with_searcher(str_ref, pattern_ref, searcher); | 813 | | } | 814 | | return; | 815 | | } | 816 | | | 817 | 133 | for (size_t i = 0; i < size; i++) { | 818 | 101 | const StringRef str_ref = | 819 | 101 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 820 | | | 821 | 101 | const StringRef pattern_ref = | 822 | 101 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 823 | 101 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 824 | 101 | } | 825 | 32 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 800 | 13 | ColumnInt32::Container& dest_column_data, size_t size) const { | 801 | 13 | if constexpr (pattern_const) { | 802 | 13 | const StringRef pattern_ref = pattern_column.get_data_at(0); | 803 | 13 | if (pattern_ref.size == 0) { | 804 | 3 | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 805 | 3 | return; | 806 | 3 | } | 807 | | | 808 | 10 | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 809 | 48 | for (size_t i = 0; i < size; i++) { | 810 | 38 | const StringRef str_ref = | 811 | 38 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 812 | 38 | dest_column_data[i] = find_str_count_with_searcher(str_ref, pattern_ref, searcher); | 813 | 38 | } | 814 | 10 | return; | 815 | 13 | } | 816 | | | 817 | 13 | for (size_t i = 0; i < size; i++) { | 818 | 0 | const StringRef str_ref = | 819 | 0 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 820 | |
| 821 | 0 | const StringRef pattern_ref = | 822 | 0 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 823 | 0 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 824 | 0 | } | 825 | 13 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 800 | 13 | ColumnInt32::Container& dest_column_data, size_t size) const { | 801 | | if constexpr (pattern_const) { | 802 | | const StringRef pattern_ref = pattern_column.get_data_at(0); | 803 | | if (pattern_ref.size == 0) { | 804 | | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 805 | | return; | 806 | | } | 807 | | | 808 | | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 809 | | for (size_t i = 0; i < size; i++) { | 810 | | const StringRef str_ref = | 811 | | src_column_string.get_data_at(index_check_const<src_const>(i)); | 812 | | dest_column_data[i] = find_str_count_with_searcher(str_ref, pattern_ref, searcher); | 813 | | } | 814 | | return; | 815 | | } | 816 | | | 817 | 54 | for (size_t i = 0; i < size; i++) { | 818 | 41 | const StringRef str_ref = | 819 | 41 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 820 | | | 821 | 41 | const StringRef pattern_ref = | 822 | 41 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 823 | 41 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 824 | 41 | } | 825 | 13 | } |
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
826 | | |
827 | | template <bool src_const, bool pattern_const, bool start_pos_const> |
828 | | void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column, |
829 | | const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data, |
830 | 176 | size_t size) const { |
831 | 176 | if constexpr (pattern_const) { |
832 | 67 | const StringRef pattern_ref = pattern_column.get_data_at(0); |
833 | 67 | if (pattern_ref.size == 0) { |
834 | 15 | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); |
835 | 15 | return; |
836 | 15 | } |
837 | | |
838 | 52 | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); |
839 | 118 | for (size_t i = 0; i < size; i++) { |
840 | 66 | const StringRef str_ref = |
841 | 66 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
842 | 66 | const int32_t start_pos = |
843 | 66 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; |
844 | 66 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); |
845 | | |
846 | 66 | if (start_pos < 0 || start_byte_len >= str_ref.size) { |
847 | 31 | dest_column_data[i] = 0; |
848 | 35 | } else { |
849 | 35 | dest_column_data[i] = find_str_count_with_searcher( |
850 | 35 | str_ref.substring(start_byte_len), pattern_ref, searcher); |
851 | 35 | } |
852 | 66 | } |
853 | 52 | return; |
854 | 67 | } |
855 | | |
856 | 330 | for (size_t i = 0; i < size; i++) { |
857 | 154 | const StringRef str_ref = |
858 | 154 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
859 | 154 | const StringRef pattern_ref = |
860 | 154 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); |
861 | | // 1-based index |
862 | 154 | int32_t start_pos = |
863 | 154 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; |
864 | | |
865 | 154 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); |
866 | | |
867 | 154 | if (start_pos < 0 || start_byte_len >= str_ref.size) { |
868 | 88 | dest_column_data[i] = 0; |
869 | 88 | } else { |
870 | 66 | dest_column_data[i] = |
871 | 66 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); |
872 | 66 | } |
873 | 154 | } |
874 | 176 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 830 | 36 | size_t size) const { | 831 | | if constexpr (pattern_const) { | 832 | | const StringRef pattern_ref = pattern_column.get_data_at(0); | 833 | | if (pattern_ref.size == 0) { | 834 | | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 835 | | return; | 836 | | } | 837 | | | 838 | | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 839 | | for (size_t i = 0; i < size; i++) { | 840 | | const StringRef str_ref = | 841 | | src_column_string.get_data_at(index_check_const<src_const>(i)); | 842 | | const int32_t start_pos = | 843 | | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 844 | | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 845 | | | 846 | | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 847 | | dest_column_data[i] = 0; | 848 | | } else { | 849 | | dest_column_data[i] = find_str_count_with_searcher( | 850 | | str_ref.substring(start_byte_len), pattern_ref, searcher); | 851 | | } | 852 | | } | 853 | | return; | 854 | | } | 855 | | | 856 | 97 | for (size_t i = 0; i < size; i++) { | 857 | 61 | const StringRef str_ref = | 858 | 61 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 859 | 61 | const StringRef pattern_ref = | 860 | 61 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 861 | | // 1-based index | 862 | 61 | int32_t start_pos = | 863 | 61 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 864 | | | 865 | 61 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 866 | | | 867 | 61 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 868 | 38 | dest_column_data[i] 
= 0; | 869 | 38 | } else { | 870 | 23 | dest_column_data[i] = | 871 | 23 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 872 | 23 | } | 873 | 61 | } | 874 | 36 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 830 | 29 | size_t size) const { | 831 | | if constexpr (pattern_const) { | 832 | | const StringRef pattern_ref = pattern_column.get_data_at(0); | 833 | | if (pattern_ref.size == 0) { | 834 | | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 835 | | return; | 836 | | } | 837 | | | 838 | | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 839 | | for (size_t i = 0; i < size; i++) { | 840 | | const StringRef str_ref = | 841 | | src_column_string.get_data_at(index_check_const<src_const>(i)); | 842 | | const int32_t start_pos = | 843 | | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 844 | | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 845 | | | 846 | | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 847 | | dest_column_data[i] = 0; | 848 | | } else { | 849 | | dest_column_data[i] = find_str_count_with_searcher( | 850 | | str_ref.substring(start_byte_len), pattern_ref, searcher); | 851 | | } | 852 | | } | 853 | | return; | 854 | | } | 855 | | | 856 | 78 | for (size_t i = 0; i < size; i++) { | 857 | 49 | const StringRef str_ref = | 858 | 49 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 859 | 49 | const StringRef pattern_ref = | 860 | 49 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 861 | | // 1-based index | 862 | 49 | int32_t start_pos = | 863 | 49 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 864 | | | 865 | 49 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 866 | | | 867 | 49 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 868 | 22 | dest_column_data[i] = 0; | 869 | 
27 | } else { | 870 | 27 | dest_column_data[i] = | 871 | 27 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 872 | 27 | } | 873 | 49 | } | 874 | 29 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 830 | 22 | size_t size) const { | 831 | 22 | if constexpr (pattern_const) { | 832 | 22 | const StringRef pattern_ref = pattern_column.get_data_at(0); | 833 | 22 | if (pattern_ref.size == 0) { | 834 | 5 | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 835 | 5 | return; | 836 | 5 | } | 837 | | | 838 | 17 | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 839 | 34 | for (size_t i = 0; i < size; i++) { | 840 | 17 | const StringRef str_ref = | 841 | 17 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 842 | 17 | const int32_t start_pos = | 843 | 17 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 844 | 17 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 845 | | | 846 | 17 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 847 | 9 | dest_column_data[i] = 0; | 848 | 9 | } else { | 849 | 8 | dest_column_data[i] = find_str_count_with_searcher( | 850 | 8 | str_ref.substring(start_byte_len), pattern_ref, searcher); | 851 | 8 | } | 852 | 17 | } | 853 | 17 | return; | 854 | 22 | } | 855 | | | 856 | 22 | for (size_t i = 0; i < size; i++) { | 857 | 0 | const StringRef str_ref = | 858 | 0 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 859 | 0 | const StringRef pattern_ref = | 860 | 0 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 861 | | // 1-based index | 862 | 0 | int32_t start_pos = | 863 | 0 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 864 | |
| 865 | 0 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 866 | |
| 867 | 0 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 868 | 0 | dest_column_data[i] = 0; | 869 | 0 | } else { | 870 | 0 | dest_column_data[i] = | 871 | 0 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 872 | 0 | } | 873 | 0 | } | 874 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 830 | 23 | size_t size) const { | 831 | 23 | if constexpr (pattern_const) { | 832 | 23 | const StringRef pattern_ref = pattern_column.get_data_at(0); | 833 | 23 | if (pattern_ref.size == 0) { | 834 | 5 | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 835 | 5 | return; | 836 | 5 | } | 837 | | | 838 | 18 | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 839 | 50 | for (size_t i = 0; i < size; i++) { | 840 | 32 | const StringRef str_ref = | 841 | 32 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 842 | 32 | const int32_t start_pos = | 843 | 32 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 844 | 32 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 845 | | | 846 | 32 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 847 | 13 | dest_column_data[i] = 0; | 848 | 19 | } else { | 849 | 19 | dest_column_data[i] = find_str_count_with_searcher( | 850 | 19 | str_ref.substring(start_byte_len), pattern_ref, searcher); | 851 | 19 | } | 852 | 32 | } | 853 | 18 | return; | 854 | 23 | } | 855 | | | 856 | 23 | for (size_t i = 0; i < size; i++) { | 857 | 0 | const StringRef str_ref = | 858 | 0 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 859 | 0 | const StringRef pattern_ref = | 860 | 0 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 861 | | // 1-based index | 862 | 0 | int32_t start_pos = | 863 | 0 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 864 | |
| 865 | 0 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 866 | |
| 867 | 0 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 868 | 0 | dest_column_data[i] = 0; | 869 | 0 | } else { | 870 | 0 | dest_column_data[i] = | 871 | 0 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 872 | 0 | } | 873 | 0 | } | 874 | 23 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 830 | 22 | size_t size) const { | 831 | | if constexpr (pattern_const) { | 832 | | const StringRef pattern_ref = pattern_column.get_data_at(0); | 833 | | if (pattern_ref.size == 0) { | 834 | | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 835 | | return; | 836 | | } | 837 | | | 838 | | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 839 | | for (size_t i = 0; i < size; i++) { | 840 | | const StringRef str_ref = | 841 | | src_column_string.get_data_at(index_check_const<src_const>(i)); | 842 | | const int32_t start_pos = | 843 | | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 844 | | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 845 | | | 846 | | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 847 | | dest_column_data[i] = 0; | 848 | | } else { | 849 | | dest_column_data[i] = find_str_count_with_searcher( | 850 | | str_ref.substring(start_byte_len), pattern_ref, searcher); | 851 | | } | 852 | | } | 853 | | return; | 854 | | } | 855 | | | 856 | 44 | for (size_t i = 0; i < size; i++) { | 857 | 22 | const StringRef str_ref = | 858 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 859 | 22 | const StringRef pattern_ref = | 860 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 861 | | // 1-based index | 862 | 22 | int32_t start_pos = | 863 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 864 | | | 865 | 22 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 866 | | | 867 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 868 | 14 | dest_column_data[i] = 0; | 869 | 
14 | } else { | 870 | 8 | dest_column_data[i] = | 871 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 872 | 8 | } | 873 | 22 | } | 874 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 830 | 22 | size_t size) const { | 831 | | if constexpr (pattern_const) { | 832 | | const StringRef pattern_ref = pattern_column.get_data_at(0); | 833 | | if (pattern_ref.size == 0) { | 834 | | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 835 | | return; | 836 | | } | 837 | | | 838 | | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 839 | | for (size_t i = 0; i < size; i++) { | 840 | | const StringRef str_ref = | 841 | | src_column_string.get_data_at(index_check_const<src_const>(i)); | 842 | | const int32_t start_pos = | 843 | | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 844 | | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 845 | | | 846 | | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 847 | | dest_column_data[i] = 0; | 848 | | } else { | 849 | | dest_column_data[i] = find_str_count_with_searcher( | 850 | | str_ref.substring(start_byte_len), pattern_ref, searcher); | 851 | | } | 852 | | } | 853 | | return; | 854 | | } | 855 | | | 856 | 44 | for (size_t i = 0; i < size; i++) { | 857 | 22 | const StringRef str_ref = | 858 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 859 | 22 | const StringRef pattern_ref = | 860 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 861 | | // 1-based index | 862 | 22 | int32_t start_pos = | 863 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 864 | | | 865 | 22 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 866 | | | 867 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 868 | 14 | dest_column_data[i] = 0; | 869 | 
14 | } else { | 870 | 8 | dest_column_data[i] = | 871 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 872 | 8 | } | 873 | 22 | } | 874 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 830 | 22 | size_t size) const { | 831 | 22 | if constexpr (pattern_const) { | 832 | 22 | const StringRef pattern_ref = pattern_column.get_data_at(0); | 833 | 22 | if (pattern_ref.size == 0) { | 834 | 5 | std::fill(dest_column_data.begin(), dest_column_data.end(), 0); | 835 | 5 | return; | 836 | 5 | } | 837 | | | 838 | 17 | const ASCIICaseSensitiveStringSearcher searcher(pattern_ref.data, pattern_ref.size); | 839 | 34 | for (size_t i = 0; i < size; i++) { | 840 | 17 | const StringRef str_ref = | 841 | 17 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 842 | 17 | const int32_t start_pos = | 843 | 17 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 844 | 17 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 845 | | | 846 | 17 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 847 | 9 | dest_column_data[i] = 0; | 848 | 9 | } else { | 849 | 8 | dest_column_data[i] = find_str_count_with_searcher( | 850 | 8 | str_ref.substring(start_byte_len), pattern_ref, searcher); | 851 | 8 | } | 852 | 17 | } | 853 | 17 | return; | 854 | 22 | } | 855 | | | 856 | 22 | for (size_t i = 0; i < size; i++) { | 857 | 0 | const StringRef str_ref = | 858 | 0 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 859 | 0 | const StringRef pattern_ref = | 860 | 0 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 861 | | // 1-based index | 862 | 0 | int32_t start_pos = | 863 | 0 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 864 | |
| 865 | 0 | const auto start_byte_len = get_start_byte_len(str_ref, start_pos); | 866 | |
| 867 | 0 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 868 | 0 | dest_column_data[i] = 0; | 869 | 0 | } else { | 870 | 0 | dest_column_data[i] = | 871 | 0 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 872 | 0 | } | 873 | 0 | } | 874 | 22 | } |
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
875 | | |
876 | 220 | size_t get_start_byte_len(const StringRef str_ref, int32_t start_pos) const { |
877 | 220 | const char* p = str_ref.begin(); |
878 | 220 | const char* end = str_ref.end(); |
879 | 220 | int char_size = 0; |
880 | 1.34k | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { |
881 | 1.12k | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; |
882 | 1.12k | } |
883 | 220 | return p - str_ref.begin(); |
884 | 220 | } |
885 | | |
886 | 387 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { |
887 | 387 | size_t old_size = pos; |
888 | 387 | size_t str_size = str_ref.size; |
889 | 1.51k | while (pos < str_size && |
890 | 1.51k | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, |
891 | 1.36k | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { |
892 | 1.13k | pos++; |
893 | 1.13k | } |
894 | 387 | return pos - old_size; |
895 | 387 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_ Line | Count | Source | 886 | 223 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { | 887 | 223 | size_t old_size = pos; | 888 | 223 | size_t str_size = str_ref.size; | 889 | 753 | while (pos < str_size && | 890 | 753 | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, | 891 | 661 | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { | 892 | 530 | pos++; | 893 | 530 | } | 894 | 223 | return pos - old_size; | 895 | 223 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_ Line | Count | Source | 886 | 164 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { | 887 | 164 | size_t old_size = pos; | 888 | 164 | size_t str_size = str_ref.size; | 889 | 764 | while (pos < str_size && | 890 | 764 | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, | 891 | 700 | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { | 892 | 600 | pos++; | 893 | 600 | } | 894 | 164 | return pos - old_size; | 895 | 164 | } |
|
896 | | |
897 | 208 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { |
898 | 208 | int count = 0; |
899 | 208 | if (str_ref.size == 0 || pattern_ref.size == 0) { |
900 | 52 | return 0; |
901 | 156 | } else { |
902 | 387 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { |
903 | 387 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); |
904 | 387 | if (res_pos == (str_ref.size - str_pos)) { |
905 | 156 | break; // not find |
906 | 156 | } |
907 | 231 | count++; |
908 | 231 | str_pos = str_pos + res_pos + pattern_ref.size; |
909 | 231 | } |
910 | 156 | } |
911 | 156 | return count; |
912 | 208 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_ Line | Count | Source | 897 | 142 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { | 898 | 142 | int count = 0; | 899 | 142 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 900 | 50 | return 0; | 901 | 92 | } else { | 902 | 223 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 903 | 223 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); | 904 | 223 | if (res_pos == (str_ref.size - str_pos)) { | 905 | 92 | break; // not find | 906 | 92 | } | 907 | 131 | count++; | 908 | 131 | str_pos = str_pos + res_pos + pattern_ref.size; | 909 | 131 | } | 910 | 92 | } | 911 | 92 | return count; | 912 | 142 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_ Line | Count | Source | 897 | 66 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { | 898 | 66 | int count = 0; | 899 | 66 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 900 | 2 | return 0; | 901 | 64 | } else { | 902 | 164 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 903 | 164 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); | 904 | 164 | if (res_pos == (str_ref.size - str_pos)) { | 905 | 64 | break; // not find | 906 | 64 | } | 907 | 100 | count++; | 908 | 100 | str_pos = str_pos + res_pos + pattern_ref.size; | 909 | 100 | } | 910 | 64 | } | 911 | 64 | return count; | 912 | 66 | } |
|
913 | | |
914 | | int find_str_count_with_searcher(const StringRef str_ref, StringRef pattern_ref, |
915 | 73 | const ASCIICaseSensitiveStringSearcher& searcher) const { |
916 | 73 | if (str_ref.size == 0 || pattern_ref.size == 0) { |
917 | 9 | return 0; |
918 | 9 | } |
919 | | |
920 | 64 | int count = 0; |
921 | 64 | const char* pos = str_ref.data; |
922 | 64 | const char* const end = str_ref.data + str_ref.size; |
923 | 142 | while (pos < end) { |
924 | 142 | const char* match = searcher.search(pos, end); |
925 | 142 | if (match == end) { |
926 | 64 | break; |
927 | 64 | } |
928 | 78 | ++count; |
929 | 78 | pos = match + pattern_ref.size; |
930 | 78 | } |
931 | 64 | return count; |
932 | 73 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE28find_str_count_with_searcherENS_9StringRefES3_RKNS_14StringSearcherILb1ELb1EEE Line | Count | Source | 915 | 38 | const ASCIICaseSensitiveStringSearcher& searcher) const { | 916 | 38 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 917 | 9 | return 0; | 918 | 9 | } | 919 | | | 920 | 29 | int count = 0; | 921 | 29 | const char* pos = str_ref.data; | 922 | 29 | const char* const end = str_ref.data + str_ref.size; | 923 | 68 | while (pos < end) { | 924 | 68 | const char* match = searcher.search(pos, end); | 925 | 68 | if (match == end) { | 926 | 29 | break; | 927 | 29 | } | 928 | 39 | ++count; | 929 | 39 | pos = match + pattern_ref.size; | 930 | 39 | } | 931 | 29 | return count; | 932 | 38 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE28find_str_count_with_searcherENS_9StringRefES3_RKNS_14StringSearcherILb1ELb1EEE Line | Count | Source | 915 | 35 | const ASCIICaseSensitiveStringSearcher& searcher) const { | 916 | 35 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 917 | 0 | return 0; | 918 | 0 | } | 919 | | | 920 | 35 | int count = 0; | 921 | 35 | const char* pos = str_ref.data; | 922 | 35 | const char* const end = str_ref.data + str_ref.size; | 923 | 74 | while (pos < end) { | 924 | 74 | const char* match = searcher.search(pos, end); | 925 | 74 | if (match == end) { | 926 | 35 | break; | 927 | 35 | } | 928 | 39 | ++count; | 929 | 39 | pos = match + pattern_ref.size; | 930 | 39 | } | 931 | 35 | return count; | 932 | 35 | } |
|
933 | | }; |
934 | | |
935 | 5 | void register_function_string_search(SimpleFunctionFactory& factory) { |
936 | 5 | factory.register_function<FunctionStringLocatePos>(); |
937 | 5 | factory.register_function<FunctionSplitPart>(); |
938 | 5 | factory.register_function<FunctionSplitByString>(); |
939 | 5 | factory.register_function<FunctionCountSubString<FunctionCountSubStringType::TWO_ARGUMENTS>>(); |
940 | 5 | factory.register_function< |
941 | 5 | FunctionCountSubString<FunctionCountSubStringType::THREE_ARGUMENTS>>(); |
942 | 5 | factory.register_function<FunctionSubstringIndex>(); |
943 | | |
944 | 5 | factory.register_alias(FunctionStringLocatePos::name, "position"); |
945 | 5 | } |
946 | | |
947 | | #include "common/compile_check_avoid_end.h" |
948 | | } // namespace doris |