be/src/exprs/function/function_string_search.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <cstddef> |
19 | | #include <cstring> |
20 | | #include <numeric> |
21 | | #include <string> |
22 | | #include <string_view> |
23 | | #include <vector> |
24 | | |
25 | | #include "common/status.h" |
26 | | #include "core/assert_cast.h" |
27 | | #include "core/block/block.h" |
28 | | #include "core/block/column_numbers.h" |
29 | | #include "core/column/column_array.h" |
30 | | #include "core/column/column_const.h" |
31 | | #include "core/column/column_nullable.h" |
32 | | #include "core/column/column_string.h" |
33 | | #include "core/column/column_vector.h" |
34 | | #include "core/data_type/data_type_array.h" |
35 | | #include "core/data_type/data_type_nullable.h" |
36 | | #include "core/data_type/data_type_number.h" |
37 | | #include "core/data_type/data_type_string.h" |
38 | | #include "core/data_type/define_primitive_type.h" |
39 | | #include "core/memcmp_small.h" |
40 | | #include "core/memcpy_small.h" |
41 | | #include "core/pod_array_fwd.h" |
42 | | #include "core/string_ref.h" |
43 | | #include "exec/common/stringop_substring.h" |
44 | | #include "exec/common/template_helpers.hpp" |
45 | | #include "exec/common/util.hpp" |
46 | | #include "exprs/function/function.h" |
47 | | #include "exprs/function/function_helpers.h" |
48 | | #include "exprs/function/simple_function_factory.h" |
49 | | #include "exprs/function_context.h" |
50 | | #include "util/simd/vstring_function.h" |
51 | | #include "util/string_search.hpp" |
52 | | |
53 | | namespace doris { |
54 | | #include "common/compile_check_avoid_begin.h" |
55 | | |
56 | | class FunctionStringLocatePos : public IFunction { |
57 | | public: |
58 | | static constexpr auto name = "locate"; |
59 | 816 | static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); } |
60 | 0 | String get_name() const override { return name; } |
61 | 0 | size_t get_number_of_arguments() const override { return 3; } |
62 | | |
63 | 814 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
64 | 814 | return std::make_shared<DataTypeInt32>(); |
65 | 814 | } |
66 | | |
67 | 1 | DataTypes get_variadic_argument_types_impl() const override { |
68 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
69 | 1 | std::make_shared<DataTypeInt32>()}; |
70 | 1 | } |
71 | | |
72 | 815 | bool is_variadic() const override { return true; } |
73 | | |
74 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
75 | 576 | uint32_t result, size_t input_rows_count) const override { |
76 | 576 | if (arguments.size() != 3) { |
77 | 0 | return Status::InvalidArgument("Function {} requires 3 arguments, but got {}", |
78 | 0 | get_name(), arguments.size()); |
79 | 0 | } |
80 | 576 | bool col_const[3]; |
81 | 576 | ColumnPtr argument_columns[3]; |
82 | 2.30k | for (int i = 0; i < 3; ++i) { |
83 | 1.72k | std::tie(argument_columns[i], col_const[i]) = |
84 | 1.72k | unpack_if_const(block.get_by_position(arguments[i]).column); |
85 | 1.72k | } |
86 | | |
87 | 576 | const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get()); |
88 | 576 | const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get()); |
89 | 576 | const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
90 | | |
91 | 576 | ColumnInt32::MutablePtr col_res = ColumnInt32::create(); |
92 | 576 | auto& vec_res = col_res->get_data(); |
93 | 576 | vec_res.resize(block.rows()); |
94 | | |
95 | 576 | const bool is_ascii = col_left->is_ascii() && col_right->is_ascii(); |
96 | | |
97 | 576 | if (col_const[0]) { |
98 | 246 | std::visit( |
99 | 246 | [&](auto is_ascii, auto str_const, auto pos_const) { |
100 | 246 | scalar_search<is_ascii, str_const, pos_const>( |
101 | 246 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, |
102 | 246 | input_rows_count); |
103 | 246 | }, _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 99 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 22 | input_rows_count); | 103 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 99 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 22 | input_rows_count); | 103 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 99 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 22 | input_rows_count); | 103 | 22 | }, |
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 99 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 60 | input_rows_count); | 103 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 99 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 60 | input_rows_count); | 103 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 99 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 60 | input_rows_count); | 103 | 60 | }, |
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
104 | 246 | make_bool_variant(is_ascii), make_bool_variant(col_const[1]), |
105 | 246 | make_bool_variant(col_const[2])); |
106 | | |
107 | 330 | } else { |
108 | 330 | std::visit( |
109 | 330 | [&](auto is_ascii, auto str_const, auto pos_const) { |
110 | 330 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, |
111 | 330 | col_pos->get_data(), vec_res, |
112 | 330 | input_rows_count); |
113 | 330 | }, _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 109 | 23 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 23 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 23 | col_pos->get_data(), vec_res, | 112 | 23 | input_rows_count); | 113 | 23 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 109 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 22 | col_pos->get_data(), vec_res, | 112 | 22 | input_rows_count); | 113 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 109 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 22 | col_pos->get_data(), vec_res, | 112 | 22 | input_rows_count); | 113 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ Line | Count | Source | 109 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 22 | col_pos->get_data(), vec_res, | 112 | 22 | input_rows_count); | 113 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 109 | 61 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 61 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 61 | col_pos->get_data(), vec_res, | 112 | 61 | input_rows_count); | 113 | 61 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 109 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 60 | col_pos->get_data(), vec_res, | 112 | 60 | input_rows_count); | 113 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 109 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 60 | col_pos->get_data(), vec_res, | 112 | 60 | input_rows_count); | 113 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 109 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 60 | col_pos->get_data(), vec_res, | 112 | 60 | input_rows_count); | 113 | 60 | }, |
|
114 | 330 | make_bool_variant(is_ascii), make_bool_variant(col_const[1]), |
115 | 330 | make_bool_variant(col_const[2])); |
116 | 330 | } |
117 | 576 | block.replace_by_position(result, std::move(col_res)); |
118 | 576 | return Status::OK(); |
119 | 576 | } |
120 | | |
121 | | private: |
122 | | template <bool is_ascii, bool str_const, bool pos_const> |
123 | | void scalar_search(const StringRef& ldata, const ColumnString* col_right, |
124 | | const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res, |
125 | 246 | size_t size) const { |
126 | 246 | res.resize(size); |
127 | 246 | StringRef substr(ldata.data, ldata.size); |
128 | 246 | StringSearch search {&substr}; |
129 | | |
130 | 492 | for (int i = 0; i < size; ++i) { |
131 | 246 | res[i] = locate_pos<is_ascii>(substr, |
132 | 246 | col_right->get_data_at(index_check_const<str_const>(i)), |
133 | 246 | search, posdata[index_check_const<pos_const>(i)]); |
134 | 246 | } |
135 | 246 | } _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 22 | size_t size) const { | 126 | 22 | res.resize(size); | 127 | 22 | StringRef substr(ldata.data, ldata.size); | 128 | 22 | StringSearch search {&substr}; | 129 | | | 130 | 44 | for (int i = 0; i < size; ++i) { | 131 | 22 | res[i] = locate_pos<is_ascii>(substr, | 132 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 22 | } | 135 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 22 | size_t size) const { | 126 | 22 | res.resize(size); | 127 | 22 | StringRef substr(ldata.data, ldata.size); | 128 | 22 | StringSearch search {&substr}; | 129 | | | 130 | 44 | for (int i = 0; i < size; ++i) { | 131 | 22 | res[i] = locate_pos<is_ascii>(substr, | 132 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 22 | } | 135 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 22 | size_t size) const { | 126 | 22 | res.resize(size); | 127 | 22 | StringRef substr(ldata.data, ldata.size); | 128 | 22 | StringSearch search {&substr}; | 129 | | | 130 | 44 | for (int i = 0; i < size; ++i) { | 131 | 22 | res[i] = locate_pos<is_ascii>(substr, | 132 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 22 | } | 135 | 22 | } |
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 60 | size_t size) const { | 126 | 60 | res.resize(size); | 127 | 60 | StringRef substr(ldata.data, ldata.size); | 128 | 60 | StringSearch search {&substr}; | 129 | | | 130 | 120 | for (int i = 0; i < size; ++i) { | 131 | 60 | res[i] = locate_pos<is_ascii>(substr, | 132 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 60 | } | 135 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 60 | size_t size) const { | 126 | 60 | res.resize(size); | 127 | 60 | StringRef substr(ldata.data, ldata.size); | 128 | 60 | StringSearch search {&substr}; | 129 | | | 130 | 120 | for (int i = 0; i < size; ++i) { | 131 | 60 | res[i] = locate_pos<is_ascii>(substr, | 132 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 60 | } | 135 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 60 | size_t size) const { | 126 | 60 | res.resize(size); | 127 | 60 | StringRef substr(ldata.data, ldata.size); | 128 | 60 | StringSearch search {&substr}; | 129 | | | 130 | 120 | for (int i = 0; i < size; ++i) { | 131 | 60 | res[i] = locate_pos<is_ascii>(substr, | 132 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 60 | } | 135 | 60 | } |
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m |
136 | | |
137 | | template <bool is_ascii, bool str_const, bool pos_const> |
138 | | void vector_search(const ColumnString* col_left, const ColumnString* col_right, |
139 | | const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res, |
140 | 330 | size_t size) const { |
141 | 330 | res.resize(size); |
142 | 330 | StringSearch search; |
143 | 774 | for (int i = 0; i < size; ++i) { |
144 | 444 | StringRef substr = col_left->get_data_at(i); |
145 | 444 | search.set_pattern(&substr); |
146 | 444 | res[i] = locate_pos<is_ascii>(substr, |
147 | 444 | col_right->get_data_at(index_check_const<str_const>(i)), |
148 | 444 | search, posdata[index_check_const<pos_const>(i)]); |
149 | 444 | } |
150 | 330 | } _ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 23 | size_t size) const { | 141 | 23 | res.resize(size); | 142 | 23 | StringSearch search; | 143 | 71 | for (int i = 0; i < size; ++i) { | 144 | 48 | StringRef substr = col_left->get_data_at(i); | 145 | 48 | search.set_pattern(&substr); | 146 | 48 | res[i] = locate_pos<is_ascii>(substr, | 147 | 48 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 48 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 48 | } | 150 | 23 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 22 | size_t size) const { | 141 | 22 | res.resize(size); | 142 | 22 | StringSearch search; | 143 | 44 | for (int i = 0; i < size; ++i) { | 144 | 22 | StringRef substr = col_left->get_data_at(i); | 145 | 22 | search.set_pattern(&substr); | 146 | 22 | res[i] = locate_pos<is_ascii>(substr, | 147 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 22 | } | 150 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 22 | size_t size) const { | 141 | 22 | res.resize(size); | 142 | 22 | StringSearch search; | 143 | 44 | for (int i = 0; i < size; ++i) { | 144 | 22 | StringRef substr = col_left->get_data_at(i); | 145 | 22 | search.set_pattern(&substr); | 146 | 22 | res[i] = locate_pos<is_ascii>(substr, | 147 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 22 | } | 150 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 22 | size_t size) const { | 141 | 22 | res.resize(size); | 142 | 22 | StringSearch search; | 143 | 44 | for (int i = 0; i < size; ++i) { | 144 | 22 | StringRef substr = col_left->get_data_at(i); | 145 | 22 | search.set_pattern(&substr); | 146 | 22 | res[i] = locate_pos<is_ascii>(substr, | 147 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 22 | } | 150 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 61 | size_t size) const { | 141 | 61 | res.resize(size); | 142 | 61 | StringSearch search; | 143 | 211 | for (int i = 0; i < size; ++i) { | 144 | 150 | StringRef substr = col_left->get_data_at(i); | 145 | 150 | search.set_pattern(&substr); | 146 | 150 | res[i] = locate_pos<is_ascii>(substr, | 147 | 150 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 150 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 150 | } | 150 | 61 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 60 | size_t size) const { | 141 | 60 | res.resize(size); | 142 | 60 | StringSearch search; | 143 | 120 | for (int i = 0; i < size; ++i) { | 144 | 60 | StringRef substr = col_left->get_data_at(i); | 145 | 60 | search.set_pattern(&substr); | 146 | 60 | res[i] = locate_pos<is_ascii>(substr, | 147 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 60 | } | 150 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 60 | size_t size) const { | 141 | 60 | res.resize(size); | 142 | 60 | StringSearch search; | 143 | 120 | for (int i = 0; i < size; ++i) { | 144 | 60 | StringRef substr = col_left->get_data_at(i); | 145 | 60 | search.set_pattern(&substr); | 146 | 60 | res[i] = locate_pos<is_ascii>(substr, | 147 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 60 | } | 150 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 60 | size_t size) const { | 141 | 60 | res.resize(size); | 142 | 60 | StringSearch search; | 143 | 120 | for (int i = 0; i < size; ++i) { | 144 | 60 | StringRef substr = col_left->get_data_at(i); | 145 | 60 | search.set_pattern(&substr); | 146 | 60 | res[i] = locate_pos<is_ascii>(substr, | 147 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 60 | } | 150 | 60 | } |
|
151 | | |
152 | | template <bool is_ascii> |
153 | 690 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { |
154 | 690 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { |
155 | | // BEHAVIOR COMPATIBLE WITH MYSQL |
156 | | // locate('','') locate('','',1) locate('','',2) |
157 | | // 1 1 0 |
158 | 11 | return 1; |
159 | 11 | } |
160 | 679 | if (is_ascii) { |
161 | 499 | return locate_pos_ascii(substr, str, search, start_pos); |
162 | 499 | } else { |
163 | 180 | return locate_pos_utf8(substr, str, search, start_pos); |
164 | 180 | } |
165 | 679 | } _ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi Line | Count | Source | 153 | 180 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { | 154 | 180 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { | 155 | | // BEHAVIOR COMPATIBLE WITH MYSQL | 156 | | // locate('','') locate('','',1) locate('','',2) | 157 | | // 1 1 0 | 158 | 0 | return 1; | 159 | 0 | } | 160 | 180 | if (is_ascii) { | 161 | 0 | return locate_pos_ascii(substr, str, search, start_pos); | 162 | 180 | } else { | 163 | 180 | return locate_pos_utf8(substr, str, search, start_pos); | 164 | 180 | } | 165 | 180 | } |
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi Line | Count | Source | 153 | 510 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { | 154 | 510 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { | 155 | | // BEHAVIOR COMPATIBLE WITH MYSQL | 156 | | // locate('','') locate('','',1) locate('','',2) | 157 | | // 1 1 0 | 158 | 11 | return 1; | 159 | 11 | } | 160 | 499 | if (is_ascii) { | 161 | 499 | return locate_pos_ascii(substr, str, search, start_pos); | 162 | 499 | } else { | 163 | 0 | return locate_pos_utf8(substr, str, search, start_pos); | 164 | 0 | } | 165 | 499 | } |
|
166 | | |
167 | | int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search, |
168 | 180 | int start_pos) const { |
169 | 180 | std::vector<size_t> index; |
170 | 180 | size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index); |
171 | 180 | if (start_pos <= 0 || start_pos > char_len) { |
172 | 43 | return 0; |
173 | 43 | } |
174 | 137 | if (substr.size == 0) { |
175 | 17 | return start_pos; |
176 | 17 | } |
177 | | // Input start_pos starts from 1. |
178 | 120 | StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]); |
179 | 120 | int32_t match_pos = search.search(&adjusted_str); |
180 | 120 | if (match_pos >= 0) { |
181 | | // Hive returns the position in the original string starting from 1. |
182 | 104 | return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos); |
183 | 104 | } else { |
184 | 16 | return 0; |
185 | 16 | } |
186 | 120 | } |
187 | | |
188 | | int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search, |
189 | 499 | int start_pos) const { |
190 | 499 | if (start_pos <= 0 || start_pos > str.size) { |
191 | 367 | return 0; |
192 | 367 | } |
193 | 132 | if (substr.size == 0) { |
194 | 36 | return start_pos; |
195 | 36 | } |
196 | | // Input start_pos starts from 1. |
197 | 96 | StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1); |
198 | 96 | int32_t match_pos = search.search(&adjusted_str); |
199 | 96 | if (match_pos >= 0) { |
200 | | // Hive returns the position in the original string starting from 1. |
201 | 40 | return start_pos + match_pos; |
202 | 56 | } else { |
203 | 56 | return 0; |
204 | 56 | } |
205 | 96 | } |
206 | | }; |
207 | | |
208 | | class FunctionSplitPart : public IFunction { |
209 | | public: |
210 | | static constexpr auto name = "split_part"; |
211 | 2 | static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); } |
212 | 1 | String get_name() const override { return name; } |
213 | 0 | size_t get_number_of_arguments() const override { return 3; } |
214 | | |
215 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
216 | 0 | return make_nullable(std::make_shared<DataTypeString>()); |
217 | 0 | } |
218 | | |
219 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
220 | 0 | uint32_t result, size_t input_rows_count) const override { |
221 | 0 | DCHECK_EQ(arguments.size(), 3); |
222 | |
|
223 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
224 | | // Create a zero column to simply implement |
225 | 0 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); |
226 | 0 | auto res = ColumnString::create(); |
227 | |
|
228 | 0 | auto& null_map_data = null_map->get_data(); |
229 | 0 | auto& res_offsets = res->get_offsets(); |
230 | 0 | auto& res_chars = res->get_chars(); |
231 | 0 | res_offsets.resize(input_rows_count); |
232 | |
|
233 | 0 | const size_t argument_size = arguments.size(); |
234 | 0 | std::vector<ColumnPtr> argument_columns(argument_size); |
235 | 0 | for (size_t i = 0; i < argument_size; ++i) { |
236 | 0 | argument_columns[i] = |
237 | 0 | block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); |
238 | 0 | if (const auto* nullable = |
239 | 0 | check_and_get_column<const ColumnNullable>(*argument_columns[i])) { |
240 | | // Danger: Here must dispose the null map data first! Because |
241 | | // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem |
242 | | // of column nullable mem of null map |
243 | 0 | VectorizedUtils::update_null_map(null_map->get_data(), |
244 | 0 | nullable->get_null_map_data()); |
245 | 0 | argument_columns[i] = nullable->get_nested_column_ptr(); |
246 | 0 | } |
247 | 0 | } |
248 | |
|
249 | 0 | const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get()); |
250 | |
|
251 | 0 | const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get()); |
252 | |
|
253 | 0 | const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
254 | 0 | const auto& part_num_col_data = part_num_col->get_data(); |
255 | |
|
256 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
257 | 0 | if (part_num_col_data[i] == 0) { |
258 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
259 | 0 | continue; |
260 | 0 | } |
261 | | |
262 | 0 | auto delimiter = delimiter_col->get_data_at(i); |
263 | 0 | auto delimiter_str = delimiter_col->get_data_at(i).to_string(); |
264 | 0 | auto part_number = part_num_col_data[i]; |
265 | 0 | auto str = str_col->get_data_at(i); |
266 | 0 | if (delimiter.size == 0) { |
267 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
268 | 0 | continue; |
269 | 0 | } |
270 | | |
271 | 0 | if (part_number > 0) { |
272 | 0 | if (delimiter.size == 1) { |
273 | | // If delimiter is a char, use memchr to split |
274 | 0 | int32_t pre_offset = -1; |
275 | 0 | int32_t offset = -1; |
276 | 0 | int32_t num = 0; |
277 | 0 | while (num < part_number) { |
278 | 0 | pre_offset = offset; |
279 | 0 | size_t n = str.size - offset - 1; |
280 | 0 | const char* pos = reinterpret_cast<const char*>( |
281 | 0 | memchr(str.data + offset + 1, delimiter_str[0], n)); |
282 | 0 | if (pos != nullptr) { |
283 | 0 | offset = pos - str.data; |
284 | 0 | num++; |
285 | 0 | } else { |
286 | 0 | offset = str.size; |
287 | 0 | num = (num == 0) ? 0 : num + 1; |
288 | 0 | break; |
289 | 0 | } |
290 | 0 | } |
291 | |
|
292 | 0 | if (num == part_number) { |
293 | 0 | StringOP::push_value_string( |
294 | 0 | std::string_view { |
295 | 0 | reinterpret_cast<const char*>(str.data + pre_offset + 1), |
296 | 0 | (size_t)offset - pre_offset - 1}, |
297 | 0 | i, res_chars, res_offsets); |
298 | 0 | } else { |
299 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
300 | 0 | } |
301 | 0 | } else { |
302 | | // If delimiter is a string, use memmem to split |
303 | 0 | int32_t pre_offset = -delimiter.size; |
304 | 0 | int32_t offset = -delimiter.size; |
305 | 0 | int32_t num = 0; |
306 | 0 | while (num < part_number) { |
307 | 0 | pre_offset = offset; |
308 | 0 | size_t n = str.size - offset - delimiter.size; |
309 | 0 | char* pos = |
310 | 0 | reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size, |
311 | 0 | n, delimiter.data, delimiter.size)); |
312 | 0 | if (pos != nullptr) { |
313 | 0 | offset = pos - str.data; |
314 | 0 | num++; |
315 | 0 | } else { |
316 | 0 | offset = str.size; |
317 | 0 | num = (num == 0) ? 0 : num + 1; |
318 | 0 | break; |
319 | 0 | } |
320 | 0 | } |
321 | |
|
322 | 0 | if (num == part_number) { |
323 | 0 | StringOP::push_value_string( |
324 | 0 | std::string_view {reinterpret_cast<const char*>( |
325 | 0 | str.data + pre_offset + delimiter.size), |
326 | 0 | (size_t)offset - pre_offset - delimiter.size}, |
327 | 0 | i, res_chars, res_offsets); |
328 | 0 | } else { |
329 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
330 | 0 | } |
331 | 0 | } |
332 | 0 | } else { |
333 | 0 | part_number = -part_number; |
334 | 0 | auto str_str = str.to_string(); |
335 | 0 | int32_t offset = str.size; |
336 | 0 | int32_t pre_offset = offset; |
337 | 0 | int32_t num = 0; |
338 | 0 | auto substr = str_str; |
339 | 0 | while (num <= part_number && offset >= 0) { |
340 | 0 | offset = (int)substr.rfind(delimiter, offset); |
341 | 0 | if (offset != -1) { |
342 | 0 | if (++num == part_number) { |
343 | 0 | break; |
344 | 0 | } |
345 | 0 | pre_offset = offset; |
346 | 0 | offset = offset - 1; |
347 | 0 | substr = str_str.substr(0, pre_offset); |
348 | 0 | } else { |
349 | 0 | break; |
350 | 0 | } |
351 | 0 | } |
352 | 0 | num = (offset == -1 && num != 0) ? num + 1 : num; |
353 | |
|
354 | 0 | if (num == part_number) { |
355 | 0 | if (offset == -1) { |
356 | 0 | StringOP::push_value_string(std::string_view {str.data, (size_t)pre_offset}, |
357 | 0 | i, res_chars, res_offsets); |
358 | 0 | } else { |
359 | 0 | StringOP::push_value_string( |
360 | 0 | std::string_view {str_str.substr( |
361 | 0 | offset + delimiter.size, |
362 | 0 | (size_t)pre_offset - offset - delimiter.size)}, |
363 | 0 | i, res_chars, res_offsets); |
364 | 0 | } |
365 | 0 | } else { |
366 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
367 | 0 | } |
368 | 0 | } |
369 | 0 | } |
370 | |
|
371 | 0 | block.get_by_position(result).column = |
372 | 0 | ColumnNullable::create(std::move(res), std::move(null_map)); |
373 | 0 | return Status::OK(); |
374 | 0 | } |
375 | | }; |
376 | | |
377 | | class FunctionSubstringIndex : public IFunction { |
378 | | public: |
379 | | static constexpr auto name = "substring_index"; |
380 | 2 | static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); } |
381 | 1 | String get_name() const override { return name; } |
382 | 0 | size_t get_number_of_arguments() const override { return 3; } |
383 | | |
384 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
385 | 0 | return std::make_shared<DataTypeString>(); |
386 | 0 | } |
387 | | |
388 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
389 | 0 | uint32_t result, size_t input_rows_count) const override { |
390 | 0 | DCHECK_EQ(arguments.size(), 3); |
391 | | |
392 | | // Create a zero column to simply implement |
393 | 0 | auto res = ColumnString::create(); |
394 | |
|
395 | 0 | auto& res_offsets = res->get_offsets(); |
396 | 0 | auto& res_chars = res->get_chars(); |
397 | 0 | res_offsets.resize(input_rows_count); |
398 | 0 | ColumnPtr content_column; |
399 | 0 | bool content_const = false; |
400 | 0 | std::tie(content_column, content_const) = |
401 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
402 | |
|
403 | 0 | const auto* str_col = assert_cast<const ColumnString*>(content_column.get()); |
404 | | |
405 | | // Handle both constant and non-constant delimiter parameters |
406 | 0 | ColumnPtr delimiter_column_ptr; |
407 | 0 | bool delimiter_const = false; |
408 | 0 | std::tie(delimiter_column_ptr, delimiter_const) = |
409 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
410 | 0 | const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get()); |
411 | |
|
412 | 0 | ColumnPtr part_num_column_ptr; |
413 | 0 | bool part_num_const = false; |
414 | 0 | std::tie(part_num_column_ptr, part_num_const) = |
415 | 0 | unpack_if_const(block.get_by_position(arguments[2]).column); |
416 | 0 | const ColumnInt32* part_num_col = |
417 | 0 | assert_cast<const ColumnInt32*>(part_num_column_ptr.get()); |
418 | | |
419 | | // For constant multi-character delimiters, create StringRef and StringSearch only once |
420 | 0 | std::optional<StringRef> const_delimiter_ref; |
421 | 0 | std::optional<StringSearch> const_search; |
422 | 0 | if (delimiter_const && delimiter_col->get_data_at(0).size > 1) { |
423 | 0 | const_delimiter_ref.emplace(delimiter_col->get_data_at(0)); |
424 | 0 | const_search.emplace(&const_delimiter_ref.value()); |
425 | 0 | } |
426 | |
|
427 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
428 | 0 | auto str = str_col->get_data_at(content_const ? 0 : i); |
429 | 0 | auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i); |
430 | 0 | int32_t delimiter_size = delimiter.size; |
431 | |
|
432 | 0 | auto part_number = part_num_col->get_element(part_num_const ? 0 : i); |
433 | |
|
434 | 0 | if (part_number == 0 || delimiter_size == 0) { |
435 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
436 | 0 | continue; |
437 | 0 | } |
438 | | |
439 | 0 | if (part_number > 0) { |
440 | 0 | if (delimiter_size == 1) { |
441 | 0 | int32_t offset = -1; |
442 | 0 | int32_t num = 0; |
443 | 0 | while (num < part_number) { |
444 | 0 | size_t n = str.size - offset - 1; |
445 | 0 | const char* pos = reinterpret_cast<const char*>( |
446 | 0 | memchr(str.data + offset + 1, delimiter.data[0], n)); |
447 | 0 | if (pos != nullptr) { |
448 | 0 | offset = pos - str.data; |
449 | 0 | num++; |
450 | 0 | } else { |
451 | 0 | offset = str.size; |
452 | 0 | num = (num == 0) ? 0 : num + 1; |
453 | 0 | break; |
454 | 0 | } |
455 | 0 | } |
456 | |
|
457 | 0 | if (num == part_number) { |
458 | 0 | StringOP::push_value_string(std::string_view {str.data, (size_t)offset}, i, |
459 | 0 | res_chars, res_offsets); |
460 | 0 | } else { |
461 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
462 | 0 | res_chars, res_offsets); |
463 | 0 | } |
464 | 0 | } else { |
465 | | // For multi-character delimiters |
466 | | // Use pre-created StringRef and StringSearch for constant delimiters |
467 | 0 | StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value() |
468 | 0 | : StringRef(delimiter); |
469 | 0 | const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr; |
470 | 0 | StringSearch local_search(&delimiter_ref); |
471 | 0 | if (!search_ptr) { |
472 | 0 | search_ptr = &local_search; |
473 | 0 | } |
474 | |
|
475 | 0 | int32_t offset = -delimiter_size; |
476 | 0 | int32_t num = 0; |
477 | 0 | while (num < part_number) { |
478 | 0 | size_t n = str.size - offset - delimiter_size; |
479 | | // search first match delimter_ref index from src string among str_offset to end |
480 | 0 | const char* pos = search_ptr->search(str.data + offset + delimiter_size, n); |
481 | 0 | if (pos < str.data + str.size) { |
482 | 0 | offset = pos - str.data; |
483 | 0 | num++; |
484 | 0 | } else { |
485 | 0 | offset = str.size; |
486 | 0 | num = (num == 0) ? 0 : num + 1; |
487 | 0 | break; |
488 | 0 | } |
489 | 0 | } |
490 | |
|
491 | 0 | if (num == part_number) { |
492 | 0 | StringOP::push_value_string(std::string_view {str.data, (size_t)offset}, i, |
493 | 0 | res_chars, res_offsets); |
494 | 0 | } else { |
495 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
496 | 0 | res_chars, res_offsets); |
497 | 0 | } |
498 | 0 | } |
499 | 0 | } else { |
500 | 0 | int neg_part_number = -part_number; |
501 | 0 | auto str_str = str.to_string(); |
502 | 0 | int32_t offset = str.size; |
503 | 0 | int32_t pre_offset = offset; |
504 | 0 | int32_t num = 0; |
505 | 0 | auto substr = str_str; |
506 | | |
507 | | // Use pre-created StringRef for constant delimiters |
508 | 0 | StringRef delimiter_str = const_delimiter_ref |
509 | 0 | ? const_delimiter_ref.value() |
510 | 0 | : StringRef(delimiter.data, delimiter.size); |
511 | |
|
512 | 0 | while (num <= neg_part_number && offset >= 0) { |
513 | 0 | offset = (int)substr.rfind(delimiter_str, offset); |
514 | 0 | if (offset != -1) { |
515 | 0 | if (++num == neg_part_number) { |
516 | 0 | break; |
517 | 0 | } |
518 | 0 | pre_offset = offset; |
519 | 0 | offset = offset - 1; |
520 | 0 | substr = str_str.substr(0, pre_offset); |
521 | 0 | } else { |
522 | 0 | break; |
523 | 0 | } |
524 | 0 | } |
525 | 0 | num = (offset == -1 && num != 0) ? num + 1 : num; |
526 | |
|
527 | 0 | if (num == neg_part_number) { |
528 | 0 | if (offset == -1) { |
529 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
530 | 0 | res_chars, res_offsets); |
531 | 0 | } else { |
532 | 0 | StringOP::push_value_string( |
533 | 0 | std::string_view {str.data + offset + delimiter_size, |
534 | 0 | str.size - offset - delimiter_size}, |
535 | 0 | i, res_chars, res_offsets); |
536 | 0 | } |
537 | 0 | } else { |
538 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars, |
539 | 0 | res_offsets); |
540 | 0 | } |
541 | 0 | } |
542 | 0 | } |
543 | |
|
544 | 0 | block.get_by_position(result).column = std::move(res); |
545 | 0 | return Status::OK(); |
546 | 0 | } |
547 | | }; |
548 | | |
549 | | class FunctionSplitByString : public IFunction { |
550 | | public: |
551 | | static constexpr auto name = "split_by_string"; |
552 | | |
553 | 2 | static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); } |
554 | | using NullMapType = PaddedPODArray<UInt8>; |
555 | | |
556 | 1 | String get_name() const override { return name; } |
557 | | |
558 | 1 | bool is_variadic() const override { return false; } |
559 | | |
560 | 0 | size_t get_number_of_arguments() const override { return 2; } |
561 | | |
562 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
563 | 0 | DCHECK(is_string_type(arguments[0]->get_primitive_type())) |
564 | 0 | << "first argument for function: " << name << " should be string" |
565 | 0 | << " and arguments[0] is " << arguments[0]->get_name(); |
566 | 0 | DCHECK(is_string_type(arguments[1]->get_primitive_type())) |
567 | 0 | << "second argument for function: " << name << " should be string" |
568 | 0 | << " and arguments[1] is " << arguments[1]->get_name(); |
569 | 0 | return std::make_shared<DataTypeArray>(make_nullable(arguments[0])); |
570 | 0 | } |
571 | | |
572 | | Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, |
573 | 0 | uint32_t result, size_t input_rows_count) const override { |
574 | 0 | DCHECK_EQ(arguments.size(), 2); |
575 | |
|
576 | 0 | const auto& [src_column, left_const] = |
577 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
578 | 0 | const auto& [right_column, right_const] = |
579 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
580 | |
|
581 | 0 | DataTypePtr right_column_type = block.get_by_position(arguments[1]).type; |
582 | 0 | DataTypePtr src_column_type = block.get_by_position(arguments[0]).type; |
583 | 0 | auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(), |
584 | 0 | ColumnArray::ColumnOffsets::create()); |
585 | |
|
586 | 0 | dest_column_ptr->resize(0); |
587 | 0 | auto& dest_offsets = dest_column_ptr->get_offsets(); |
588 | |
|
589 | 0 | auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data()); |
590 | 0 | auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get(); |
591 | |
|
592 | 0 | const auto* col_str = assert_cast<const ColumnString*>(src_column.get()); |
593 | |
|
594 | 0 | const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get()); |
595 | |
|
596 | 0 | std::visit( |
597 | 0 | [&](auto src_const, auto delimiter_const) { |
598 | 0 | _execute<src_const, delimiter_const>(*col_str, *col_delimiter, |
599 | 0 | *dest_nested_column, dest_offsets, |
600 | 0 | input_rows_count); |
601 | 0 | }, Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESF_EEDaSA_SB_ Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESE_IbLb1EEEEDaSA_SB_ Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESE_IbLb0EEEEDaSA_SB_ Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESF_EEDaSA_SB_ |
602 | 0 | make_bool_variant(left_const), make_bool_variant(right_const)); |
603 | | |
604 | | // all elements in dest_nested_column are not null |
605 | 0 | dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(), |
606 | 0 | false); |
607 | 0 | block.replace_by_position(result, std::move(dest_column_ptr)); |
608 | |
|
609 | 0 | return Status::OK(); |
610 | 0 | } |
611 | | |
612 | | private: |
613 | | template <bool src_const, bool delimiter_const> |
614 | | void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column, |
615 | | IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets, |
616 | 0 | size_t size) const { |
617 | 0 | auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column); |
618 | 0 | ColumnString::Chars& column_string_chars = dest_column_string.get_chars(); |
619 | 0 | ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); |
620 | 0 | column_string_chars.reserve(0); |
621 | |
|
622 | 0 | ColumnArray::Offset64 string_pos = 0; |
623 | 0 | ColumnArray::Offset64 dest_pos = 0; |
624 | |
|
625 | 0 | StringSearch search; |
626 | 0 | StringRef delimiter_ref_for_search; |
627 | |
|
628 | 0 | if constexpr (delimiter_const) { |
629 | 0 | delimiter_ref_for_search = delimiter_column.get_data_at(0); |
630 | 0 | search.set_pattern(&delimiter_ref_for_search); |
631 | 0 | } |
632 | |
|
633 | 0 | for (size_t i = 0; i < size; i++) { |
634 | 0 | const StringRef str_ref = |
635 | 0 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
636 | 0 | const StringRef delimiter_ref = |
637 | 0 | delimiter_column.get_data_at(index_check_const<delimiter_const>(i)); |
638 | |
|
639 | 0 | if (str_ref.size == 0) { |
640 | 0 | dest_offsets.push_back(dest_pos); |
641 | 0 | continue; |
642 | 0 | } |
643 | 0 | if (delimiter_ref.size == 0) { |
644 | 0 | split_empty_delimiter(str_ref, column_string_chars, column_string_offsets, |
645 | 0 | string_pos, dest_pos); |
646 | 0 | } else { |
647 | 0 | if constexpr (!delimiter_const) { |
648 | 0 | search.set_pattern(&delimiter_ref); |
649 | 0 | } |
650 | 0 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { |
651 | 0 | const size_t str_offset = str_pos; |
652 | 0 | const size_t old_size = column_string_chars.size(); |
653 | | // search first match delimter_ref index from src string among str_offset to end |
654 | 0 | const char* result_start = |
655 | 0 | search.search(str_ref.data + str_offset, str_ref.size - str_offset); |
656 | | // compute split part size |
657 | 0 | const size_t split_part_size = result_start - str_ref.data - str_offset; |
658 | | // save dist string split part |
659 | 0 | if (split_part_size > 0) { |
660 | 0 | const size_t new_size = old_size + split_part_size; |
661 | 0 | column_string_chars.resize(new_size); |
662 | 0 | memcpy_small_allow_read_write_overflow15( |
663 | 0 | column_string_chars.data() + old_size, str_ref.data + str_offset, |
664 | 0 | split_part_size); |
665 | | // add dist string offset |
666 | 0 | string_pos += split_part_size; |
667 | 0 | } |
668 | 0 | column_string_offsets.push_back(string_pos); |
669 | | // array offset + 1 |
670 | 0 | dest_pos++; |
671 | | // add src string str_pos to next search start |
672 | 0 | str_pos += split_part_size + delimiter_ref.size; |
673 | 0 | } |
674 | 0 | } |
675 | 0 | dest_offsets.push_back(dest_pos); |
676 | 0 | } |
677 | 0 | } Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
678 | | |
679 | | void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars, |
680 | | ColumnString::Offsets& column_string_offsets, |
681 | | ColumnArray::Offset64& string_pos, |
682 | 0 | ColumnArray::Offset64& dest_pos) const { |
683 | 0 | const size_t old_size = column_string_chars.size(); |
684 | 0 | const size_t new_size = old_size + str_ref.size; |
685 | 0 | column_string_chars.resize(new_size); |
686 | 0 | memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size); |
687 | 0 | if (simd::VStringFunctions::is_ascii(str_ref)) { |
688 | 0 | const auto size = str_ref.size; |
689 | |
|
690 | 0 | const auto nested_old_size = column_string_offsets.size(); |
691 | 0 | const auto nested_new_size = nested_old_size + size; |
692 | 0 | column_string_offsets.resize(nested_new_size); |
693 | 0 | std::iota(column_string_offsets.data() + nested_old_size, |
694 | 0 | column_string_offsets.data() + nested_new_size, string_pos + 1); |
695 | |
|
696 | 0 | string_pos += size; |
697 | 0 | dest_pos += size; |
698 | | // The above code is equivalent to the code in the following comment. |
699 | | // for (size_t i = 0; i < str_ref.size; i++) { |
700 | | // string_pos++; |
701 | | // column_string_offsets.push_back(string_pos); |
702 | | // (*dest_nested_null_map).push_back(false); |
703 | | // dest_pos++; |
704 | | // } |
705 | 0 | } else { |
706 | 0 | for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) { |
707 | 0 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]]; |
708 | |
|
709 | 0 | string_pos += utf8_char_len; |
710 | 0 | column_string_offsets.push_back(string_pos); |
711 | 0 | dest_pos++; |
712 | 0 | } |
713 | 0 | } |
714 | 0 | } |
715 | | }; |
716 | | |
/// Selects which `count_substrings` signature a FunctionCountSubString instantiation provides.
enum class FunctionCountSubStringType {
    // (string, string) form: two arguments.
    TWO_ARGUMENTS,
    // (string, string, int32) form: three arguments.
    THREE_ARGUMENTS
};
718 | | |
719 | | template <FunctionCountSubStringType type> |
720 | | class FunctionCountSubString : public IFunction { |
721 | | public: |
722 | | static constexpr auto name = "count_substrings"; |
723 | | static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3; |
724 | | |
725 | 223 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv Line | Count | Source | 725 | 45 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); } |
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv Line | Count | Source | 725 | 178 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); } |
|
726 | | using NullMapType = PaddedPODArray<UInt8>; |
727 | | |
728 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev |
729 | | |
730 | 0 | size_t get_number_of_arguments() const override { return arg_count; }Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv |
731 | | |
732 | 219 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
733 | 219 | return std::make_shared<DataTypeInt32>(); |
734 | 219 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 732 | 43 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 733 | 43 | return std::make_shared<DataTypeInt32>(); | 734 | 43 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 732 | 176 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 733 | 176 | return std::make_shared<DataTypeInt32>(); | 734 | 176 | } |
|
735 | | |
736 | 2 | DataTypes get_variadic_argument_types_impl() const override { |
737 | 2 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { |
738 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; |
739 | 1 | } else { |
740 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
741 | 1 | std::make_shared<DataTypeInt32>()}; |
742 | 1 | } |
743 | 2 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv Line | Count | Source | 736 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 737 | 1 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 738 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 739 | | } else { | 740 | | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 741 | | std::make_shared<DataTypeInt32>()}; | 742 | | } | 743 | 1 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv Line | Count | Source | 736 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 737 | | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 738 | | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 739 | 1 | } else { | 740 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 741 | 1 | std::make_shared<DataTypeInt32>()}; | 742 | 1 | } | 743 | 1 | } |
|
744 | | |
745 | 221 | bool is_variadic() const override { return true; }_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv Line | Count | Source | 745 | 44 | bool is_variadic() const override { return true; } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv Line | Count | Source | 745 | 177 | bool is_variadic() const override { return true; } |
|
746 | | |
747 | | Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, |
748 | 189 | uint32_t result, size_t input_rows_count) const override { |
749 | 189 | DCHECK(arg_count); |
750 | 189 | bool col_const[arg_count]; |
751 | 189 | ColumnPtr argument_columns[arg_count]; |
752 | 722 | for (int i = 0; i < arg_count; ++i) { |
753 | 533 | std::tie(argument_columns[i], col_const[i]) = |
754 | 533 | unpack_if_const(block.get_by_position(arguments[i]).column); |
755 | 533 | } |
756 | | |
757 | 189 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); |
758 | 189 | auto& dest_column_data = dest_column_ptr->get_data(); |
759 | | |
760 | 189 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { |
761 | 34 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); |
762 | 34 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); |
763 | 34 | std::visit( |
764 | 34 | [&](auto str_const, auto pattern_const) { |
765 | 34 | _execute<str_const, pattern_const>(src_column_string, pattern_column, |
766 | 34 | dest_column_data, input_rows_count); |
767 | 34 | }, _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_ Line | Count | Source | 764 | 12 | [&](auto str_const, auto pattern_const) { | 765 | 12 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 766 | 12 | dest_column_data, input_rows_count); | 767 | 12 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_ Line | Count | Source | 764 | 11 | [&](auto str_const, auto pattern_const) { | 765 | 11 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 766 | 11 | dest_column_data, input_rows_count); | 767 | 11 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_ Line | Count | Source | 764 | 11 | [&](auto str_const, auto pattern_const) { | 765 | 11 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 766 | 11 | dest_column_data, input_rows_count); | 767 | 11 | }, |
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_ |
768 | 34 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); |
769 | 155 | } else { |
770 | 155 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); |
771 | 155 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); |
772 | 155 | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); |
773 | 155 | std::visit( |
774 | 155 | [&](auto str_const, auto pattern_const, auto start_pos_const) { |
775 | 155 | _execute<str_const, pattern_const, start_pos_const>( |
776 | 155 | src_column_string, pattern_column, start_pos_column, |
777 | 155 | dest_column_data, input_rows_count); |
778 | 155 | }, _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 774 | 23 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 775 | 23 | _execute<str_const, pattern_const, start_pos_const>( | 776 | 23 | src_column_string, pattern_column, start_pos_column, | 777 | 23 | dest_column_data, input_rows_count); | 778 | 23 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 774 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 775 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 776 | 22 | src_column_string, pattern_column, start_pos_column, | 777 | 22 | dest_column_data, input_rows_count); | 778 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 774 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 775 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 776 | 22 | src_column_string, pattern_column, start_pos_column, | 777 | 22 | dest_column_data, input_rows_count); | 778 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 774 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 775 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 776 | 22 | src_column_string, pattern_column, start_pos_column, | 777 | 22 | dest_column_data, input_rows_count); | 778 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 774 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 775 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 776 | 22 | src_column_string, pattern_column, start_pos_column, | 777 | 22 | dest_column_data, input_rows_count); | 778 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 774 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 775 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 776 | 22 | src_column_string, pattern_column, start_pos_column, | 777 | 22 | dest_column_data, input_rows_count); | 778 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 774 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 775 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 776 | 22 | src_column_string, pattern_column, start_pos_column, | 777 | 22 | dest_column_data, input_rows_count); | 778 | 22 | }, |
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ |
779 | 155 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
780 | 155 | make_bool_variant(col_const[2])); |
781 | 155 | } |
782 | | |
783 | 189 | block.replace_by_position(result, std::move(dest_column_ptr)); |
784 | 189 | return Status::OK(); |
785 | 189 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 748 | 34 | uint32_t result, size_t input_rows_count) const override { | 749 | 34 | DCHECK(arg_count); | 750 | 34 | bool col_const[arg_count]; | 751 | 34 | ColumnPtr argument_columns[arg_count]; | 752 | 102 | for (int i = 0; i < arg_count; ++i) { | 753 | 68 | std::tie(argument_columns[i], col_const[i]) = | 754 | 68 | unpack_if_const(block.get_by_position(arguments[i]).column); | 755 | 68 | } | 756 | | | 757 | 34 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); | 758 | 34 | auto& dest_column_data = dest_column_ptr->get_data(); | 759 | | | 760 | 34 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 761 | 34 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 762 | 34 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 763 | 34 | std::visit( | 764 | 34 | [&](auto str_const, auto pattern_const) { | 765 | 34 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 766 | 34 | dest_column_data, input_rows_count); | 767 | 34 | }, | 768 | 34 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); | 769 | | } else { | 770 | | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 771 | | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 772 | | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); | 773 | | std::visit( | 774 | | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 775 | | _execute<str_const, pattern_const, start_pos_const>( | 776 | | src_column_string, pattern_column, start_pos_column, | 777 | | dest_column_data, input_rows_count); | 778 | | }, | 779 | | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 780 | | 
make_bool_variant(col_const[2])); | 781 | | } | 782 | | | 783 | 34 | block.replace_by_position(result, std::move(dest_column_ptr)); | 784 | 34 | return Status::OK(); | 785 | 34 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 748 | 155 | uint32_t result, size_t input_rows_count) const override { | 749 | 155 | DCHECK(arg_count); | 750 | 155 | bool col_const[arg_count]; | 751 | 155 | ColumnPtr argument_columns[arg_count]; | 752 | 620 | for (int i = 0; i < arg_count; ++i) { | 753 | 465 | std::tie(argument_columns[i], col_const[i]) = | 754 | 465 | unpack_if_const(block.get_by_position(arguments[i]).column); | 755 | 465 | } | 756 | | | 757 | 155 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); | 758 | 155 | auto& dest_column_data = dest_column_ptr->get_data(); | 759 | | | 760 | | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 761 | | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 762 | | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 763 | | std::visit( | 764 | | [&](auto str_const, auto pattern_const) { | 765 | | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 766 | | dest_column_data, input_rows_count); | 767 | | }, | 768 | | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); | 769 | 155 | } else { | 770 | 155 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 771 | 155 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 772 | 155 | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); | 773 | 155 | std::visit( | 774 | 155 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 775 | 155 | _execute<str_const, pattern_const, start_pos_const>( | 776 | 155 | src_column_string, pattern_column, start_pos_column, | 777 | 155 | dest_column_data, input_rows_count); | 778 | 155 | }, | 779 | 155 | make_bool_variant(col_const[0]), 
make_bool_variant(col_const[1]), | 780 | 155 | make_bool_variant(col_const[2])); | 781 | 155 | } | 782 | | | 783 | 155 | block.replace_by_position(result, std::move(dest_column_ptr)); | 784 | 155 | return Status::OK(); | 785 | 155 | } |
|
786 | | |
787 | | private: |
788 | | template <bool src_const, bool pattern_const> |
789 | | void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column, |
790 | 34 | ColumnInt32::Container& dest_column_data, size_t size) const { |
791 | 81 | for (size_t i = 0; i < size; i++) { |
792 | 47 | const StringRef str_ref = |
793 | 47 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
794 | | |
795 | 47 | const StringRef pattern_ref = |
796 | 47 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); |
797 | 47 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); |
798 | 47 | } |
799 | 34 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 790 | 12 | ColumnInt32::Container& dest_column_data, size_t size) const { | 791 | 37 | for (size_t i = 0; i < size; i++) { | 792 | 25 | const StringRef str_ref = | 793 | 25 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 794 | | | 795 | 25 | const StringRef pattern_ref = | 796 | 25 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 797 | 25 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 798 | 25 | } | 799 | 12 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 790 | 11 | ColumnInt32::Container& dest_column_data, size_t size) const { | 791 | 22 | for (size_t i = 0; i < size; i++) { | 792 | 11 | const StringRef str_ref = | 793 | 11 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 794 | | | 795 | 11 | const StringRef pattern_ref = | 796 | 11 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 797 | 11 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 798 | 11 | } | 799 | 11 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 790 | 11 | ColumnInt32::Container& dest_column_data, size_t size) const { | 791 | 22 | for (size_t i = 0; i < size; i++) { | 792 | 11 | const StringRef str_ref = | 793 | 11 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 794 | | | 795 | 11 | const StringRef pattern_ref = | 796 | 11 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 797 | 11 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 798 | 11 | } | 799 | 11 | } |
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
800 | | |
801 | | template <bool src_const, bool pattern_const, bool start_pos_const> |
802 | | void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column, |
803 | | const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data, |
804 | 155 | size_t size) const { |
805 | 334 | for (size_t i = 0; i < size; i++) { |
806 | 179 | const StringRef str_ref = |
807 | 179 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
808 | 179 | const StringRef pattern_ref = |
809 | 179 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); |
810 | | // 1-based index |
811 | 179 | int32_t start_pos = |
812 | 179 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; |
813 | | |
814 | 179 | const char* p = str_ref.begin(); |
815 | 179 | const char* end = str_ref.end(); |
816 | 179 | int char_size = 0; |
817 | 1.22k | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { |
818 | 1.04k | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; |
819 | 1.04k | } |
820 | 179 | const auto start_byte_len = p - str_ref.begin(); |
821 | | |
822 | 179 | if (start_pos < 0 || start_byte_len >= str_ref.size) { |
823 | 115 | dest_column_data[i] = 0; |
824 | 115 | } else { |
825 | 64 | dest_column_data[i] = |
826 | 64 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); |
827 | 64 | } |
828 | 179 | } |
829 | 155 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 804 | 23 | size_t size) const { | 805 | 70 | for (size_t i = 0; i < size; i++) { | 806 | 47 | const StringRef str_ref = | 807 | 47 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 808 | 47 | const StringRef pattern_ref = | 809 | 47 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 810 | | // 1-based index | 811 | 47 | int32_t start_pos = | 812 | 47 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 813 | | | 814 | 47 | const char* p = str_ref.begin(); | 815 | 47 | const char* end = str_ref.end(); | 816 | 47 | int char_size = 0; | 817 | 316 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 818 | 269 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 819 | 269 | } | 820 | 47 | const auto start_byte_len = p - str_ref.begin(); | 821 | | | 822 | 47 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 823 | 31 | dest_column_data[i] = 0; | 824 | 31 | } else { | 825 | 16 | dest_column_data[i] = | 826 | 16 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 827 | 16 | } | 828 | 47 | } | 829 | 23 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 804 | 22 | size_t size) const { | 805 | 44 | for (size_t i = 0; i < size; i++) { | 806 | 22 | const StringRef str_ref = | 807 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 808 | 22 | const StringRef pattern_ref = | 809 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 810 | | // 1-based index | 811 | 22 | int32_t start_pos = | 812 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 813 | | | 814 | 22 | const char* p = str_ref.begin(); | 815 | 22 | const char* end = str_ref.end(); | 816 | 22 | int char_size = 0; | 817 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 818 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 819 | 129 | } | 820 | 22 | const auto start_byte_len = p - str_ref.begin(); | 821 | | | 822 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 823 | 14 | dest_column_data[i] = 0; | 824 | 14 | } else { | 825 | 8 | dest_column_data[i] = | 826 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 827 | 8 | } | 828 | 22 | } | 829 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 804 | 22 | size_t size) const { | 805 | 44 | for (size_t i = 0; i < size; i++) { | 806 | 22 | const StringRef str_ref = | 807 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 808 | 22 | const StringRef pattern_ref = | 809 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 810 | | // 1-based index | 811 | 22 | int32_t start_pos = | 812 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 813 | | | 814 | 22 | const char* p = str_ref.begin(); | 815 | 22 | const char* end = str_ref.end(); | 816 | 22 | int char_size = 0; | 817 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 818 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 819 | 129 | } | 820 | 22 | const auto start_byte_len = p - str_ref.begin(); | 821 | | | 822 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 823 | 14 | dest_column_data[i] = 0; | 824 | 14 | } else { | 825 | 8 | dest_column_data[i] = | 826 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 827 | 8 | } | 828 | 22 | } | 829 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 804 | 22 | size_t size) const { | 805 | 44 | for (size_t i = 0; i < size; i++) { | 806 | 22 | const StringRef str_ref = | 807 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 808 | 22 | const StringRef pattern_ref = | 809 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 810 | | // 1-based index | 811 | 22 | int32_t start_pos = | 812 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 813 | | | 814 | 22 | const char* p = str_ref.begin(); | 815 | 22 | const char* end = str_ref.end(); | 816 | 22 | int char_size = 0; | 817 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 818 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 819 | 129 | } | 820 | 22 | const auto start_byte_len = p - str_ref.begin(); | 821 | | | 822 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 823 | 14 | dest_column_data[i] = 0; | 824 | 14 | } else { | 825 | 8 | dest_column_data[i] = | 826 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 827 | 8 | } | 828 | 22 | } | 829 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 804 | 22 | size_t size) const { | 805 | 44 | for (size_t i = 0; i < size; i++) { | 806 | 22 | const StringRef str_ref = | 807 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 808 | 22 | const StringRef pattern_ref = | 809 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 810 | | // 1-based index | 811 | 22 | int32_t start_pos = | 812 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 813 | | | 814 | 22 | const char* p = str_ref.begin(); | 815 | 22 | const char* end = str_ref.end(); | 816 | 22 | int char_size = 0; | 817 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 818 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 819 | 129 | } | 820 | 22 | const auto start_byte_len = p - str_ref.begin(); | 821 | | | 822 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 823 | 14 | dest_column_data[i] = 0; | 824 | 14 | } else { | 825 | 8 | dest_column_data[i] = | 826 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 827 | 8 | } | 828 | 22 | } | 829 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 804 | 22 | size_t size) const { | 805 | 44 | for (size_t i = 0; i < size; i++) { | 806 | 22 | const StringRef str_ref = | 807 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 808 | 22 | const StringRef pattern_ref = | 809 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 810 | | // 1-based index | 811 | 22 | int32_t start_pos = | 812 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 813 | | | 814 | 22 | const char* p = str_ref.begin(); | 815 | 22 | const char* end = str_ref.end(); | 816 | 22 | int char_size = 0; | 817 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 818 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 819 | 129 | } | 820 | 22 | const auto start_byte_len = p - str_ref.begin(); | 821 | | | 822 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 823 | 14 | dest_column_data[i] = 0; | 824 | 14 | } else { | 825 | 8 | dest_column_data[i] = | 826 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 827 | 8 | } | 828 | 22 | } | 829 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 804 | 22 | size_t size) const { | 805 | 44 | for (size_t i = 0; i < size; i++) { | 806 | 22 | const StringRef str_ref = | 807 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 808 | 22 | const StringRef pattern_ref = | 809 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 810 | | // 1-based index | 811 | 22 | int32_t start_pos = | 812 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 813 | | | 814 | 22 | const char* p = str_ref.begin(); | 815 | 22 | const char* end = str_ref.end(); | 816 | 22 | int char_size = 0; | 817 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 818 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 819 | 129 | } | 820 | 22 | const auto start_byte_len = p - str_ref.begin(); | 821 | | | 822 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 823 | 14 | dest_column_data[i] = 0; | 824 | 14 | } else { | 825 | 8 | dest_column_data[i] = | 826 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 827 | 8 | } | 828 | 22 | } | 829 | 22 | } |
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
830 | | |
831 | 208 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { |
832 | 208 | size_t old_size = pos; |
833 | 208 | size_t str_size = str_ref.size; |
834 | 1.15k | while (pos < str_size && |
835 | 1.15k | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, |
836 | 1.06k | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { |
837 | 948 | pos++; |
838 | 948 | } |
839 | 208 | return pos - old_size; |
840 | 208 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_ Line | Count | Source | 831 | 56 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { | 832 | 56 | size_t old_size = pos; | 833 | 56 | size_t str_size = str_ref.size; | 834 | 372 | while (pos < str_size && | 835 | 372 | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, | 836 | 344 | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { | 837 | 316 | pos++; | 838 | 316 | } | 839 | 56 | return pos - old_size; | 840 | 56 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_ Line | Count | Source | 831 | 152 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { | 832 | 152 | size_t old_size = pos; | 833 | 152 | size_t str_size = str_ref.size; | 834 | 784 | while (pos < str_size && | 835 | 784 | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, | 836 | 720 | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { | 837 | 632 | pos++; | 838 | 632 | } | 839 | 152 | return pos - old_size; | 840 | 152 | } |
|
841 | | |
842 | 111 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { |
843 | 111 | int count = 0; |
844 | 111 | if (str_ref.size == 0 || pattern_ref.size == 0) { |
845 | 19 | return 0; |
846 | 92 | } else { |
847 | 208 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { |
848 | 208 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); |
849 | 208 | if (res_pos == (str_ref.size - str_pos)) { |
850 | 92 | break; // not find |
851 | 92 | } |
852 | 116 | count++; |
853 | 116 | str_pos = str_pos + res_pos + pattern_ref.size; |
854 | 116 | } |
855 | 92 | } |
856 | 92 | return count; |
857 | 111 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_ Line | Count | Source | 842 | 47 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { | 843 | 47 | int count = 0; | 844 | 47 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 845 | 19 | return 0; | 846 | 28 | } else { | 847 | 56 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 848 | 56 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); | 849 | 56 | if (res_pos == (str_ref.size - str_pos)) { | 850 | 28 | break; // not find | 851 | 28 | } | 852 | 28 | count++; | 853 | 28 | str_pos = str_pos + res_pos + pattern_ref.size; | 854 | 28 | } | 855 | 28 | } | 856 | 28 | return count; | 857 | 47 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_ Line | Count | Source | 842 | 64 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { | 843 | 64 | int count = 0; | 844 | 64 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 845 | 0 | return 0; | 846 | 64 | } else { | 847 | 152 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 848 | 152 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); | 849 | 152 | if (res_pos == (str_ref.size - str_pos)) { | 850 | 64 | break; // not find | 851 | 64 | } | 852 | 88 | count++; | 853 | 88 | str_pos = str_pos + res_pos + pattern_ref.size; | 854 | 88 | } | 855 | 64 | } | 856 | 64 | return count; | 857 | 64 | } |
|
858 | | }; |
859 | | |
860 | 1 | void register_function_string_search(SimpleFunctionFactory& factory) { |
861 | 1 | factory.register_function<FunctionStringLocatePos>(); |
862 | 1 | factory.register_function<FunctionSplitPart>(); |
863 | 1 | factory.register_function<FunctionSplitByString>(); |
864 | 1 | factory.register_function<FunctionCountSubString<FunctionCountSubStringType::TWO_ARGUMENTS>>(); |
865 | 1 | factory.register_function< |
866 | 1 | FunctionCountSubString<FunctionCountSubStringType::THREE_ARGUMENTS>>(); |
867 | 1 | factory.register_function<FunctionSubstringIndex>(); |
868 | | |
869 | 1 | factory.register_alias(FunctionStringLocatePos::name, "position"); |
870 | 1 | } |
871 | | |
872 | | #include "common/compile_check_avoid_end.h" |
873 | | } // namespace doris |