be/src/exprs/function/function_string_search.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <cstddef> |
19 | | #include <cstring> |
20 | | #include <numeric> |
21 | | #include <string> |
22 | | #include <string_view> |
23 | | #include <vector> |
24 | | |
25 | | #include "common/status.h" |
26 | | #include "core/assert_cast.h" |
27 | | #include "core/block/block.h" |
28 | | #include "core/block/column_numbers.h" |
29 | | #include "core/column/column_array.h" |
30 | | #include "core/column/column_const.h" |
31 | | #include "core/column/column_nullable.h" |
32 | | #include "core/column/column_string.h" |
33 | | #include "core/column/column_vector.h" |
34 | | #include "core/data_type/data_type_array.h" |
35 | | #include "core/data_type/data_type_nullable.h" |
36 | | #include "core/data_type/data_type_number.h" |
37 | | #include "core/data_type/data_type_string.h" |
38 | | #include "core/data_type/define_primitive_type.h" |
39 | | #include "core/memcmp_small.h" |
40 | | #include "core/memcpy_small.h" |
41 | | #include "core/pod_array_fwd.h" |
42 | | #include "core/string_ref.h" |
43 | | #include "exec/common/stringop_substring.h" |
44 | | #include "exec/common/template_helpers.hpp" |
45 | | #include "exec/common/util.hpp" |
46 | | #include "exprs/function/function.h" |
47 | | #include "exprs/function/function_helpers.h" |
48 | | #include "exprs/function/simple_function_factory.h" |
49 | | #include "exprs/function_context.h" |
50 | | #include "util/simd/vstring_function.h" |
51 | | #include "util/string_search.hpp" |
52 | | |
53 | | namespace doris { |
54 | | #include "common/compile_check_avoid_begin.h" |
55 | | |
56 | | class FunctionStringLocatePos : public IFunction { |
57 | | public: |
58 | | static constexpr auto name = "locate"; |
59 | 816 | static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); } |
60 | 0 | String get_name() const override { return name; } |
61 | 0 | size_t get_number_of_arguments() const override { return 3; } |
62 | | |
63 | 814 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
64 | 814 | return std::make_shared<DataTypeInt32>(); |
65 | 814 | } |
66 | | |
67 | 1 | DataTypes get_variadic_argument_types_impl() const override { |
68 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
69 | 1 | std::make_shared<DataTypeInt32>()}; |
70 | 1 | } |
71 | | |
72 | 815 | bool is_variadic() const override { return true; } |
73 | | |
74 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
75 | 576 | uint32_t result, size_t input_rows_count) const override { |
76 | 576 | if (arguments.size() != 3) { |
77 | 0 | return Status::InvalidArgument("Function {} requires 3 arguments, but got {}", |
78 | 0 | get_name(), arguments.size()); |
79 | 0 | } |
80 | 576 | bool col_const[3]; |
81 | 576 | ColumnPtr argument_columns[3]; |
82 | 2.30k | for (int i = 0; i < 3; ++i) { |
83 | 1.72k | std::tie(argument_columns[i], col_const[i]) = |
84 | 1.72k | unpack_if_const(block.get_by_position(arguments[i]).column); |
85 | 1.72k | } |
86 | | |
87 | 576 | const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get()); |
88 | 576 | const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get()); |
89 | 576 | const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
90 | | |
91 | 576 | ColumnInt32::MutablePtr col_res = ColumnInt32::create(); |
92 | 576 | auto& vec_res = col_res->get_data(); |
93 | 576 | vec_res.resize(block.rows()); |
94 | | |
95 | 576 | const bool is_ascii = col_left->is_ascii() && col_right->is_ascii(); |
96 | | |
97 | 576 | if (col_const[0]) { |
98 | 246 | std::visit( |
99 | 246 | [&](auto is_ascii, auto str_const, auto pos_const) { |
100 | 246 | scalar_search<is_ascii, str_const, pos_const>( |
101 | 246 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, |
102 | 246 | input_rows_count); |
103 | 246 | }, _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 99 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 22 | input_rows_count); | 103 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 99 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 22 | input_rows_count); | 103 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 99 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 22 | input_rows_count); | 103 | 22 | }, |
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 99 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 60 | input_rows_count); | 103 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 99 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 60 | input_rows_count); | 103 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 99 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 100 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 101 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 102 | 60 | input_rows_count); | 103 | 60 | }, |
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
104 | 246 | make_bool_variant(is_ascii), make_bool_variant(col_const[1]), |
105 | 246 | make_bool_variant(col_const[2])); |
106 | | |
107 | 330 | } else { |
108 | 330 | std::visit( |
109 | 330 | [&](auto is_ascii, auto str_const, auto pos_const) { |
110 | 330 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, |
111 | 330 | col_pos->get_data(), vec_res, |
112 | 330 | input_rows_count); |
113 | 330 | }, _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 109 | 23 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 23 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 23 | col_pos->get_data(), vec_res, | 112 | 23 | input_rows_count); | 113 | 23 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 109 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 22 | col_pos->get_data(), vec_res, | 112 | 22 | input_rows_count); | 113 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 109 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 22 | col_pos->get_data(), vec_res, | 112 | 22 | input_rows_count); | 113 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ Line | Count | Source | 109 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 22 | col_pos->get_data(), vec_res, | 112 | 22 | input_rows_count); | 113 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 109 | 61 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 61 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 61 | col_pos->get_data(), vec_res, | 112 | 61 | input_rows_count); | 113 | 61 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 109 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 60 | col_pos->get_data(), vec_res, | 112 | 60 | input_rows_count); | 113 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 109 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 60 | col_pos->get_data(), vec_res, | 112 | 60 | input_rows_count); | 113 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 109 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 110 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 111 | 60 | col_pos->get_data(), vec_res, | 112 | 60 | input_rows_count); | 113 | 60 | }, |
|
114 | 330 | make_bool_variant(is_ascii), make_bool_variant(col_const[1]), |
115 | 330 | make_bool_variant(col_const[2])); |
116 | 330 | } |
117 | 576 | block.replace_by_position(result, std::move(col_res)); |
118 | 576 | return Status::OK(); |
119 | 576 | } |
120 | | |
121 | | private: |
122 | | template <bool is_ascii, bool str_const, bool pos_const> |
123 | | void scalar_search(const StringRef& ldata, const ColumnString* col_right, |
124 | | const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res, |
125 | 246 | size_t size) const { |
126 | 246 | res.resize(size); |
127 | 246 | StringRef substr(ldata.data, ldata.size); |
128 | 246 | StringSearch search {&substr}; |
129 | | |
130 | 492 | for (int i = 0; i < size; ++i) { |
131 | 246 | res[i] = locate_pos<is_ascii>(substr, |
132 | 246 | col_right->get_data_at(index_check_const<str_const>(i)), |
133 | 246 | search, posdata[index_check_const<pos_const>(i)]); |
134 | 246 | } |
135 | 246 | } _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 22 | size_t size) const { | 126 | 22 | res.resize(size); | 127 | 22 | StringRef substr(ldata.data, ldata.size); | 128 | 22 | StringSearch search {&substr}; | 129 | | | 130 | 44 | for (int i = 0; i < size; ++i) { | 131 | 22 | res[i] = locate_pos<is_ascii>(substr, | 132 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 22 | } | 135 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 22 | size_t size) const { | 126 | 22 | res.resize(size); | 127 | 22 | StringRef substr(ldata.data, ldata.size); | 128 | 22 | StringSearch search {&substr}; | 129 | | | 130 | 44 | for (int i = 0; i < size; ++i) { | 131 | 22 | res[i] = locate_pos<is_ascii>(substr, | 132 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 22 | } | 135 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 22 | size_t size) const { | 126 | 22 | res.resize(size); | 127 | 22 | StringRef substr(ldata.data, ldata.size); | 128 | 22 | StringSearch search {&substr}; | 129 | | | 130 | 44 | for (int i = 0; i < size; ++i) { | 131 | 22 | res[i] = locate_pos<is_ascii>(substr, | 132 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 22 | } | 135 | 22 | } |
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 60 | size_t size) const { | 126 | 60 | res.resize(size); | 127 | 60 | StringRef substr(ldata.data, ldata.size); | 128 | 60 | StringSearch search {&substr}; | 129 | | | 130 | 120 | for (int i = 0; i < size; ++i) { | 131 | 60 | res[i] = locate_pos<is_ascii>(substr, | 132 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 60 | } | 135 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 60 | size_t size) const { | 126 | 60 | res.resize(size); | 127 | 60 | StringRef substr(ldata.data, ldata.size); | 128 | 60 | StringSearch search {&substr}; | 129 | | | 130 | 120 | for (int i = 0; i < size; ++i) { | 131 | 60 | res[i] = locate_pos<is_ascii>(substr, | 132 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 60 | } | 135 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 125 | 60 | size_t size) const { | 126 | 60 | res.resize(size); | 127 | 60 | StringRef substr(ldata.data, ldata.size); | 128 | 60 | StringSearch search {&substr}; | 129 | | | 130 | 120 | for (int i = 0; i < size; ++i) { | 131 | 60 | res[i] = locate_pos<is_ascii>(substr, | 132 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 133 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 134 | 60 | } | 135 | 60 | } |
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m |
136 | | |
137 | | template <bool is_ascii, bool str_const, bool pos_const> |
138 | | void vector_search(const ColumnString* col_left, const ColumnString* col_right, |
139 | | const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res, |
140 | 330 | size_t size) const { |
141 | 330 | res.resize(size); |
142 | 330 | StringSearch search; |
143 | 774 | for (int i = 0; i < size; ++i) { |
144 | 444 | StringRef substr = col_left->get_data_at(i); |
145 | 444 | search.set_pattern(&substr); |
146 | 444 | res[i] = locate_pos<is_ascii>(substr, |
147 | 444 | col_right->get_data_at(index_check_const<str_const>(i)), |
148 | 444 | search, posdata[index_check_const<pos_const>(i)]); |
149 | 444 | } |
150 | 330 | } _ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 23 | size_t size) const { | 141 | 23 | res.resize(size); | 142 | 23 | StringSearch search; | 143 | 71 | for (int i = 0; i < size; ++i) { | 144 | 48 | StringRef substr = col_left->get_data_at(i); | 145 | 48 | search.set_pattern(&substr); | 146 | 48 | res[i] = locate_pos<is_ascii>(substr, | 147 | 48 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 48 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 48 | } | 150 | 23 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 22 | size_t size) const { | 141 | 22 | res.resize(size); | 142 | 22 | StringSearch search; | 143 | 44 | for (int i = 0; i < size; ++i) { | 144 | 22 | StringRef substr = col_left->get_data_at(i); | 145 | 22 | search.set_pattern(&substr); | 146 | 22 | res[i] = locate_pos<is_ascii>(substr, | 147 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 22 | } | 150 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 22 | size_t size) const { | 141 | 22 | res.resize(size); | 142 | 22 | StringSearch search; | 143 | 44 | for (int i = 0; i < size; ++i) { | 144 | 22 | StringRef substr = col_left->get_data_at(i); | 145 | 22 | search.set_pattern(&substr); | 146 | 22 | res[i] = locate_pos<is_ascii>(substr, | 147 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 22 | } | 150 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 22 | size_t size) const { | 141 | 22 | res.resize(size); | 142 | 22 | StringSearch search; | 143 | 44 | for (int i = 0; i < size; ++i) { | 144 | 22 | StringRef substr = col_left->get_data_at(i); | 145 | 22 | search.set_pattern(&substr); | 146 | 22 | res[i] = locate_pos<is_ascii>(substr, | 147 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 22 | } | 150 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 61 | size_t size) const { | 141 | 61 | res.resize(size); | 142 | 61 | StringSearch search; | 143 | 211 | for (int i = 0; i < size; ++i) { | 144 | 150 | StringRef substr = col_left->get_data_at(i); | 145 | 150 | search.set_pattern(&substr); | 146 | 150 | res[i] = locate_pos<is_ascii>(substr, | 147 | 150 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 150 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 150 | } | 150 | 61 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 60 | size_t size) const { | 141 | 60 | res.resize(size); | 142 | 60 | StringSearch search; | 143 | 120 | for (int i = 0; i < size; ++i) { | 144 | 60 | StringRef substr = col_left->get_data_at(i); | 145 | 60 | search.set_pattern(&substr); | 146 | 60 | res[i] = locate_pos<is_ascii>(substr, | 147 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 60 | } | 150 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 60 | size_t size) const { | 141 | 60 | res.resize(size); | 142 | 60 | StringSearch search; | 143 | 120 | for (int i = 0; i < size; ++i) { | 144 | 60 | StringRef substr = col_left->get_data_at(i); | 145 | 60 | search.set_pattern(&substr); | 146 | 60 | res[i] = locate_pos<is_ascii>(substr, | 147 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 60 | } | 150 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 140 | 60 | size_t size) const { | 141 | 60 | res.resize(size); | 142 | 60 | StringSearch search; | 143 | 120 | for (int i = 0; i < size; ++i) { | 144 | 60 | StringRef substr = col_left->get_data_at(i); | 145 | 60 | search.set_pattern(&substr); | 146 | 60 | res[i] = locate_pos<is_ascii>(substr, | 147 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 148 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 149 | 60 | } | 150 | 60 | } |
|
151 | | |
152 | | template <bool is_ascii> |
153 | 690 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { |
154 | 690 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { |
155 | | // BEHAVIOR COMPATIBLE WITH MYSQL |
156 | | // locate('','') locate('','',1) locate('','',2) |
157 | | // 1 1 0 |
158 | 11 | return 1; |
159 | 11 | } |
160 | 679 | if (is_ascii) { |
161 | 499 | return locate_pos_ascii(substr, str, search, start_pos); |
162 | 499 | } else { |
163 | 180 | return locate_pos_utf8(substr, str, search, start_pos); |
164 | 180 | } |
165 | 679 | } _ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi Line | Count | Source | 153 | 180 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { | 154 | 180 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { | 155 | | // BEHAVIOR COMPATIBLE WITH MYSQL | 156 | | // locate('','') locate('','',1) locate('','',2) | 157 | | // 1 1 0 | 158 | 0 | return 1; | 159 | 0 | } | 160 | 180 | if (is_ascii) { | 161 | 0 | return locate_pos_ascii(substr, str, search, start_pos); | 162 | 180 | } else { | 163 | 180 | return locate_pos_utf8(substr, str, search, start_pos); | 164 | 180 | } | 165 | 180 | } |
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi Line | Count | Source | 153 | 510 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { | 154 | 510 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { | 155 | | // BEHAVIOR COMPATIBLE WITH MYSQL | 156 | | // locate('','') locate('','',1) locate('','',2) | 157 | | // 1 1 0 | 158 | 11 | return 1; | 159 | 11 | } | 160 | 499 | if (is_ascii) { | 161 | 499 | return locate_pos_ascii(substr, str, search, start_pos); | 162 | 499 | } else { | 163 | 0 | return locate_pos_utf8(substr, str, search, start_pos); | 164 | 0 | } | 165 | 499 | } |
|
166 | | |
167 | | int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search, |
168 | 180 | int start_pos) const { |
169 | 180 | std::vector<size_t> index; |
170 | 180 | size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index); |
171 | 180 | if (start_pos <= 0 || start_pos > char_len) { |
172 | 43 | return 0; |
173 | 43 | } |
174 | 137 | if (substr.size == 0) { |
175 | 17 | return start_pos; |
176 | 17 | } |
177 | | // Input start_pos starts from 1. |
178 | 120 | StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]); |
179 | 120 | int32_t match_pos = search.search(&adjusted_str); |
180 | 120 | if (match_pos >= 0) { |
181 | | // Hive returns the position in the original string starting from 1. |
182 | 104 | return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos); |
183 | 104 | } else { |
184 | 16 | return 0; |
185 | 16 | } |
186 | 120 | } |
187 | | |
188 | | int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search, |
189 | 499 | int start_pos) const { |
190 | 499 | if (start_pos <= 0 || start_pos > str.size) { |
191 | 367 | return 0; |
192 | 367 | } |
193 | 132 | if (substr.size == 0) { |
194 | 36 | return start_pos; |
195 | 36 | } |
196 | | // Input start_pos starts from 1. |
197 | 96 | StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1); |
198 | 96 | int32_t match_pos = search.search(&adjusted_str); |
199 | 96 | if (match_pos >= 0) { |
200 | | // Hive returns the position in the original string starting from 1. |
201 | 40 | return start_pos + match_pos; |
202 | 56 | } else { |
203 | 56 | return 0; |
204 | 56 | } |
205 | 96 | } |
206 | | }; |
207 | | |
208 | | class FunctionSplitPart : public IFunction { |
209 | | public: |
210 | | static constexpr auto name = "split_part"; |
211 | 2 | static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); } |
212 | 1 | String get_name() const override { return name; } |
213 | 0 | size_t get_number_of_arguments() const override { return 3; } |
214 | | |
215 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
216 | 0 | return make_nullable(std::make_shared<DataTypeString>()); |
217 | 0 | } |
218 | | |
219 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
220 | 0 | uint32_t result, size_t input_rows_count) const override { |
221 | 0 | DCHECK_EQ(arguments.size(), 3); |
222 | |
|
223 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
224 | | // Create a zero column to simply implement |
225 | 0 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); |
226 | 0 | auto res = ColumnString::create(); |
227 | |
|
228 | 0 | auto& null_map_data = null_map->get_data(); |
229 | 0 | auto& res_offsets = res->get_offsets(); |
230 | 0 | auto& res_chars = res->get_chars(); |
231 | 0 | res_offsets.resize(input_rows_count); |
232 | |
|
233 | 0 | const size_t argument_size = arguments.size(); |
234 | 0 | std::vector<ColumnPtr> argument_columns(argument_size); |
235 | 0 | for (size_t i = 0; i < argument_size; ++i) { |
236 | 0 | argument_columns[i] = |
237 | 0 | block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); |
238 | 0 | if (const auto* nullable = |
239 | 0 | check_and_get_column<const ColumnNullable>(*argument_columns[i])) { |
240 | | // Danger: Here must dispose the null map data first! Because |
241 | | // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem |
242 | | // of column nullable mem of null map |
243 | 0 | VectorizedUtils::update_null_map(null_map->get_data(), |
244 | 0 | nullable->get_null_map_data()); |
245 | 0 | argument_columns[i] = nullable->get_nested_column_ptr(); |
246 | 0 | } |
247 | 0 | } |
248 | |
|
249 | 0 | const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get()); |
250 | |
|
251 | 0 | const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get()); |
252 | |
|
253 | 0 | const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
254 | 0 | const auto& part_num_col_data = part_num_col->get_data(); |
255 | |
|
256 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
257 | 0 | if (part_num_col_data[i] == 0) { |
258 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
259 | 0 | continue; |
260 | 0 | } |
261 | | |
262 | 0 | auto delimiter = delimiter_col->get_data_at(i); |
263 | 0 | auto delimiter_str = delimiter_col->get_data_at(i).to_string(); |
264 | 0 | auto part_number = part_num_col_data[i]; |
265 | 0 | auto str = str_col->get_data_at(i); |
266 | 0 | if (delimiter.size == 0) { |
267 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
268 | 0 | continue; |
269 | 0 | } |
270 | | |
271 | 0 | if (part_number > 0) { |
272 | 0 | if (delimiter.size == 1) { |
273 | | // If delimiter is a char, use memchr to split |
274 | 0 | int32_t pre_offset = -1; |
275 | 0 | int32_t offset = -1; |
276 | 0 | int32_t num = 0; |
277 | 0 | while (num < part_number) { |
278 | 0 | pre_offset = offset; |
279 | 0 | size_t n = str.size - offset - 1; |
280 | 0 | const char* pos = reinterpret_cast<const char*>( |
281 | 0 | memchr(str.data + offset + 1, delimiter_str[0], n)); |
282 | 0 | if (pos != nullptr) { |
283 | 0 | offset = pos - str.data; |
284 | 0 | num++; |
285 | 0 | } else { |
286 | 0 | offset = str.size; |
287 | 0 | num = (num == 0) ? 0 : num + 1; |
288 | 0 | break; |
289 | 0 | } |
290 | 0 | } |
291 | |
|
292 | 0 | if (num == part_number) { |
293 | 0 | StringOP::push_value_string( |
294 | 0 | std::string_view { |
295 | 0 | reinterpret_cast<const char*>(str.data + pre_offset + 1), |
296 | 0 | (size_t)offset - pre_offset - 1}, |
297 | 0 | i, res_chars, res_offsets); |
298 | 0 | } else { |
299 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
300 | 0 | } |
301 | 0 | } else { |
302 | | // If delimiter is a string, use memmem to split |
303 | 0 | int32_t pre_offset = -delimiter.size; |
304 | 0 | int32_t offset = -delimiter.size; |
305 | 0 | int32_t num = 0; |
306 | 0 | while (num < part_number) { |
307 | 0 | pre_offset = offset; |
308 | 0 | size_t n = str.size - offset - delimiter.size; |
309 | 0 | char* pos = |
310 | 0 | reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size, |
311 | 0 | n, delimiter.data, delimiter.size)); |
312 | 0 | if (pos != nullptr) { |
313 | 0 | offset = pos - str.data; |
314 | 0 | num++; |
315 | 0 | } else { |
316 | 0 | offset = str.size; |
317 | 0 | num = (num == 0) ? 0 : num + 1; |
318 | 0 | break; |
319 | 0 | } |
320 | 0 | } |
321 | |
|
322 | 0 | if (num == part_number) { |
323 | 0 | StringOP::push_value_string( |
324 | 0 | std::string_view {reinterpret_cast<const char*>( |
325 | 0 | str.data + pre_offset + delimiter.size), |
326 | 0 | (size_t)offset - pre_offset - delimiter.size}, |
327 | 0 | i, res_chars, res_offsets); |
328 | 0 | } else { |
329 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
330 | 0 | } |
331 | 0 | } |
332 | 0 | } else { |
333 | 0 | part_number = -part_number; |
334 | 0 | auto str_str = str.to_string(); |
335 | 0 | int32_t offset = str.size; |
336 | 0 | int32_t pre_offset = offset; |
337 | 0 | int32_t num = 0; |
338 | 0 | auto substr = str_str; |
339 | 0 | while (num <= part_number && offset >= 0) { |
340 | 0 | offset = (int)substr.rfind(delimiter, offset); |
341 | 0 | if (offset != -1) { |
342 | 0 | if (++num == part_number) { |
343 | 0 | break; |
344 | 0 | } |
345 | 0 | pre_offset = offset; |
346 | 0 | offset = offset - 1; |
347 | 0 | substr = str_str.substr(0, pre_offset); |
348 | 0 | } else { |
349 | 0 | break; |
350 | 0 | } |
351 | 0 | } |
352 | 0 | num = (offset == -1 && num != 0) ? num + 1 : num; |
353 | |
|
354 | 0 | if (num == part_number) { |
355 | 0 | if (offset == -1) { |
356 | 0 | StringOP::push_value_string( |
357 | 0 | std::string_view {reinterpret_cast<const char*>(str.data), |
358 | 0 | (size_t)pre_offset}, |
359 | 0 | i, res_chars, res_offsets); |
360 | 0 | } else { |
361 | 0 | StringOP::push_value_string( |
362 | 0 | std::string_view {str_str.substr( |
363 | 0 | offset + delimiter.size, |
364 | 0 | (size_t)pre_offset - offset - delimiter.size)}, |
365 | 0 | i, res_chars, res_offsets); |
366 | 0 | } |
367 | 0 | } else { |
368 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
369 | 0 | } |
370 | 0 | } |
371 | 0 | } |
372 | |
|
373 | 0 | block.get_by_position(result).column = |
374 | 0 | ColumnNullable::create(std::move(res), std::move(null_map)); |
375 | 0 | return Status::OK(); |
376 | 0 | } |
377 | | }; |
378 | | |
379 | | class FunctionSubstringIndex : public IFunction { |
380 | | public: |
381 | | static constexpr auto name = "substring_index"; |
382 | 2 | static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); } |
383 | 1 | String get_name() const override { return name; } |
384 | 0 | size_t get_number_of_arguments() const override { return 3; } |
385 | | |
386 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
387 | 0 | return std::make_shared<DataTypeString>(); |
388 | 0 | } |
389 | | |
390 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
391 | 0 | uint32_t result, size_t input_rows_count) const override { |
392 | 0 | DCHECK_EQ(arguments.size(), 3); |
393 | | |
394 | | // Create a zero column to simply implement |
395 | 0 | auto res = ColumnString::create(); |
396 | |
|
397 | 0 | auto& res_offsets = res->get_offsets(); |
398 | 0 | auto& res_chars = res->get_chars(); |
399 | 0 | res_offsets.resize(input_rows_count); |
400 | 0 | ColumnPtr content_column; |
401 | 0 | bool content_const = false; |
402 | 0 | std::tie(content_column, content_const) = |
403 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
404 | |
|
405 | 0 | const auto* str_col = assert_cast<const ColumnString*>(content_column.get()); |
406 | | |
407 | | // Handle both constant and non-constant delimiter parameters |
408 | 0 | ColumnPtr delimiter_column_ptr; |
409 | 0 | bool delimiter_const = false; |
410 | 0 | std::tie(delimiter_column_ptr, delimiter_const) = |
411 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
412 | 0 | const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get()); |
413 | |
|
414 | 0 | ColumnPtr part_num_column_ptr; |
415 | 0 | bool part_num_const = false; |
416 | 0 | std::tie(part_num_column_ptr, part_num_const) = |
417 | 0 | unpack_if_const(block.get_by_position(arguments[2]).column); |
418 | 0 | const ColumnInt32* part_num_col = |
419 | 0 | assert_cast<const ColumnInt32*>(part_num_column_ptr.get()); |
420 | | |
421 | | // For constant multi-character delimiters, create StringRef and StringSearch only once |
422 | 0 | std::optional<StringRef> const_delimiter_ref; |
423 | 0 | std::optional<StringSearch> const_search; |
424 | 0 | if (delimiter_const && delimiter_col->get_data_at(0).size > 1) { |
425 | 0 | const_delimiter_ref.emplace(delimiter_col->get_data_at(0)); |
426 | 0 | const_search.emplace(&const_delimiter_ref.value()); |
427 | 0 | } |
428 | |
|
429 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
430 | 0 | auto str = str_col->get_data_at(content_const ? 0 : i); |
431 | 0 | auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i); |
432 | 0 | int32_t delimiter_size = delimiter.size; |
433 | |
|
434 | 0 | auto part_number = part_num_col->get_element(part_num_const ? 0 : i); |
435 | |
|
436 | 0 | if (part_number == 0 || delimiter_size == 0) { |
437 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
438 | 0 | continue; |
439 | 0 | } |
440 | | |
441 | 0 | if (part_number > 0) { |
442 | 0 | if (delimiter_size == 1) { |
443 | 0 | int32_t offset = -1; |
444 | 0 | int32_t num = 0; |
445 | 0 | while (num < part_number) { |
446 | 0 | size_t n = str.size - offset - 1; |
447 | 0 | const char* pos = reinterpret_cast<const char*>( |
448 | 0 | memchr(str.data + offset + 1, delimiter.data[0], n)); |
449 | 0 | if (pos != nullptr) { |
450 | 0 | offset = pos - str.data; |
451 | 0 | num++; |
452 | 0 | } else { |
453 | 0 | offset = str.size; |
454 | 0 | num = (num == 0) ? 0 : num + 1; |
455 | 0 | break; |
456 | 0 | } |
457 | 0 | } |
458 | |
|
459 | 0 | if (num == part_number) { |
460 | 0 | StringOP::push_value_string( |
461 | 0 | std::string_view {reinterpret_cast<const char*>(str.data), |
462 | 0 | (size_t)offset}, |
463 | 0 | i, res_chars, res_offsets); |
464 | 0 | } else { |
465 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
466 | 0 | res_chars, res_offsets); |
467 | 0 | } |
468 | 0 | } else { |
469 | | // For multi-character delimiters |
470 | | // Use pre-created StringRef and StringSearch for constant delimiters |
471 | 0 | StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value() |
472 | 0 | : StringRef(delimiter); |
473 | 0 | const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr; |
474 | 0 | StringSearch local_search(&delimiter_ref); |
475 | 0 | if (!search_ptr) { |
476 | 0 | search_ptr = &local_search; |
477 | 0 | } |
478 | |
|
479 | 0 | int32_t offset = -delimiter_size; |
480 | 0 | int32_t num = 0; |
481 | 0 | while (num < part_number) { |
482 | 0 | size_t n = str.size - offset - delimiter_size; |
483 | | // search first match delimter_ref index from src string among str_offset to end |
484 | 0 | const char* pos = search_ptr->search(str.data + offset + delimiter_size, n); |
485 | 0 | if (pos < str.data + str.size) { |
486 | 0 | offset = pos - str.data; |
487 | 0 | num++; |
488 | 0 | } else { |
489 | 0 | offset = str.size; |
490 | 0 | num = (num == 0) ? 0 : num + 1; |
491 | 0 | break; |
492 | 0 | } |
493 | 0 | } |
494 | |
|
495 | 0 | if (num == part_number) { |
496 | 0 | StringOP::push_value_string( |
497 | 0 | std::string_view {reinterpret_cast<const char*>(str.data), |
498 | 0 | (size_t)offset}, |
499 | 0 | i, res_chars, res_offsets); |
500 | 0 | } else { |
501 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
502 | 0 | res_chars, res_offsets); |
503 | 0 | } |
504 | 0 | } |
505 | 0 | } else { |
506 | 0 | int neg_part_number = -part_number; |
507 | 0 | auto str_str = str.to_string(); |
508 | 0 | int32_t offset = str.size; |
509 | 0 | int32_t pre_offset = offset; |
510 | 0 | int32_t num = 0; |
511 | 0 | auto substr = str_str; |
512 | | |
513 | | // Use pre-created StringRef for constant delimiters |
514 | 0 | StringRef delimiter_str = |
515 | 0 | const_delimiter_ref |
516 | 0 | ? const_delimiter_ref.value() |
517 | 0 | : StringRef(reinterpret_cast<const char*>(delimiter.data), |
518 | 0 | delimiter.size); |
519 | |
|
520 | 0 | while (num <= neg_part_number && offset >= 0) { |
521 | 0 | offset = (int)substr.rfind(delimiter_str, offset); |
522 | 0 | if (offset != -1) { |
523 | 0 | if (++num == neg_part_number) { |
524 | 0 | break; |
525 | 0 | } |
526 | 0 | pre_offset = offset; |
527 | 0 | offset = offset - 1; |
528 | 0 | substr = str_str.substr(0, pre_offset); |
529 | 0 | } else { |
530 | 0 | break; |
531 | 0 | } |
532 | 0 | } |
533 | 0 | num = (offset == -1 && num != 0) ? num + 1 : num; |
534 | |
|
535 | 0 | if (num == neg_part_number) { |
536 | 0 | if (offset == -1) { |
537 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
538 | 0 | res_chars, res_offsets); |
539 | 0 | } else { |
540 | 0 | StringOP::push_value_string( |
541 | 0 | std::string_view {str.data + offset + delimiter_size, |
542 | 0 | str.size - offset - delimiter_size}, |
543 | 0 | i, res_chars, res_offsets); |
544 | 0 | } |
545 | 0 | } else { |
546 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars, |
547 | 0 | res_offsets); |
548 | 0 | } |
549 | 0 | } |
550 | 0 | } |
551 | |
|
552 | 0 | block.get_by_position(result).column = std::move(res); |
553 | 0 | return Status::OK(); |
554 | 0 | } |
555 | | }; |
556 | | |
557 | | class FunctionSplitByString : public IFunction { |
558 | | public: |
559 | | static constexpr auto name = "split_by_string"; |
560 | | |
561 | 2 | static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); } |
562 | | using NullMapType = PaddedPODArray<UInt8>; |
563 | | |
564 | 1 | String get_name() const override { return name; } |
565 | | |
566 | 1 | bool is_variadic() const override { return false; } |
567 | | |
568 | 0 | size_t get_number_of_arguments() const override { return 2; } |
569 | | |
570 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
571 | 0 | DCHECK(is_string_type(arguments[0]->get_primitive_type())) |
572 | 0 | << "first argument for function: " << name << " should be string" |
573 | 0 | << " and arguments[0] is " << arguments[0]->get_name(); |
574 | 0 | DCHECK(is_string_type(arguments[1]->get_primitive_type())) |
575 | 0 | << "second argument for function: " << name << " should be string" |
576 | 0 | << " and arguments[1] is " << arguments[1]->get_name(); |
577 | 0 | return std::make_shared<DataTypeArray>(make_nullable(arguments[0])); |
578 | 0 | } |
579 | | |
580 | | Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, |
581 | 0 | uint32_t result, size_t input_rows_count) const override { |
582 | 0 | DCHECK_EQ(arguments.size(), 2); |
583 | |
|
584 | 0 | const auto& [src_column, left_const] = |
585 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
586 | 0 | const auto& [right_column, right_const] = |
587 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
588 | |
|
589 | 0 | DataTypePtr right_column_type = block.get_by_position(arguments[1]).type; |
590 | 0 | DataTypePtr src_column_type = block.get_by_position(arguments[0]).type; |
591 | 0 | auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(), |
592 | 0 | ColumnArray::ColumnOffsets::create()); |
593 | |
|
594 | 0 | dest_column_ptr->resize(0); |
595 | 0 | auto& dest_offsets = dest_column_ptr->get_offsets(); |
596 | |
|
597 | 0 | auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data()); |
598 | 0 | auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get(); |
599 | |
|
600 | 0 | const auto* col_str = assert_cast<const ColumnString*>(src_column.get()); |
601 | |
|
602 | 0 | const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get()); |
603 | |
|
604 | 0 | std::visit( |
605 | 0 | [&](auto src_const, auto delimiter_const) { |
606 | 0 | _execute<src_const, delimiter_const>(*col_str, *col_delimiter, |
607 | 0 | *dest_nested_column, dest_offsets, |
608 | 0 | input_rows_count); |
609 | 0 | }, Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESF_EEDaSA_SB_ Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESE_IbLb1EEEEDaSA_SB_ Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESE_IbLb0EEEEDaSA_SB_ Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESF_EEDaSA_SB_ |
610 | 0 | make_bool_variant(left_const), make_bool_variant(right_const)); |
611 | | |
612 | | // all elements in dest_nested_column are not null |
613 | 0 | dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(), |
614 | 0 | false); |
615 | 0 | block.replace_by_position(result, std::move(dest_column_ptr)); |
616 | |
|
617 | 0 | return Status::OK(); |
618 | 0 | } |
619 | | |
620 | | private: |
621 | | template <bool src_const, bool delimiter_const> |
622 | | void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column, |
623 | | IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets, |
624 | 0 | size_t size) const { |
625 | 0 | auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column); |
626 | 0 | ColumnString::Chars& column_string_chars = dest_column_string.get_chars(); |
627 | 0 | ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); |
628 | 0 | column_string_chars.reserve(0); |
629 | |
|
630 | 0 | ColumnArray::Offset64 string_pos = 0; |
631 | 0 | ColumnArray::Offset64 dest_pos = 0; |
632 | |
|
633 | 0 | StringSearch search; |
634 | 0 | StringRef delimiter_ref_for_search; |
635 | |
|
636 | 0 | if constexpr (delimiter_const) { |
637 | 0 | delimiter_ref_for_search = delimiter_column.get_data_at(0); |
638 | 0 | search.set_pattern(&delimiter_ref_for_search); |
639 | 0 | } |
640 | |
|
641 | 0 | for (size_t i = 0; i < size; i++) { |
642 | 0 | const StringRef str_ref = |
643 | 0 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
644 | 0 | const StringRef delimiter_ref = |
645 | 0 | delimiter_column.get_data_at(index_check_const<delimiter_const>(i)); |
646 | |
|
647 | 0 | if (str_ref.size == 0) { |
648 | 0 | dest_offsets.push_back(dest_pos); |
649 | 0 | continue; |
650 | 0 | } |
651 | 0 | if (delimiter_ref.size == 0) { |
652 | 0 | split_empty_delimiter(str_ref, column_string_chars, column_string_offsets, |
653 | 0 | string_pos, dest_pos); |
654 | 0 | } else { |
655 | 0 | if constexpr (!delimiter_const) { |
656 | 0 | search.set_pattern(&delimiter_ref); |
657 | 0 | } |
658 | 0 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { |
659 | 0 | const size_t str_offset = str_pos; |
660 | 0 | const size_t old_size = column_string_chars.size(); |
661 | | // search first match delimter_ref index from src string among str_offset to end |
662 | 0 | const char* result_start = |
663 | 0 | search.search(str_ref.data + str_offset, str_ref.size - str_offset); |
664 | | // compute split part size |
665 | 0 | const size_t split_part_size = result_start - str_ref.data - str_offset; |
666 | | // save dist string split part |
667 | 0 | if (split_part_size > 0) { |
668 | 0 | const size_t new_size = old_size + split_part_size; |
669 | 0 | column_string_chars.resize(new_size); |
670 | 0 | memcpy_small_allow_read_write_overflow15( |
671 | 0 | column_string_chars.data() + old_size, str_ref.data + str_offset, |
672 | 0 | split_part_size); |
673 | | // add dist string offset |
674 | 0 | string_pos += split_part_size; |
675 | 0 | } |
676 | 0 | column_string_offsets.push_back(string_pos); |
677 | | // array offset + 1 |
678 | 0 | dest_pos++; |
679 | | // add src string str_pos to next search start |
680 | 0 | str_pos += split_part_size + delimiter_ref.size; |
681 | 0 | } |
682 | 0 | } |
683 | 0 | dest_offsets.push_back(dest_pos); |
684 | 0 | } |
685 | 0 | } Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
686 | | |
687 | | void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars, |
688 | | ColumnString::Offsets& column_string_offsets, |
689 | | ColumnArray::Offset64& string_pos, |
690 | 0 | ColumnArray::Offset64& dest_pos) const { |
691 | 0 | const size_t old_size = column_string_chars.size(); |
692 | 0 | const size_t new_size = old_size + str_ref.size; |
693 | 0 | column_string_chars.resize(new_size); |
694 | 0 | memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size); |
695 | 0 | if (simd::VStringFunctions::is_ascii(str_ref)) { |
696 | 0 | const auto size = str_ref.size; |
697 | |
|
698 | 0 | const auto nested_old_size = column_string_offsets.size(); |
699 | 0 | const auto nested_new_size = nested_old_size + size; |
700 | 0 | column_string_offsets.resize(nested_new_size); |
701 | 0 | std::iota(column_string_offsets.data() + nested_old_size, |
702 | 0 | column_string_offsets.data() + nested_new_size, string_pos + 1); |
703 | |
|
704 | 0 | string_pos += size; |
705 | 0 | dest_pos += size; |
706 | | // The above code is equivalent to the code in the following comment. |
707 | | // for (size_t i = 0; i < str_ref.size; i++) { |
708 | | // string_pos++; |
709 | | // column_string_offsets.push_back(string_pos); |
710 | | // (*dest_nested_null_map).push_back(false); |
711 | | // dest_pos++; |
712 | | // } |
713 | 0 | } else { |
714 | 0 | for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) { |
715 | 0 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]]; |
716 | |
|
717 | 0 | string_pos += utf8_char_len; |
718 | 0 | column_string_offsets.push_back(string_pos); |
719 | 0 | dest_pos++; |
720 | 0 | } |
721 | 0 | } |
722 | 0 | } |
723 | | }; |
724 | | |
725 | | enum class FunctionCountSubStringType { TWO_ARGUMENTS, THREE_ARGUMENTS }; |
726 | | |
727 | | template <FunctionCountSubStringType type> |
728 | | class FunctionCountSubString : public IFunction { |
729 | | public: |
730 | | static constexpr auto name = "count_substrings"; |
731 | | static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3; |
732 | | |
733 | 223 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv Line | Count | Source | 733 | 45 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); } |
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv Line | Count | Source | 733 | 178 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); } |
|
734 | | using NullMapType = PaddedPODArray<UInt8>; |
735 | | |
736 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev |
737 | | |
738 | 0 | size_t get_number_of_arguments() const override { return arg_count; }Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv |
739 | | |
740 | 219 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
741 | 219 | return std::make_shared<DataTypeInt32>(); |
742 | 219 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 740 | 43 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 741 | 43 | return std::make_shared<DataTypeInt32>(); | 742 | 43 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 740 | 176 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 741 | 176 | return std::make_shared<DataTypeInt32>(); | 742 | 176 | } |
|
743 | | |
744 | 2 | DataTypes get_variadic_argument_types_impl() const override { |
745 | 2 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { |
746 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; |
747 | 1 | } else { |
748 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
749 | 1 | std::make_shared<DataTypeInt32>()}; |
750 | 1 | } |
751 | 2 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv Line | Count | Source | 744 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 745 | 1 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 746 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 747 | | } else { | 748 | | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 749 | | std::make_shared<DataTypeInt32>()}; | 750 | | } | 751 | 1 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv Line | Count | Source | 744 | 1 | DataTypes get_variadic_argument_types_impl() const override { | 745 | | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 746 | | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 747 | 1 | } else { | 748 | 1 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 749 | 1 | std::make_shared<DataTypeInt32>()}; | 750 | 1 | } | 751 | 1 | } |
|
752 | | |
753 | 221 | bool is_variadic() const override { return true; }_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv Line | Count | Source | 753 | 44 | bool is_variadic() const override { return true; } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv Line | Count | Source | 753 | 177 | bool is_variadic() const override { return true; } |
|
754 | | |
755 | | Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, |
756 | 189 | uint32_t result, size_t input_rows_count) const override { |
757 | 189 | DCHECK(arg_count); |
758 | 189 | bool col_const[arg_count]; |
759 | 189 | ColumnPtr argument_columns[arg_count]; |
760 | 722 | for (int i = 0; i < arg_count; ++i) { |
761 | 533 | std::tie(argument_columns[i], col_const[i]) = |
762 | 533 | unpack_if_const(block.get_by_position(arguments[i]).column); |
763 | 533 | } |
764 | | |
765 | 189 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); |
766 | 189 | auto& dest_column_data = dest_column_ptr->get_data(); |
767 | | |
768 | 189 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { |
769 | 34 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); |
770 | 34 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); |
771 | 34 | std::visit( |
772 | 34 | [&](auto str_const, auto pattern_const) { |
773 | 34 | _execute<str_const, pattern_const>(src_column_string, pattern_column, |
774 | 34 | dest_column_data, input_rows_count); |
775 | 34 | }, _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_ Line | Count | Source | 772 | 12 | [&](auto str_const, auto pattern_const) { | 773 | 12 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 774 | 12 | dest_column_data, input_rows_count); | 775 | 12 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_ Line | Count | Source | 772 | 11 | [&](auto str_const, auto pattern_const) { | 773 | 11 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 774 | 11 | dest_column_data, input_rows_count); | 775 | 11 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_ Line | Count | Source | 772 | 11 | [&](auto str_const, auto pattern_const) { | 773 | 11 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 774 | 11 | dest_column_data, input_rows_count); | 775 | 11 | }, |
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_ |
776 | 34 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); |
777 | 155 | } else { |
778 | 155 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); |
779 | 155 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); |
780 | 155 | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); |
781 | 155 | std::visit( |
782 | 155 | [&](auto str_const, auto pattern_const, auto start_pos_const) { |
783 | 155 | _execute<str_const, pattern_const, start_pos_const>( |
784 | 155 | src_column_string, pattern_column, start_pos_column, |
785 | 155 | dest_column_data, input_rows_count); |
786 | 155 | }, _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 782 | 23 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 783 | 23 | _execute<str_const, pattern_const, start_pos_const>( | 784 | 23 | src_column_string, pattern_column, start_pos_column, | 785 | 23 | dest_column_data, input_rows_count); | 786 | 23 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 782 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 783 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 784 | 22 | src_column_string, pattern_column, start_pos_column, | 785 | 22 | dest_column_data, input_rows_count); | 786 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 782 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 783 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 784 | 22 | src_column_string, pattern_column, start_pos_column, | 785 | 22 | dest_column_data, input_rows_count); | 786 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 782 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 783 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 784 | 22 | src_column_string, pattern_column, start_pos_column, | 785 | 22 | dest_column_data, input_rows_count); | 786 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 782 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 783 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 784 | 22 | src_column_string, pattern_column, start_pos_column, | 785 | 22 | dest_column_data, input_rows_count); | 786 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 782 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 783 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 784 | 22 | src_column_string, pattern_column, start_pos_column, | 785 | 22 | dest_column_data, input_rows_count); | 786 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 782 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 783 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 784 | 22 | src_column_string, pattern_column, start_pos_column, | 785 | 22 | dest_column_data, input_rows_count); | 786 | 22 | }, |
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ |
787 | 155 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
788 | 155 | make_bool_variant(col_const[2])); |
789 | 155 | } |
790 | | |
791 | 189 | block.replace_by_position(result, std::move(dest_column_ptr)); |
792 | 189 | return Status::OK(); |
793 | 189 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 756 | 34 | uint32_t result, size_t input_rows_count) const override { | 757 | 34 | DCHECK(arg_count); | 758 | 34 | bool col_const[arg_count]; | 759 | 34 | ColumnPtr argument_columns[arg_count]; | 760 | 102 | for (int i = 0; i < arg_count; ++i) { | 761 | 68 | std::tie(argument_columns[i], col_const[i]) = | 762 | 68 | unpack_if_const(block.get_by_position(arguments[i]).column); | 763 | 68 | } | 764 | | | 765 | 34 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); | 766 | 34 | auto& dest_column_data = dest_column_ptr->get_data(); | 767 | | | 768 | 34 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 769 | 34 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 770 | 34 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 771 | 34 | std::visit( | 772 | 34 | [&](auto str_const, auto pattern_const) { | 773 | 34 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 774 | 34 | dest_column_data, input_rows_count); | 775 | 34 | }, | 776 | 34 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); | 777 | | } else { | 778 | | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 779 | | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 780 | | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); | 781 | | std::visit( | 782 | | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 783 | | _execute<str_const, pattern_const, start_pos_const>( | 784 | | src_column_string, pattern_column, start_pos_column, | 785 | | dest_column_data, input_rows_count); | 786 | | }, | 787 | | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 788 | | make_bool_variant(col_const[2])); | 789 | | } | 790 | | | 791 | 34 | block.replace_by_position(result, std::move(dest_column_ptr)); | 792 | 34 | return Status::OK(); | 793 | 34 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 756 | 155 | uint32_t result, size_t input_rows_count) const override { | 757 | 155 | DCHECK(arg_count); | 758 | 155 | bool col_const[arg_count]; | 759 | 155 | ColumnPtr argument_columns[arg_count]; | 760 | 620 | for (int i = 0; i < arg_count; ++i) { | 761 | 465 | std::tie(argument_columns[i], col_const[i]) = | 762 | 465 | unpack_if_const(block.get_by_position(arguments[i]).column); | 763 | 465 | } | 764 | | | 765 | 155 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); | 766 | 155 | auto& dest_column_data = dest_column_ptr->get_data(); | 767 | | | 768 | | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 769 | | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 770 | | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 771 | | std::visit( | 772 | | [&](auto str_const, auto pattern_const) { | 773 | | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 774 | | dest_column_data, input_rows_count); | 775 | | }, | 776 | | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); | 777 | 155 | } else { | 778 | 155 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 779 | 155 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 780 | 155 | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); | 781 | 155 | std::visit( | 782 | 155 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 783 | 155 | _execute<str_const, pattern_const, start_pos_const>( | 784 | 155 | src_column_string, pattern_column, start_pos_column, | 785 | 155 | dest_column_data, input_rows_count); | 786 | 155 | }, | 787 | 155 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 788 | 155 | make_bool_variant(col_const[2])); | 789 | 155 | } | 790 | | | 791 | 155 | block.replace_by_position(result, std::move(dest_column_ptr)); | 792 | 155 | return Status::OK(); | 793 | 155 | } |
|
794 | | |
795 | | private: |
796 | | template <bool src_const, bool pattern_const> |
797 | | void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column, |
798 | 34 | ColumnInt32::Container& dest_column_data, size_t size) const { |
799 | 81 | for (size_t i = 0; i < size; i++) { |
800 | 47 | const StringRef str_ref = |
801 | 47 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
802 | | |
803 | 47 | const StringRef pattern_ref = |
804 | 47 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); |
805 | 47 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); |
806 | 47 | } |
807 | 34 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 798 | 12 | ColumnInt32::Container& dest_column_data, size_t size) const { | 799 | 37 | for (size_t i = 0; i < size; i++) { | 800 | 25 | const StringRef str_ref = | 801 | 25 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 802 | | | 803 | 25 | const StringRef pattern_ref = | 804 | 25 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 805 | 25 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 806 | 25 | } | 807 | 12 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 798 | 11 | ColumnInt32::Container& dest_column_data, size_t size) const { | 799 | 22 | for (size_t i = 0; i < size; i++) { | 800 | 11 | const StringRef str_ref = | 801 | 11 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 802 | | | 803 | 11 | const StringRef pattern_ref = | 804 | 11 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 805 | 11 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 806 | 11 | } | 807 | 11 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 798 | 11 | ColumnInt32::Container& dest_column_data, size_t size) const { | 799 | 22 | for (size_t i = 0; i < size; i++) { | 800 | 11 | const StringRef str_ref = | 801 | 11 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 802 | | | 803 | 11 | const StringRef pattern_ref = | 804 | 11 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 805 | 11 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 806 | 11 | } | 807 | 11 | } |
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
808 | | |
809 | | template <bool src_const, bool pattern_const, bool start_pos_const> |
810 | | void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column, |
811 | | const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data, |
812 | 155 | size_t size) const { |
813 | 334 | for (size_t i = 0; i < size; i++) { |
814 | 179 | const StringRef str_ref = |
815 | 179 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
816 | 179 | const StringRef pattern_ref = |
817 | 179 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); |
818 | | // 1-based index |
819 | 179 | int32_t start_pos = |
820 | 179 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; |
821 | | |
822 | 179 | const char* p = str_ref.begin(); |
823 | 179 | const char* end = str_ref.end(); |
824 | 179 | int char_size = 0; |
825 | 1.22k | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { |
826 | 1.04k | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; |
827 | 1.04k | } |
828 | 179 | const auto start_byte_len = p - str_ref.begin(); |
829 | | |
830 | 179 | if (start_pos < 0 || start_byte_len >= str_ref.size) { |
831 | 115 | dest_column_data[i] = 0; |
832 | 115 | } else { |
833 | 64 | dest_column_data[i] = |
834 | 64 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); |
835 | 64 | } |
836 | 179 | } |
837 | 155 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 812 | 23 | size_t size) const { | 813 | 70 | for (size_t i = 0; i < size; i++) { | 814 | 47 | const StringRef str_ref = | 815 | 47 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 816 | 47 | const StringRef pattern_ref = | 817 | 47 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 818 | | // 1-based index | 819 | 47 | int32_t start_pos = | 820 | 47 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 821 | | | 822 | 47 | const char* p = str_ref.begin(); | 823 | 47 | const char* end = str_ref.end(); | 824 | 47 | int char_size = 0; | 825 | 316 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 826 | 269 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 827 | 269 | } | 828 | 47 | const auto start_byte_len = p - str_ref.begin(); | 829 | | | 830 | 47 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 831 | 31 | dest_column_data[i] = 0; | 832 | 31 | } else { | 833 | 16 | dest_column_data[i] = | 834 | 16 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 835 | 16 | } | 836 | 47 | } | 837 | 23 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 812 | 22 | size_t size) const { | 813 | 44 | for (size_t i = 0; i < size; i++) { | 814 | 22 | const StringRef str_ref = | 815 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 816 | 22 | const StringRef pattern_ref = | 817 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 818 | | // 1-based index | 819 | 22 | int32_t start_pos = | 820 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 821 | | | 822 | 22 | const char* p = str_ref.begin(); | 823 | 22 | const char* end = str_ref.end(); | 824 | 22 | int char_size = 0; | 825 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 826 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 827 | 129 | } | 828 | 22 | const auto start_byte_len = p - str_ref.begin(); | 829 | | | 830 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 831 | 14 | dest_column_data[i] = 0; | 832 | 14 | } else { | 833 | 8 | dest_column_data[i] = | 834 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 835 | 8 | } | 836 | 22 | } | 837 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 812 | 22 | size_t size) const { | 813 | 44 | for (size_t i = 0; i < size; i++) { | 814 | 22 | const StringRef str_ref = | 815 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 816 | 22 | const StringRef pattern_ref = | 817 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 818 | | // 1-based index | 819 | 22 | int32_t start_pos = | 820 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 821 | | | 822 | 22 | const char* p = str_ref.begin(); | 823 | 22 | const char* end = str_ref.end(); | 824 | 22 | int char_size = 0; | 825 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 826 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 827 | 129 | } | 828 | 22 | const auto start_byte_len = p - str_ref.begin(); | 829 | | | 830 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 831 | 14 | dest_column_data[i] = 0; | 832 | 14 | } else { | 833 | 8 | dest_column_data[i] = | 834 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 835 | 8 | } | 836 | 22 | } | 837 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 812 | 22 | size_t size) const { | 813 | 44 | for (size_t i = 0; i < size; i++) { | 814 | 22 | const StringRef str_ref = | 815 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 816 | 22 | const StringRef pattern_ref = | 817 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 818 | | // 1-based index | 819 | 22 | int32_t start_pos = | 820 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 821 | | | 822 | 22 | const char* p = str_ref.begin(); | 823 | 22 | const char* end = str_ref.end(); | 824 | 22 | int char_size = 0; | 825 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 826 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 827 | 129 | } | 828 | 22 | const auto start_byte_len = p - str_ref.begin(); | 829 | | | 830 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 831 | 14 | dest_column_data[i] = 0; | 832 | 14 | } else { | 833 | 8 | dest_column_data[i] = | 834 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 835 | 8 | } | 836 | 22 | } | 837 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 812 | 22 | size_t size) const { | 813 | 44 | for (size_t i = 0; i < size; i++) { | 814 | 22 | const StringRef str_ref = | 815 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 816 | 22 | const StringRef pattern_ref = | 817 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 818 | | // 1-based index | 819 | 22 | int32_t start_pos = | 820 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 821 | | | 822 | 22 | const char* p = str_ref.begin(); | 823 | 22 | const char* end = str_ref.end(); | 824 | 22 | int char_size = 0; | 825 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 826 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 827 | 129 | } | 828 | 22 | const auto start_byte_len = p - str_ref.begin(); | 829 | | | 830 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 831 | 14 | dest_column_data[i] = 0; | 832 | 14 | } else { | 833 | 8 | dest_column_data[i] = | 834 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 835 | 8 | } | 836 | 22 | } | 837 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 812 | 22 | size_t size) const { | 813 | 44 | for (size_t i = 0; i < size; i++) { | 814 | 22 | const StringRef str_ref = | 815 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 816 | 22 | const StringRef pattern_ref = | 817 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 818 | | // 1-based index | 819 | 22 | int32_t start_pos = | 820 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 821 | | | 822 | 22 | const char* p = str_ref.begin(); | 823 | 22 | const char* end = str_ref.end(); | 824 | 22 | int char_size = 0; | 825 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 826 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 827 | 129 | } | 828 | 22 | const auto start_byte_len = p - str_ref.begin(); | 829 | | | 830 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 831 | 14 | dest_column_data[i] = 0; | 832 | 14 | } else { | 833 | 8 | dest_column_data[i] = | 834 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 835 | 8 | } | 836 | 22 | } | 837 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 812 | 22 | size_t size) const { | 813 | 44 | for (size_t i = 0; i < size; i++) { | 814 | 22 | const StringRef str_ref = | 815 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 816 | 22 | const StringRef pattern_ref = | 817 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 818 | | // 1-based index | 819 | 22 | int32_t start_pos = | 820 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 821 | | | 822 | 22 | const char* p = str_ref.begin(); | 823 | 22 | const char* end = str_ref.end(); | 824 | 22 | int char_size = 0; | 825 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 826 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 827 | 129 | } | 828 | 22 | const auto start_byte_len = p - str_ref.begin(); | 829 | | | 830 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 831 | 14 | dest_column_data[i] = 0; | 832 | 14 | } else { | 833 | 8 | dest_column_data[i] = | 834 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 835 | 8 | } | 836 | 22 | } | 837 | 22 | } |
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
838 | | |
839 | 208 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { |
840 | 208 | size_t old_size = pos; |
841 | 208 | size_t str_size = str_ref.size; |
842 | 1.15k | while (pos < str_size && |
843 | 1.15k | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, |
844 | 1.06k | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { |
845 | 948 | pos++; |
846 | 948 | } |
847 | 208 | return pos - old_size; |
848 | 208 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_ Line | Count | Source | 839 | 56 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { | 840 | 56 | size_t old_size = pos; | 841 | 56 | size_t str_size = str_ref.size; | 842 | 372 | while (pos < str_size && | 843 | 372 | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, | 844 | 344 | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { | 845 | 316 | pos++; | 846 | 316 | } | 847 | 56 | return pos - old_size; | 848 | 56 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_ Line | Count | Source | 839 | 152 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { | 840 | 152 | size_t old_size = pos; | 841 | 152 | size_t str_size = str_ref.size; | 842 | 784 | while (pos < str_size && | 843 | 784 | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, | 844 | 720 | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { | 845 | 632 | pos++; | 846 | 632 | } | 847 | 152 | return pos - old_size; | 848 | 152 | } |
|
849 | | |
850 | 111 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { |
851 | 111 | int count = 0; |
852 | 111 | if (str_ref.size == 0 || pattern_ref.size == 0) { |
853 | 19 | return 0; |
854 | 92 | } else { |
855 | 208 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { |
856 | 208 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); |
857 | 208 | if (res_pos == (str_ref.size - str_pos)) { |
858 | 92 | break; // not find |
859 | 92 | } |
860 | 116 | count++; |
861 | 116 | str_pos = str_pos + res_pos + pattern_ref.size; |
862 | 116 | } |
863 | 92 | } |
864 | 92 | return count; |
865 | 111 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_ Line | Count | Source | 850 | 47 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { | 851 | 47 | int count = 0; | 852 | 47 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 853 | 19 | return 0; | 854 | 28 | } else { | 855 | 56 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 856 | 56 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); | 857 | 56 | if (res_pos == (str_ref.size - str_pos)) { | 858 | 28 | break; // not find | 859 | 28 | } | 860 | 28 | count++; | 861 | 28 | str_pos = str_pos + res_pos + pattern_ref.size; | 862 | 28 | } | 863 | 28 | } | 864 | 28 | return count; | 865 | 47 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_ Line | Count | Source | 850 | 64 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { | 851 | 64 | int count = 0; | 852 | 64 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 853 | 0 | return 0; | 854 | 64 | } else { | 855 | 152 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 856 | 152 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); | 857 | 152 | if (res_pos == (str_ref.size - str_pos)) { | 858 | 64 | break; // not find | 859 | 64 | } | 860 | 88 | count++; | 861 | 88 | str_pos = str_pos + res_pos + pattern_ref.size; | 862 | 88 | } | 863 | 64 | } | 864 | 64 | return count; | 865 | 64 | } |
|
866 | | }; |
867 | | |
868 | 1 | void register_function_string_search(SimpleFunctionFactory& factory) { |
869 | 1 | factory.register_function<FunctionStringLocatePos>(); |
870 | 1 | factory.register_function<FunctionSplitPart>(); |
871 | 1 | factory.register_function<FunctionSplitByString>(); |
872 | 1 | factory.register_function<FunctionCountSubString<FunctionCountSubStringType::TWO_ARGUMENTS>>(); |
873 | 1 | factory.register_function< |
874 | 1 | FunctionCountSubString<FunctionCountSubStringType::THREE_ARGUMENTS>>(); |
875 | 1 | factory.register_function<FunctionSubstringIndex>(); |
876 | | |
877 | 1 | factory.register_alias(FunctionStringLocatePos::name, "position"); |
878 | 1 | } |
879 | | |
880 | | #include "common/compile_check_avoid_end.h" |
881 | | } // namespace doris |