be/src/exprs/function/function_string_mask.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <fmt/format.h> |
19 | | |
20 | | #include <algorithm> |
21 | | #include <cstddef> |
22 | | |
23 | | #include "common/status.h" |
24 | | #include "core/assert_cast.h" |
25 | | #include "core/block/block.h" |
26 | | #include "core/block/column_numbers.h" |
27 | | #include "core/column/column_const.h" |
28 | | #include "core/column/column_string.h" |
29 | | #include "core/column/column_vector.h" |
30 | | #include "core/data_type/data_type_string.h" |
31 | | #include "core/memcpy_small.h" |
32 | | #include "core/string_ref.h" |
33 | | #include "exprs/function/function.h" |
34 | | #include "exprs/function/function_helpers.h" |
35 | | #include "exprs/function/simple_function_factory.h" |
36 | | #include "exprs/function_context.h" |
37 | | |
38 | | namespace doris { |
39 | | #include "common/compile_check_avoid_begin.h" |
40 | | |
41 | | template <bool Reverse> |
42 | | class FunctionMaskPartial; |
43 | | |
44 | | class FunctionMask : public IFunction { |
45 | | public: |
46 | | static constexpr auto name = "mask"; |
47 | | static constexpr unsigned char DEFAULT_UPPER_MASK = 'X'; |
48 | | static constexpr unsigned char DEFAULT_LOWER_MASK = 'x'; |
49 | | static constexpr unsigned char DEFAULT_NUMBER_MASK = 'n'; |
50 | 5 | String get_name() const override { return name; } |
51 | 59 | static FunctionPtr create() { return std::make_shared<FunctionMask>(); } |
52 | | |
53 | 50 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
54 | 50 | return std::make_shared<DataTypeString>(); |
55 | 50 | } |
56 | | |
57 | 0 | size_t get_number_of_arguments() const override { return 0; } |
58 | | |
59 | 144 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1, 2, 3}; } |
60 | | |
61 | 51 | bool is_variadic() const override { return true; } |
62 | | |
63 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
64 | 95 | uint32_t result, size_t input_rows_count) const override { |
65 | 95 | DCHECK_GE(arguments.size(), 1); |
66 | 95 | DCHECK_LE(arguments.size(), 4); |
67 | | |
68 | 95 | char upper = DEFAULT_UPPER_MASK, lower = DEFAULT_LOWER_MASK, number = DEFAULT_NUMBER_MASK; |
69 | | |
70 | 95 | auto res = ColumnString::create(); |
71 | 95 | const auto& source_column = |
72 | 95 | assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column); |
73 | | |
74 | 95 | if (arguments.size() > 1) { |
75 | 38 | const auto& col = *block.get_by_position(arguments[1]).column; |
76 | 38 | auto string_ref = col.get_data_at(0); |
77 | 38 | if (string_ref.size > 0) { |
78 | 38 | upper = *string_ref.data; |
79 | 38 | } |
80 | 38 | } |
81 | | |
82 | 95 | if (arguments.size() > 2) { |
83 | 22 | const auto& col = *block.get_by_position(arguments[2]).column; |
84 | 22 | auto string_ref = col.get_data_at(0); |
85 | 22 | if (string_ref.size > 0) { |
86 | 22 | lower = *string_ref.data; |
87 | 22 | } |
88 | 22 | } |
89 | | |
90 | 95 | if (arguments.size() > 3) { |
91 | 12 | const auto& col = *block.get_by_position(arguments[3]).column; |
92 | 12 | auto string_ref = col.get_data_at(0); |
93 | 12 | if (string_ref.size > 0) { |
94 | 12 | number = *string_ref.data; |
95 | 12 | } |
96 | 12 | } |
97 | | |
98 | 95 | if (arguments.size() > 4) { |
99 | 0 | return Status::InvalidArgument( |
100 | 0 | fmt::format("too many arguments for function {}", get_name())); |
101 | 0 | } |
102 | | |
103 | 95 | vector_mask(source_column, *res, upper, lower, number); |
104 | | |
105 | 95 | block.get_by_position(result).column = std::move(res); |
106 | | |
107 | 95 | return Status::OK(); |
108 | 95 | } |
109 | | friend class FunctionMaskPartial<true>; |
110 | | friend class FunctionMaskPartial<false>; |
111 | | |
112 | | private: |
113 | | static void vector_mask(const ColumnString& source, ColumnString& result, const char upper, |
114 | 163 | const char lower, const char number) { |
115 | 163 | result.get_chars().resize(source.get_chars().size()); |
116 | 163 | result.get_offsets().resize(source.get_offsets().size()); |
117 | 163 | memcpy_small_allow_read_write_overflow15( |
118 | 163 | result.get_offsets().data(), source.get_offsets().data(), |
119 | 163 | source.get_offsets().size() * sizeof(ColumnString::Offset)); |
120 | | |
121 | 163 | const unsigned char* src = source.get_chars().data(); |
122 | 163 | const size_t size = source.get_chars().size(); |
123 | 163 | unsigned char* res = result.get_chars().data(); |
124 | 163 | mask(src, size, upper, lower, number, res); |
125 | 163 | } |
126 | | |
127 | | static void mask(const unsigned char* __restrict src, const size_t size, |
128 | | const unsigned char upper, const unsigned char lower, |
129 | 314 | const unsigned char number, unsigned char* __restrict res) { |
130 | 3.67k | for (size_t i = 0; i != size; ++i) { |
131 | 3.36k | auto c = src[i]; |
132 | 3.36k | if (c >= 'A' && c <= 'Z') { |
133 | 260 | res[i] = upper; |
134 | 3.10k | } else if (c >= 'a' && c <= 'z') { |
135 | 1.81k | res[i] = lower; |
136 | 1.81k | } else if (c >= '0' && c <= '9') { |
137 | 1.12k | res[i] = number; |
138 | 1.12k | } else { |
139 | 158 | res[i] = c; |
140 | 158 | } |
141 | 3.36k | } |
142 | 314 | } |
143 | | }; |
144 | | |
145 | | template <bool Reverse> |
146 | | class FunctionMaskPartial : public IFunction { |
147 | | public: |
148 | | static constexpr auto name = Reverse ? "mask_last_n" : "mask_first_n"; |
149 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE8get_nameB5cxx11Ev |
150 | 120 | static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }_ZN5doris19FunctionMaskPartialILb1EE6createEv Line | Count | Source | 150 | 60 | static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); } |
_ZN5doris19FunctionMaskPartialILb0EE6createEv Line | Count | Source | 150 | 60 | static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); } |
|
151 | | |
152 | 102 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
153 | 102 | return std::make_shared<DataTypeString>(); |
154 | 102 | } _ZNK5doris19FunctionMaskPartialILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 152 | 51 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 153 | 51 | return std::make_shared<DataTypeString>(); | 154 | 51 | } |
_ZNK5doris19FunctionMaskPartialILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 152 | 51 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 153 | 51 | return std::make_shared<DataTypeString>(); | 154 | 51 | } |
|
155 | | |
156 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE23get_number_of_argumentsEv |
157 | | |
158 | 104 | bool is_variadic() const override { return true; }_ZNK5doris19FunctionMaskPartialILb1EE11is_variadicEv Line | Count | Source | 158 | 52 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionMaskPartialILb0EE11is_variadicEv Line | Count | Source | 158 | 52 | bool is_variadic() const override { return true; } |
|
159 | | |
160 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
161 | 206 | uint32_t result, size_t input_rows_count) const override { |
162 | 206 | auto res = ColumnString::create(); |
163 | 206 | auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
164 | 206 | const auto& source_column = assert_cast<const ColumnString&>(*col); |
165 | | |
166 | 206 | if (arguments.size() == 1) { // no 2nd arg, just mask all |
167 | 70 | FunctionMask::vector_mask(source_column, *res, FunctionMask::DEFAULT_UPPER_MASK, |
168 | 70 | FunctionMask::DEFAULT_LOWER_MASK, |
169 | 70 | FunctionMask::DEFAULT_NUMBER_MASK); |
170 | 136 | } else { |
171 | 136 | const auto& [col_2nd, is_const] = |
172 | 136 | unpack_if_const(block.get_by_position(arguments[1]).column); |
173 | | |
174 | 136 | const auto& col_n = assert_cast<const ColumnInt32&>(*col_2nd); |
175 | | |
176 | 136 | if (is_const) { |
177 | 64 | RETURN_IF_ERROR(vector<true>(source_column, col_n, *res)); |
178 | 72 | } else { |
179 | 72 | RETURN_IF_ERROR(vector<false>(source_column, col_n, *res)); |
180 | 72 | } |
181 | 136 | } |
182 | | |
183 | 196 | block.get_by_position(result).column = std::move(res); |
184 | | |
185 | 196 | return Status::OK(); |
186 | 206 | } _ZNK5doris19FunctionMaskPartialILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 161 | 103 | uint32_t result, size_t input_rows_count) const override { | 162 | 103 | auto res = ColumnString::create(); | 163 | 103 | auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); | 164 | 103 | const auto& source_column = assert_cast<const ColumnString&>(*col); | 165 | | | 166 | 103 | if (arguments.size() == 1) { // no 2nd arg, just mask all | 167 | 35 | FunctionMask::vector_mask(source_column, *res, FunctionMask::DEFAULT_UPPER_MASK, | 168 | 35 | FunctionMask::DEFAULT_LOWER_MASK, | 169 | 35 | FunctionMask::DEFAULT_NUMBER_MASK); | 170 | 68 | } else { | 171 | 68 | const auto& [col_2nd, is_const] = | 172 | 68 | unpack_if_const(block.get_by_position(arguments[1]).column); | 173 | | | 174 | 68 | const auto& col_n = assert_cast<const ColumnInt32&>(*col_2nd); | 175 | | | 176 | 68 | if (is_const) { | 177 | 32 | RETURN_IF_ERROR(vector<true>(source_column, col_n, *res)); | 178 | 36 | } else { | 179 | 36 | RETURN_IF_ERROR(vector<false>(source_column, col_n, *res)); | 180 | 36 | } | 181 | 68 | } | 182 | | | 183 | 98 | block.get_by_position(result).column = std::move(res); | 184 | | | 185 | 98 | return Status::OK(); | 186 | 103 | } |
_ZNK5doris19FunctionMaskPartialILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 161 | 103 | uint32_t result, size_t input_rows_count) const override { | 162 | 103 | auto res = ColumnString::create(); | 163 | 103 | auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); | 164 | 103 | const auto& source_column = assert_cast<const ColumnString&>(*col); | 165 | | | 166 | 103 | if (arguments.size() == 1) { // no 2nd arg, just mask all | 167 | 35 | FunctionMask::vector_mask(source_column, *res, FunctionMask::DEFAULT_UPPER_MASK, | 168 | 35 | FunctionMask::DEFAULT_LOWER_MASK, | 169 | 35 | FunctionMask::DEFAULT_NUMBER_MASK); | 170 | 68 | } else { | 171 | 68 | const auto& [col_2nd, is_const] = | 172 | 68 | unpack_if_const(block.get_by_position(arguments[1]).column); | 173 | | | 174 | 68 | const auto& col_n = assert_cast<const ColumnInt32&>(*col_2nd); | 175 | | | 176 | 68 | if (is_const) { | 177 | 32 | RETURN_IF_ERROR(vector<true>(source_column, col_n, *res)); | 178 | 36 | } else { | 179 | 36 | RETURN_IF_ERROR(vector<false>(source_column, col_n, *res)); | 180 | 36 | } | 181 | 68 | } | 182 | | | 183 | 98 | block.get_by_position(result).column = std::move(res); | 184 | | | 185 | 98 | return Status::OK(); | 186 | 103 | } |
|
187 | | |
188 | | private: |
189 | | template <bool is_const> |
190 | 138 | static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) { |
191 | 138 | const auto num_rows = src.size(); |
192 | 138 | const auto* chars = src.get_chars().data(); |
193 | 138 | const auto* offsets = src.get_offsets().data(); |
194 | 138 | result.get_chars().resize(src.get_chars().size()); |
195 | 138 | result.get_offsets().resize(src.get_offsets().size()); |
196 | 138 | memcpy_small_allow_read_write_overflow15( |
197 | 138 | result.get_offsets().data(), src.get_offsets().data(), |
198 | 138 | src.get_offsets().size() * sizeof(ColumnString::Offset)); |
199 | 138 | auto* res = result.get_chars().data(); |
200 | | |
201 | 138 | const auto& col_n_data = col_n.get_data(); |
202 | | |
203 | 286 | for (ssize_t i = 0; i != num_rows; ++i) { |
204 | 158 | auto offset = offsets[i - 1]; |
205 | 158 | int len = offsets[i] - offset; |
206 | 158 | const int n = col_n_data[index_check_const<is_const>(i)]; |
207 | | |
208 | 158 | if (n < 0) [[unlikely]] { |
209 | 10 | return Status::InvalidArgument( |
210 | 10 | "function {} only accept non-negative input for 2nd argument but got {}", |
211 | 10 | name, n); |
212 | 10 | } |
213 | | |
214 | 148 | if constexpr (Reverse) { |
215 | 74 | auto start = std::max(len - n, 0); |
216 | 74 | if (start > 0) { |
217 | 48 | memcpy(&res[offset], &chars[offset], start); |
218 | 48 | } |
219 | 74 | offset += start; |
220 | 74 | } else { |
221 | 74 | if (n < len) { |
222 | 48 | memcpy(&res[offset + n], &chars[offset + n], len - n); |
223 | 48 | } |
224 | 74 | } |
225 | | |
226 | 148 | len = std::min(n, len); |
227 | 148 | FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK, |
228 | 148 | FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK, |
229 | 148 | &res[offset]); |
230 | 148 | } |
231 | | |
232 | 128 | return Status::OK(); |
233 | 138 | } _ZN5doris19FunctionMaskPartialILb1EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_ Line | Count | Source | 190 | 32 | static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) { | 191 | 32 | const auto num_rows = src.size(); | 192 | 32 | const auto* chars = src.get_chars().data(); | 193 | 32 | const auto* offsets = src.get_offsets().data(); | 194 | 32 | result.get_chars().resize(src.get_chars().size()); | 195 | 32 | result.get_offsets().resize(src.get_offsets().size()); | 196 | 32 | memcpy_small_allow_read_write_overflow15( | 197 | 32 | result.get_offsets().data(), src.get_offsets().data(), | 198 | 32 | src.get_offsets().size() * sizeof(ColumnString::Offset)); | 199 | 32 | auto* res = result.get_chars().data(); | 200 | | | 201 | 32 | const auto& col_n_data = col_n.get_data(); | 202 | | | 203 | 72 | for (ssize_t i = 0; i != num_rows; ++i) { | 204 | 40 | auto offset = offsets[i - 1]; | 205 | 40 | int len = offsets[i] - offset; | 206 | 40 | const int n = col_n_data[index_check_const<is_const>(i)]; | 207 | | | 208 | 40 | if (n < 0) [[unlikely]] { | 209 | 0 | return Status::InvalidArgument( | 210 | 0 | "function {} only accept non-negative input for 2nd argument but got {}", | 211 | 0 | name, n); | 212 | 0 | } | 213 | | | 214 | 40 | if constexpr (Reverse) { | 215 | 40 | auto start = std::max(len - n, 0); | 216 | 40 | if (start > 0) { | 217 | 20 | memcpy(&res[offset], &chars[offset], start); | 218 | 20 | } | 219 | 40 | offset += start; | 220 | | } else { | 221 | | if (n < len) { | 222 | | memcpy(&res[offset + n], &chars[offset + n], len - n); | 223 | | } | 224 | | } | 225 | | | 226 | 40 | len = std::min(n, len); | 227 | 40 | FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK, | 228 | 40 | FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK, | 229 | 40 | &res[offset]); | 230 | 40 | } | 231 | | | 232 | 32 | return Status::OK(); | 233 | 32 | } |
_ZN5doris19FunctionMaskPartialILb1EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_ Line | Count | Source | 190 | 37 | static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) { | 191 | 37 | const auto num_rows = src.size(); | 192 | 37 | const auto* chars = src.get_chars().data(); | 193 | 37 | const auto* offsets = src.get_offsets().data(); | 194 | 37 | result.get_chars().resize(src.get_chars().size()); | 195 | 37 | result.get_offsets().resize(src.get_offsets().size()); | 196 | 37 | memcpy_small_allow_read_write_overflow15( | 197 | 37 | result.get_offsets().data(), src.get_offsets().data(), | 198 | 37 | src.get_offsets().size() * sizeof(ColumnString::Offset)); | 199 | 37 | auto* res = result.get_chars().data(); | 200 | | | 201 | 37 | const auto& col_n_data = col_n.get_data(); | 202 | | | 203 | 71 | for (ssize_t i = 0; i != num_rows; ++i) { | 204 | 39 | auto offset = offsets[i - 1]; | 205 | 39 | int len = offsets[i] - offset; | 206 | 39 | const int n = col_n_data[index_check_const<is_const>(i)]; | 207 | | | 208 | 39 | if (n < 0) [[unlikely]] { | 209 | 5 | return Status::InvalidArgument( | 210 | 5 | "function {} only accept non-negative input for 2nd argument but got {}", | 211 | 5 | name, n); | 212 | 5 | } | 213 | | | 214 | 34 | if constexpr (Reverse) { | 215 | 34 | auto start = std::max(len - n, 0); | 216 | 34 | if (start > 0) { | 217 | 28 | memcpy(&res[offset], &chars[offset], start); | 218 | 28 | } | 219 | 34 | offset += start; | 220 | | } else { | 221 | | if (n < len) { | 222 | | memcpy(&res[offset + n], &chars[offset + n], len - n); | 223 | | } | 224 | | } | 225 | | | 226 | 34 | len = std::min(n, len); | 227 | 34 | FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK, | 228 | 34 | FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK, | 229 | 34 | &res[offset]); | 230 | 34 | } | 231 | | | 232 | 32 | return Status::OK(); | 233 | 37 | } |
_ZN5doris19FunctionMaskPartialILb0EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_ Line | Count | Source | 190 | 32 | static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) { | 191 | 32 | const auto num_rows = src.size(); | 192 | 32 | const auto* chars = src.get_chars().data(); | 193 | 32 | const auto* offsets = src.get_offsets().data(); | 194 | 32 | result.get_chars().resize(src.get_chars().size()); | 195 | 32 | result.get_offsets().resize(src.get_offsets().size()); | 196 | 32 | memcpy_small_allow_read_write_overflow15( | 197 | 32 | result.get_offsets().data(), src.get_offsets().data(), | 198 | 32 | src.get_offsets().size() * sizeof(ColumnString::Offset)); | 199 | 32 | auto* res = result.get_chars().data(); | 200 | | | 201 | 32 | const auto& col_n_data = col_n.get_data(); | 202 | | | 203 | 72 | for (ssize_t i = 0; i != num_rows; ++i) { | 204 | 40 | auto offset = offsets[i - 1]; | 205 | 40 | int len = offsets[i] - offset; | 206 | 40 | const int n = col_n_data[index_check_const<is_const>(i)]; | 207 | | | 208 | 40 | if (n < 0) [[unlikely]] { | 209 | 0 | return Status::InvalidArgument( | 210 | 0 | "function {} only accept non-negative input for 2nd argument but got {}", | 211 | 0 | name, n); | 212 | 0 | } | 213 | | | 214 | | if constexpr (Reverse) { | 215 | | auto start = std::max(len - n, 0); | 216 | | if (start > 0) { | 217 | | memcpy(&res[offset], &chars[offset], start); | 218 | | } | 219 | | offset += start; | 220 | 40 | } else { | 221 | 40 | if (n < len) { | 222 | 20 | memcpy(&res[offset + n], &chars[offset + n], len - n); | 223 | 20 | } | 224 | 40 | } | 225 | | | 226 | 40 | len = std::min(n, len); | 227 | 40 | FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK, | 228 | 40 | FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK, | 229 | 40 | &res[offset]); | 230 | 40 | } | 231 | | | 232 | 32 | return Status::OK(); | 233 | 32 | } |
_ZN5doris19FunctionMaskPartialILb0EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_ Line | Count | Source | 190 | 37 | static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) { | 191 | 37 | const auto num_rows = src.size(); | 192 | 37 | const auto* chars = src.get_chars().data(); | 193 | 37 | const auto* offsets = src.get_offsets().data(); | 194 | 37 | result.get_chars().resize(src.get_chars().size()); | 195 | 37 | result.get_offsets().resize(src.get_offsets().size()); | 196 | 37 | memcpy_small_allow_read_write_overflow15( | 197 | 37 | result.get_offsets().data(), src.get_offsets().data(), | 198 | 37 | src.get_offsets().size() * sizeof(ColumnString::Offset)); | 199 | 37 | auto* res = result.get_chars().data(); | 200 | | | 201 | 37 | const auto& col_n_data = col_n.get_data(); | 202 | | | 203 | 71 | for (ssize_t i = 0; i != num_rows; ++i) { | 204 | 39 | auto offset = offsets[i - 1]; | 205 | 39 | int len = offsets[i] - offset; | 206 | 39 | const int n = col_n_data[index_check_const<is_const>(i)]; | 207 | | | 208 | 39 | if (n < 0) [[unlikely]] { | 209 | 5 | return Status::InvalidArgument( | 210 | 5 | "function {} only accept non-negative input for 2nd argument but got {}", | 211 | 5 | name, n); | 212 | 5 | } | 213 | | | 214 | | if constexpr (Reverse) { | 215 | | auto start = std::max(len - n, 0); | 216 | | if (start > 0) { | 217 | | memcpy(&res[offset], &chars[offset], start); | 218 | | } | 219 | | offset += start; | 220 | 34 | } else { | 221 | 34 | if (n < len) { | 222 | 28 | memcpy(&res[offset + n], &chars[offset + n], len - n); | 223 | 28 | } | 224 | 34 | } | 225 | | | 226 | 34 | len = std::min(n, len); | 227 | 34 | FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK, | 228 | 34 | FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK, | 229 | 34 | &res[offset]); | 230 | 34 | } | 231 | | | 232 | 32 | return Status::OK(); | 233 | 37 | } |
|
234 | | }; |
235 | | |
236 | 8 | void register_function_string_mask(SimpleFunctionFactory& factory) { |
237 | 8 | factory.register_function<FunctionMask>(); |
238 | 8 | factory.register_function<FunctionMaskPartial<true>>(); |
239 | 8 | factory.register_function<FunctionMaskPartial<false>>(); |
240 | 8 | } |
241 | | |
242 | | #include "common/compile_check_avoid_end.h" |
243 | | } // namespace doris |