be/src/exprs/function/function_string_replace.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <cstddef> |
21 | | #include <cstring> |
22 | | #include <string> |
23 | | #include <string_view> |
24 | | |
25 | | #include "common/compiler_util.h" |
26 | | #include "common/status.h" |
27 | | #include "core/assert_cast.h" |
28 | | #include "core/block/block.h" |
29 | | #include "core/block/column_numbers.h" |
30 | | #include "core/column/column_const.h" |
31 | | #include "core/column/column_nullable.h" |
32 | | #include "core/column/column_string.h" |
33 | | #include "core/column/column_vector.h" |
34 | | #include "core/data_type/data_type_nullable.h" |
35 | | #include "core/data_type/data_type_number.h" |
36 | | #include "core/data_type/data_type_string.h" |
37 | | #include "core/string_ref.h" |
38 | | #include "exec/common/stringop_substring.h" |
39 | | #include "exec/common/template_helpers.hpp" |
40 | | #include "exprs/function/function.h" |
41 | | #include "exprs/function/function_helpers.h" |
42 | | #include "exprs/function_context.h" |
43 | | #include "util/simd/vstring_function.h" |
44 | | |
45 | | namespace doris { |
46 | | #include "common/compile_check_avoid_begin.h" |
47 | | |
48 | | struct ReplaceImpl { |
49 | | static constexpr auto name = "replace"; |
50 | | }; |
51 | | |
52 | | struct ReplaceEmptyImpl { |
53 | | static constexpr auto name = "replace_empty"; |
54 | | }; |
55 | | |
56 | | template <typename Impl, bool empty> |
57 | | class FunctionReplace : public IFunction { |
58 | | public: |
59 | | static constexpr auto name = Impl::name; |
60 | 6.07k | static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }_ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE6createEv Line | Count | Source | 60 | 4.54k | static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); } |
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE6createEv Line | Count | Source | 60 | 1.52k | static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); } |
|
61 | 2 | String get_name() const override { return name; }_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE8get_nameB5cxx11Ev Line | Count | Source | 61 | 1 | String get_name() const override { return name; } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE8get_nameB5cxx11Ev Line | Count | Source | 61 | 1 | String get_name() const override { return name; } |
|
62 | 6.05k | size_t get_number_of_arguments() const override { return 3; }_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE23get_number_of_argumentsEv Line | Count | Source | 62 | 4.53k | size_t get_number_of_arguments() const override { return 3; } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE23get_number_of_argumentsEv Line | Count | Source | 62 | 1.52k | size_t get_number_of_arguments() const override { return 3; } |
|
63 | | |
64 | 6.05k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
65 | 6.05k | return std::make_shared<DataTypeString>(); |
66 | 6.05k | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 64 | 4.53k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 65 | 4.53k | return std::make_shared<DataTypeString>(); | 66 | 4.53k | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 64 | 1.52k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 65 | 1.52k | return std::make_shared<DataTypeString>(); | 66 | 1.52k | } |
|
67 | | |
68 | 12 | DataTypes get_variadic_argument_types_impl() const override { |
69 | 12 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
70 | 12 | std::make_shared<DataTypeString>()}; |
71 | 12 | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE32get_variadic_argument_types_implEv Line | Count | Source | 68 | 6 | DataTypes get_variadic_argument_types_impl() const override { | 69 | 6 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 70 | 6 | std::make_shared<DataTypeString>()}; | 71 | 6 | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE32get_variadic_argument_types_implEv Line | Count | Source | 68 | 6 | DataTypes get_variadic_argument_types_impl() const override { | 69 | 6 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 70 | 6 | std::make_shared<DataTypeString>()}; | 71 | 6 | } |
|
72 | | |
73 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
74 | 4.69k | uint32_t result, size_t input_rows_count) const override { |
75 | | // We need a local variable to hold a reference to the converted column. |
76 | | // So that the converted column will not be released before we use it. |
77 | 4.69k | ColumnPtr col[3]; |
78 | 4.69k | bool col_const[3]; |
79 | 18.7k | for (size_t i = 0; i < 3; ++i) { |
80 | 14.0k | std::tie(col[i], col_const[i]) = |
81 | 14.0k | unpack_if_const(block.get_by_position(arguments[i]).column); |
82 | 14.0k | } |
83 | | |
84 | 4.69k | const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get()); |
85 | 4.69k | const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get()); |
86 | 4.69k | const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get()); |
87 | | |
88 | 4.69k | ColumnString::MutablePtr col_res = ColumnString::create(); |
89 | | |
90 | 4.69k | std::visit( |
91 | 4.69k | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { |
92 | 16.2k | for (int i = 0; i < input_rows_count; ++i) { |
93 | 11.5k | StringRef origin_str = |
94 | 11.5k | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); |
95 | 11.5k | StringRef old_str = |
96 | 11.5k | col_old_str->get_data_at(index_check_const<old_str_const>(i)); |
97 | 11.5k | StringRef new_str = |
98 | 11.5k | col_new_str->get_data_at(index_check_const<new_str_const>(i)); |
99 | | |
100 | 11.5k | std::string result = |
101 | 11.5k | replace(origin_str.to_string(), old_str.to_string_view(), |
102 | 11.5k | new_str.to_string_view()); |
103 | | |
104 | 11.5k | col_res->insert_data(result.data(), result.length()); |
105 | 11.5k | } |
106 | 4.69k | }, _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 91 | 163 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 575 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 412 | StringRef origin_str = | 94 | 412 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 412 | StringRef old_str = | 96 | 412 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 412 | StringRef new_str = | 98 | 412 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 412 | std::string result = | 101 | 412 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 412 | new_str.to_string_view()); | 103 | | | 104 | 412 | col_res->insert_data(result.data(), result.length()); | 105 | 412 | } | 106 | 163 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 91 | 3.02k | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 12.4k | for (int i = 0; i < input_rows_count; ++i) { | 93 | 9.39k | StringRef origin_str = | 94 | 9.39k | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 9.39k | StringRef old_str = | 96 | 9.39k | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 9.39k | StringRef new_str = | 98 | 9.39k | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 9.39k | std::string result = | 101 | 9.39k | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 9.39k | new_str.to_string_view()); | 103 | | | 104 | 9.39k | col_res->insert_data(result.data(), result.length()); | 105 | 9.39k | } | 106 | 3.02k | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 91 | 132 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 479 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 347 | StringRef origin_str = | 94 | 347 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 347 | StringRef old_str = | 96 | 347 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 347 | StringRef new_str = | 98 | 347 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 347 | std::string result = | 101 | 347 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 347 | new_str.to_string_view()); | 103 | | | 104 | 347 | col_res->insert_data(result.data(), result.length()); | 105 | 347 | } | 106 | 132 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 91 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 125 | StringRef origin_str = | 94 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 125 | StringRef old_str = | 96 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 125 | StringRef new_str = | 98 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 125 | std::string result = | 101 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 125 | new_str.to_string_view()); | 103 | | | 104 | 125 | col_res->insert_data(result.data(), result.length()); | 105 | 125 | } | 106 | 125 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ |
107 | 4.69k | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
108 | 4.69k | make_bool_variant(col_const[2])); |
109 | | |
110 | 4.69k | block.replace_by_position(result, std::move(col_res)); |
111 | 4.69k | return Status::OK(); |
112 | 4.69k | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 74 | 3.81k | uint32_t result, size_t input_rows_count) const override { | 75 | | // We need a local variable to hold a reference to the converted column. | 76 | | // So that the converted column will not be released before we use it. | 77 | 3.81k | ColumnPtr col[3]; | 78 | 3.81k | bool col_const[3]; | 79 | 15.2k | for (size_t i = 0; i < 3; ++i) { | 80 | 11.4k | std::tie(col[i], col_const[i]) = | 81 | 11.4k | unpack_if_const(block.get_by_position(arguments[i]).column); | 82 | 11.4k | } | 83 | | | 84 | 3.81k | const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get()); | 85 | 3.81k | const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get()); | 86 | 3.81k | const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get()); | 87 | | | 88 | 3.81k | ColumnString::MutablePtr col_res = ColumnString::create(); | 89 | | | 90 | 3.81k | std::visit( | 91 | 3.81k | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 3.81k | for (int i = 0; i < input_rows_count; ++i) { | 93 | 3.81k | StringRef origin_str = | 94 | 3.81k | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 3.81k | StringRef old_str = | 96 | 3.81k | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 3.81k | StringRef new_str = | 98 | 3.81k | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 3.81k | std::string result = | 101 | 3.81k | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 3.81k | new_str.to_string_view()); | 103 | | | 104 | 3.81k | col_res->insert_data(result.data(), result.length()); | 105 | 3.81k | } | 106 | 3.81k | }, | 107 | 3.81k | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 108 | 3.81k | make_bool_variant(col_const[2])); | 109 | | | 110 | 3.81k | block.replace_by_position(result, std::move(col_res)); | 111 | 3.81k | return Status::OK(); | 112 | 3.81k | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 74 | 882 | uint32_t result, size_t input_rows_count) const override { | 75 | | // We need a local variable to hold a reference to the converted column. | 76 | | // So that the converted column will not be released before we use it. | 77 | 882 | ColumnPtr col[3]; | 78 | 882 | bool col_const[3]; | 79 | 3.52k | for (size_t i = 0; i < 3; ++i) { | 80 | 2.64k | std::tie(col[i], col_const[i]) = | 81 | 2.64k | unpack_if_const(block.get_by_position(arguments[i]).column); | 82 | 2.64k | } | 83 | | | 84 | 882 | const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get()); | 85 | 882 | const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get()); | 86 | 882 | const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get()); | 87 | | | 88 | 882 | ColumnString::MutablePtr col_res = ColumnString::create(); | 89 | | | 90 | 882 | std::visit( | 91 | 882 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 92 | 882 | for (int i = 0; i < input_rows_count; ++i) { | 93 | 882 | StringRef origin_str = | 94 | 882 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 95 | 882 | StringRef old_str = | 96 | 882 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 97 | 882 | StringRef new_str = | 98 | 882 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 99 | | | 100 | 882 | std::string result = | 101 | 882 | replace(origin_str.to_string(), old_str.to_string_view(), | 102 | 882 | new_str.to_string_view()); | 103 | | | 104 | 882 | col_res->insert_data(result.data(), result.length()); | 105 | 882 | } | 106 | 882 | }, | 107 | 882 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 108 | 882 | make_bool_variant(col_const[2])); | 109 | | | 110 | 882 | block.replace_by_position(result, std::move(col_res)); | 111 | 882 | return Status::OK(); | 112 | 882 | } |
|
113 | | |
114 | | private: |
115 | 11.5k | std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const { |
116 | 11.5k | if (old_str.empty()) { |
117 | 503 | if constexpr (empty) { |
118 | 252 | return str; |
119 | 252 | } else { |
120 | | // Different from "Replace" only when the search string is empty. |
121 | | // it will insert `new_str` in front of every character and at the end of the old str. |
122 | 251 | if (new_str.empty()) { |
123 | 59 | return str; |
124 | 59 | } |
125 | 192 | if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { |
126 | 190 | std::string result; |
127 | 190 | ColumnString::check_chars_length( |
128 | 190 | str.length() * (new_str.length() + 1) + new_str.length(), 0); |
129 | 190 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); |
130 | 651 | for (char c : str) { |
131 | 651 | result += new_str; |
132 | 651 | result += c; |
133 | 651 | } |
134 | 190 | result += new_str; |
135 | 190 | return result; |
136 | 190 | } else { |
137 | 2 | std::string result; |
138 | 2 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); |
139 | 11 | for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { |
140 | 9 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; |
141 | 9 | result += new_str; |
142 | 9 | result.append(&str[i], utf8_char_len); |
143 | 9 | } |
144 | 2 | result += new_str; |
145 | 2 | ColumnString::check_chars_length(result.size(), 0); |
146 | 2 | return result; |
147 | 2 | } |
148 | 192 | } |
149 | 11.0k | } else { |
150 | 11.0k | std::string::size_type pos = 0; |
151 | 11.0k | std::string::size_type oldLen = old_str.size(); |
152 | 11.0k | std::string::size_type newLen = new_str.size(); |
153 | 12.3k | while ((pos = str.find(old_str, pos)) != std::string::npos) { |
154 | 1.35k | str.replace(pos, oldLen, new_str); |
155 | 1.35k | pos += newLen; |
156 | 1.35k | } |
157 | 11.0k | return str; |
158 | 11.0k | } |
159 | 11.5k | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_ Line | Count | Source | 115 | 10.4k | std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const { | 116 | 10.4k | if (old_str.empty()) { | 117 | 252 | if constexpr (empty) { | 118 | 252 | return str; | 119 | | } else { | 120 | | // Different from "Replace" only when the search string is empty. | 121 | | // it will insert `new_str` in front of every character and at the end of the old str. | 122 | | if (new_str.empty()) { | 123 | | return str; | 124 | | } | 125 | | if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { | 126 | | std::string result; | 127 | | ColumnString::check_chars_length( | 128 | | str.length() * (new_str.length() + 1) + new_str.length(), 0); | 129 | | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 130 | | for (char c : str) { | 131 | | result += new_str; | 132 | | result += c; | 133 | | } | 134 | | result += new_str; | 135 | | return result; | 136 | | } else { | 137 | | std::string result; | 138 | | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 139 | | for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { | 140 | | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; | 141 | | result += new_str; | 142 | | result.append(&str[i], utf8_char_len); | 143 | | } | 144 | | result += new_str; | 145 | | ColumnString::check_chars_length(result.size(), 0); | 146 | | return result; | 147 | | } | 148 | | } | 149 | 10.1k | } else { | 150 | 10.1k | std::string::size_type pos = 0; | 151 | 10.1k | std::string::size_type oldLen = old_str.size(); | 152 | 10.1k | std::string::size_type newLen = new_str.size(); | 153 | 11.1k | while ((pos = str.find(old_str, pos)) != std::string::npos) { | 154 | 1.01k | str.replace(pos, oldLen, new_str); | 155 | 1.01k | pos += newLen; | 156 | 1.01k | } | 157 | 10.1k | return str; | 158 | 10.1k | } | 159 | 10.4k | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_ Line | Count | Source | 115 | 1.09k | std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const { | 116 | 1.09k | if (old_str.empty()) { | 117 | | if constexpr (empty) { | 118 | | return str; | 119 | 251 | } else { | 120 | | // Different from "Replace" only when the search string is empty. | 121 | | // it will insert `new_str` in front of every character and at the end of the old str. | 122 | 251 | if (new_str.empty()) { | 123 | 59 | return str; | 124 | 59 | } | 125 | 192 | if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { | 126 | 190 | std::string result; | 127 | 190 | ColumnString::check_chars_length( | 128 | 190 | str.length() * (new_str.length() + 1) + new_str.length(), 0); | 129 | 190 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 130 | 651 | for (char c : str) { | 131 | 651 | result += new_str; | 132 | 651 | result += c; | 133 | 651 | } | 134 | 190 | result += new_str; | 135 | 190 | return result; | 136 | 190 | } else { | 137 | 2 | std::string result; | 138 | 2 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 139 | 11 | for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { | 140 | 9 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; | 141 | 9 | result += new_str; | 142 | 9 | result.append(&str[i], utf8_char_len); | 143 | 9 | } | 144 | 2 | result += new_str; | 145 | 2 | ColumnString::check_chars_length(result.size(), 0); | 146 | 2 | return result; | 147 | 2 | } | 148 | 192 | } | 149 | 846 | } else { | 150 | 846 | std::string::size_type pos = 0; | 151 | 846 | std::string::size_type oldLen = old_str.size(); | 152 | 846 | std::string::size_type newLen = new_str.size(); | 153 | 1.17k | while ((pos = str.find(old_str, pos)) != std::string::npos) { | 154 | 332 | str.replace(pos, oldLen, new_str); | 155 | 332 | pos += newLen; | 156 | 332 | } | 157 | 846 | return str; | 158 | 846 | } | 159 | 1.09k | } |
|
160 | | }; |
161 | | |
162 | | struct ReverseImpl { |
163 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
164 | 72 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { |
165 | 72 | auto rows_count = offsets.size(); |
166 | 72 | res_offsets.resize(rows_count); |
167 | 72 | res_data.reserve(data.size()); |
168 | 215 | for (ssize_t i = 0; i < rows_count; ++i) { |
169 | 143 | auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
170 | 143 | int64_t src_len = offsets[i] - offsets[i - 1]; |
171 | 143 | std::string dst; |
172 | 143 | dst.resize(src_len); |
173 | 143 | simd::VStringFunctions::reverse(StringRef((uint8_t*)src_str, src_len), &dst); |
174 | 143 | StringOP::push_value_string(std::string_view(dst.data(), src_len), i, res_data, |
175 | 143 | res_offsets); |
176 | 143 | } |
177 | 72 | return Status::OK(); |
178 | 72 | } |
179 | | }; |
180 | | |
181 | | template <typename Impl> |
182 | | class FunctionSubReplace : public IFunction { |
183 | | public: |
184 | | static constexpr auto name = "sub_replace"; |
185 | | |
186 | 85 | static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }_ZN5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE6createEv Line | Count | Source | 186 | 37 | static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); } |
_ZN5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE6createEv Line | Count | Source | 186 | 48 | static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); } |
|
187 | | |
188 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE8get_nameB5cxx11Ev |
189 | | |
190 | 71 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
191 | 71 | return make_nullable(std::make_shared<DataTypeString>()); |
192 | 71 | } _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 190 | 30 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 191 | 30 | return make_nullable(std::make_shared<DataTypeString>()); | 192 | 30 | } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 190 | 41 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 191 | 41 | return make_nullable(std::make_shared<DataTypeString>()); | 192 | 41 | } |
|
193 | | |
194 | 73 | bool is_variadic() const override { return true; }_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE11is_variadicEv Line | Count | Source | 194 | 31 | bool is_variadic() const override { return true; } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE11is_variadicEv Line | Count | Source | 194 | 42 | bool is_variadic() const override { return true; } |
|
195 | | |
196 | 12 | DataTypes get_variadic_argument_types_impl() const override { |
197 | 12 | return Impl::get_variadic_argument_types(); |
198 | 12 | } _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE32get_variadic_argument_types_implEv Line | Count | Source | 196 | 6 | DataTypes get_variadic_argument_types_impl() const override { | 197 | 6 | return Impl::get_variadic_argument_types(); | 198 | 6 | } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE32get_variadic_argument_types_implEv Line | Count | Source | 196 | 6 | DataTypes get_variadic_argument_types_impl() const override { | 197 | 6 | return Impl::get_variadic_argument_types(); | 198 | 6 | } |
|
199 | | |
200 | 0 | size_t get_number_of_arguments() const override { |
201 | 0 | return get_variadic_argument_types_impl().size(); |
202 | 0 | } Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE23get_number_of_argumentsEv |
203 | | |
204 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
205 | 93 | uint32_t result, size_t input_rows_count) const override { |
206 | 93 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
207 | 93 | } _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 205 | 41 | uint32_t result, size_t input_rows_count) const override { | 206 | 41 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); | 207 | 41 | } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 205 | 52 | uint32_t result, size_t input_rows_count) const override { | 206 | 52 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); | 207 | 52 | } |
|
208 | | }; |
209 | | |
210 | | struct SubReplaceImpl { |
211 | | static Status replace_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, |
212 | 94 | size_t input_rows_count) { |
213 | 94 | auto res_column = ColumnString::create(); |
214 | 94 | auto* result_column = assert_cast<ColumnString*>(res_column.get()); |
215 | 94 | auto args_null_map = ColumnUInt8::create(input_rows_count, 0); |
216 | 94 | ColumnPtr argument_columns[4]; |
217 | 94 | bool col_const[4]; |
218 | 470 | for (int i = 0; i < 4; ++i) { |
219 | 376 | std::tie(argument_columns[i], col_const[i]) = |
220 | 376 | unpack_if_const(block.get_by_position(arguments[i]).column); |
221 | 376 | } |
222 | 94 | const auto* data_column = assert_cast<const ColumnString*>(argument_columns[0].get()); |
223 | 94 | const auto* mask_column = assert_cast<const ColumnString*>(argument_columns[1].get()); |
224 | 94 | const auto* start_column = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
225 | 94 | const auto* length_column = assert_cast<const ColumnInt32*>(argument_columns[3].get()); |
226 | | |
227 | 94 | std::visit( |
228 | 94 | [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) { |
229 | 94 | if (data_column->is_ascii()) { |
230 | 70 | vector_ascii<origin_str_const, new_str_const, start_const, len_const>( |
231 | 70 | data_column, mask_column, start_column->get_data(), |
232 | 70 | length_column->get_data(), args_null_map->get_data(), result_column, |
233 | 70 | input_rows_count); |
234 | 70 | } else { |
235 | 24 | vector_utf8<origin_str_const, new_str_const, start_const, len_const>( |
236 | 24 | data_column, mask_column, start_column->get_data(), |
237 | 24 | length_column->get_data(), args_null_map->get_data(), result_column, |
238 | 24 | input_rows_count); |
239 | 24 | } |
240 | 94 | }, _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 228 | 94 | [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) { | 229 | 94 | if (data_column->is_ascii()) { | 230 | 70 | vector_ascii<origin_str_const, new_str_const, start_const, len_const>( | 231 | 70 | data_column, mask_column, start_column->get_data(), | 232 | 70 | length_column->get_data(), args_null_map->get_data(), result_column, | 233 | 70 | input_rows_count); | 234 | 70 | } else { | 235 | 24 | vector_utf8<origin_str_const, new_str_const, start_const, len_const>( | 236 | 24 | data_column, mask_column, start_column->get_data(), | 237 | 24 | length_column->get_data(), args_null_map->get_data(), result_column, | 238 | 24 | input_rows_count); | 239 | 24 | } | 240 | 94 | }, |
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_ |
241 | 94 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
242 | 94 | make_bool_variant(col_const[2]), make_bool_variant(col_const[3])); |
243 | 94 | block.get_by_position(result).column = |
244 | 94 | ColumnNullable::create(std::move(res_column), std::move(args_null_map)); |
245 | 94 | return Status::OK(); |
246 | 94 | } |
247 | | |
248 | | private: |
249 | | template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const> |
250 | | static void vector_ascii(const ColumnString* data_column, const ColumnString* mask_column, |
251 | | const PaddedPODArray<Int32>& args_start, |
252 | | const PaddedPODArray<Int32>& args_length, NullMap& args_null_map, |
253 | 70 | ColumnString* result_column, size_t input_rows_count) { |
254 | 70 | ColumnString::Chars& res_chars = result_column->get_chars(); |
255 | 70 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); |
256 | 10.4k | for (size_t row = 0; row < input_rows_count; ++row) { |
257 | 10.3k | StringRef origin_str = |
258 | 10.3k | data_column->get_data_at(index_check_const<origin_str_const>(row)); |
259 | 10.3k | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); |
260 | 10.3k | const auto start = args_start[index_check_const<start_const>(row)]; |
261 | 10.3k | const auto length = args_length[index_check_const<len_const>(row)]; |
262 | 10.3k | const size_t origin_str_len = origin_str.size; |
263 | | //input is null, start < 0, len < 0, str_size <= start. return NULL |
264 | 10.3k | if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) { |
265 | 10.2k | res_offsets.push_back(res_chars.size()); |
266 | 10.2k | args_null_map[row] = 1; |
267 | 10.2k | } else { |
268 | 92 | std::string_view replace_str = new_str.to_string_view(); |
269 | 92 | std::string result = origin_str.to_string(); |
270 | 92 | result.replace(start, length, replace_str); |
271 | 92 | result_column->insert_data(result.data(), result.length()); |
272 | 92 | } |
273 | 10.3k | } |
274 | 70 | } _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Line | Count | Source | 253 | 70 | ColumnString* result_column, size_t input_rows_count) { | 254 | 70 | ColumnString::Chars& res_chars = result_column->get_chars(); | 255 | 70 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); | 256 | 10.4k | for (size_t row = 0; row < input_rows_count; ++row) { | 257 | 10.3k | StringRef origin_str = | 258 | 10.3k | data_column->get_data_at(index_check_const<origin_str_const>(row)); | 259 | 10.3k | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); | 260 | 10.3k | const auto start = args_start[index_check_const<start_const>(row)]; | 261 | 10.3k | const auto length = args_length[index_check_const<len_const>(row)]; | 262 | 10.3k | const size_t origin_str_len = origin_str.size; | 263 | | //input is null, start < 0, len < 0, str_size <= start. return NULL | 264 | 10.3k | if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) { | 265 | 10.2k | res_offsets.push_back(res_chars.size()); | 266 | 10.2k | args_null_map[row] = 1; | 267 | 10.2k | } else { | 268 | 92 | std::string_view replace_str = new_str.to_string_view(); | 269 | 92 | std::string result = origin_str.to_string(); | 270 | 92 | result.replace(start, length, replace_str); | 271 | 92 | result_column->insert_data(result.data(), result.length()); | 272 | 92 | } | 273 | 10.3k | } | 274 | 70 | } |
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m |
275 | | |
276 | | template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const> |
277 | | static void vector_utf8(const ColumnString* data_column, const ColumnString* mask_column, |
278 | | const PaddedPODArray<Int32>& args_start, |
279 | | const PaddedPODArray<Int32>& args_length, NullMap& args_null_map, |
280 | 24 | ColumnString* result_column, size_t input_rows_count) { |
281 | 24 | ColumnString::Chars& res_chars = result_column->get_chars(); |
282 | 24 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); |
283 | | |
284 | 48 | for (size_t row = 0; row < input_rows_count; ++row) { |
285 | 24 | StringRef origin_str = |
286 | 24 | data_column->get_data_at(index_check_const<origin_str_const>(row)); |
287 | 24 | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); |
288 | 24 | const auto start = args_start[index_check_const<start_const>(row)]; |
289 | 24 | const auto length = args_length[index_check_const<len_const>(row)]; |
290 | | //input is null, start < 0, len < 0 return NULL |
291 | 24 | if (args_null_map[row] || start < 0 || length < 0) { |
292 | 2 | res_offsets.push_back(res_chars.size()); |
293 | 2 | args_null_map[row] = 1; |
294 | 2 | continue; |
295 | 2 | } |
296 | | |
297 | 22 | const auto [start_byte_len, start_char_len] = |
298 | 22 | simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(), |
299 | 22 | origin_str.end(), start); |
300 | | |
301 | | // start >= orgin.size |
302 | 22 | DCHECK(start_char_len <= start); |
303 | 22 | if (start_byte_len == origin_str.size) { |
304 | 8 | res_offsets.push_back(res_chars.size()); |
305 | 8 | args_null_map[row] = 1; |
306 | 8 | continue; |
307 | 8 | } |
308 | | |
309 | 14 | auto [end_byte_len, end_char_len] = |
310 | 14 | simd::VStringFunctions::iterate_utf8_with_limit_length( |
311 | 14 | origin_str.begin() + start_byte_len, origin_str.end(), length); |
312 | 14 | DCHECK(end_char_len <= length); |
313 | 14 | std::string_view replace_str = new_str.to_string_view(); |
314 | 14 | std::string result = origin_str.to_string(); |
315 | 14 | result.replace(start_byte_len, end_byte_len, replace_str); |
316 | 14 | result_column->insert_data(result.data(), result.length()); |
317 | 14 | } |
318 | 24 | } _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Line | Count | Source | 280 | 24 | ColumnString* result_column, size_t input_rows_count) { | 281 | 24 | ColumnString::Chars& res_chars = result_column->get_chars(); | 282 | 24 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); | 283 | | | 284 | 48 | for (size_t row = 0; row < input_rows_count; ++row) { | 285 | 24 | StringRef origin_str = | 286 | 24 | data_column->get_data_at(index_check_const<origin_str_const>(row)); | 287 | 24 | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); | 288 | 24 | const auto start = args_start[index_check_const<start_const>(row)]; | 289 | 24 | const auto length = args_length[index_check_const<len_const>(row)]; | 290 | | //input is null, start < 0, len < 0 return NULL | 291 | 24 | if (args_null_map[row] || start < 0 || length < 0) { | 292 | 2 | res_offsets.push_back(res_chars.size()); | 293 | 2 | args_null_map[row] = 1; | 294 | 2 | continue; | 295 | 2 | } | 296 | | | 297 | 22 | const auto [start_byte_len, start_char_len] = | 298 | 22 | simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(), | 299 | 22 | origin_str.end(), start); | 300 | | | 301 | | // start >= orgin.size | 302 | 22 | DCHECK(start_char_len <= start); | 303 | 22 | if (start_byte_len == origin_str.size) { | 304 | 8 | res_offsets.push_back(res_chars.size()); | 305 | 8 | args_null_map[row] = 1; | 306 | 8 | continue; | 307 | 8 | } | 308 | | | 309 | 14 | auto [end_byte_len, end_char_len] = | 310 | 14 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 311 | 14 | origin_str.begin() + start_byte_len, origin_str.end(), length); | 312 | | DCHECK(end_char_len <= length); | 313 | 14 | std::string_view replace_str = new_str.to_string_view(); | 314 | 14 | std::string result = origin_str.to_string(); | 315 | 14 | result.replace(start_byte_len, end_byte_len, replace_str); | 316 | 14 | result_column->insert_data(result.data(), result.length()); | 317 | 14 | } | 318 | 24 | } |
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m |
319 | | }; |
320 | | |
321 | | struct SubReplaceThreeImpl { |
322 | 6 | static DataTypes get_variadic_argument_types() { |
323 | 6 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
324 | 6 | std::make_shared<DataTypeInt32>()}; |
325 | 6 | } |
326 | | |
327 | | static Status execute_impl(FunctionContext* context, Block& block, |
328 | | const ColumnNumbers& arguments, uint32_t result, |
329 | 41 | size_t input_rows_count) { |
330 | 41 | auto params = ColumnInt32::create(input_rows_count); |
331 | 41 | auto& strlen_data = params->get_data(); |
332 | | |
333 | 41 | auto str_col = |
334 | 41 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
335 | 41 | if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) { |
336 | 0 | str_col = nullable->get_nested_column_ptr(); |
337 | 0 | } |
338 | 41 | const auto* str_column = assert_cast<const ColumnString*>(str_col.get()); |
339 | | // use utf8 len |
340 | 116 | for (int i = 0; i < input_rows_count; ++i) { |
341 | 75 | StringRef str_ref = str_column->get_data_at(i); |
342 | 75 | strlen_data[i] = simd::VStringFunctions::get_char_len(str_ref.data, str_ref.size); |
343 | 75 | } |
344 | | |
345 | 41 | block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"}); |
346 | 41 | ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2], |
347 | 41 | block.columns() - 1}; |
348 | 41 | return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count); |
349 | 41 | } |
350 | | }; |
351 | | |
352 | | struct SubReplaceFourImpl { |
353 | 6 | static DataTypes get_variadic_argument_types() { |
354 | 6 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
355 | 6 | std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()}; |
356 | 6 | } |
357 | | |
358 | | static Status execute_impl(FunctionContext* context, Block& block, |
359 | | const ColumnNumbers& arguments, uint32_t result, |
360 | 52 | size_t input_rows_count) { |
361 | 52 | return SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count); |
362 | 52 | } |
363 | | }; |
364 | | |
365 | | class FunctionOverlay : public IFunction { |
366 | | public: |
367 | | static constexpr auto name = "overlay"; |
368 | 177 | static FunctionPtr create() { return std::make_shared<FunctionOverlay>(); } |
369 | 1 | String get_name() const override { return name; } |
370 | 170 | size_t get_number_of_arguments() const override { return 4; } |
371 | | |
372 | 170 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
373 | 170 | return std::make_shared<DataTypeString>(); |
374 | 170 | } |
375 | | |
376 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
377 | 154 | uint32_t result, size_t input_rows_count) const override { |
378 | 154 | DCHECK_EQ(arguments.size(), 4); |
379 | | |
380 | 154 | bool col_const[4]; |
381 | 154 | ColumnPtr argument_columns[4]; |
382 | 770 | for (int i = 0; i < 4; ++i) { |
383 | 616 | std::tie(argument_columns[i], col_const[i]) = |
384 | 616 | unpack_if_const(block.get_by_position(arguments[i]).column); |
385 | 616 | } |
386 | | |
387 | 154 | const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get()); |
388 | | |
389 | 154 | const auto* col_pos = |
390 | 154 | assert_cast<const ColumnInt32*>(argument_columns[1].get())->get_data().data(); |
391 | 154 | const auto* col_len = |
392 | 154 | assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_data().data(); |
393 | 154 | const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get()); |
394 | | |
395 | 154 | ColumnString::MutablePtr col_res = ColumnString::create(); |
396 | | |
397 | | // if all input string is ascii, we can use ascii function to handle it |
398 | 154 | const bool is_all_ascii = col_origin->is_ascii() && col_insert->is_ascii(); |
399 | 154 | std::visit( |
400 | 154 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { |
401 | 154 | if (is_all_ascii) { |
402 | 79 | vector_ascii<origin_const, pos_const, len_const, insert_const>( |
403 | 79 | col_origin, col_pos, col_len, col_insert, col_res, |
404 | 79 | input_rows_count); |
405 | 79 | } else { |
406 | 75 | vector_utf8<origin_const, pos_const, len_const, insert_const>( |
407 | 75 | col_origin, col_pos, col_len, col_insert, col_res, |
408 | 75 | input_rows_count); |
409 | 75 | } |
410 | 154 | }, _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SH_EEDaSA_SB_SC_SD_ Line | Count | Source | 400 | 139 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 401 | 139 | if (is_all_ascii) { | 402 | 72 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 403 | 72 | col_origin, col_pos, col_len, col_insert, col_res, | 404 | 72 | input_rows_count); | 405 | 72 | } else { | 406 | 67 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 407 | 67 | col_origin, col_pos, col_len, col_insert, col_res, | 408 | 67 | input_rows_count); | 409 | 67 | } | 410 | 139 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SG_IbLb1EEEEDaSA_SB_SC_SD_ _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESH_EEDaSA_SB_SC_SD_ Line | Count | Source | 400 | 1 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 401 | 1 | if (is_all_ascii) { | 402 | 0 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 403 | 0 | col_origin, col_pos, col_len, col_insert, col_res, | 404 | 0 | input_rows_count); | 405 | 1 | } else { | 406 | 1 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 407 | 1 | col_origin, col_pos, col_len, col_insert, col_res, | 408 | 1 | input_rows_count); | 409 | 1 | } | 410 | 1 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SH_EEDaSA_SB_SC_SD_ _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SI_EEDaSA_SB_SC_SD_ Line | Count | Source | 400 | 1 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 401 | 1 | if (is_all_ascii) { | 402 | 0 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 403 | 0 | col_origin, col_pos, col_len, col_insert, col_res, | 404 | 0 | input_rows_count); | 405 | 1 | } else { | 406 | 1 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 407 | 1 | col_origin, col_pos, col_len, col_insert, col_res, | 408 | 1 | input_rows_count); | 409 | 1 | } | 410 | 1 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SH_EEDaSA_SB_SC_SD_ _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SI_EEDaSA_SB_SC_SD_ Line | Count | Source | 400 | 12 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 401 | 12 | if (is_all_ascii) { | 402 | 6 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 403 | 6 | col_origin, col_pos, col_len, col_insert, col_res, | 404 | 6 | input_rows_count); | 405 | 6 | } else { | 406 | 6 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 407 | 6 | col_origin, col_pos, col_len, col_insert, col_res, | 408 | 6 | input_rows_count); | 409 | 6 | } | 410 | 12 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SH_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SI_EEDaSA_SB_SC_SD_ _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SH_EEDaSA_SB_SC_SD_ Line | Count | Source | 400 | 1 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 401 | 1 | if (is_all_ascii) { | 402 | 1 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 403 | 1 | col_origin, col_pos, col_len, col_insert, col_res, | 404 | 1 | input_rows_count); | 405 | 1 | } else { | 406 | 0 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 407 | 0 | col_origin, col_pos, col_len, col_insert, col_res, | 408 | 0 | input_rows_count); | 409 | 0 | } | 410 | 1 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESH_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SG_IbLb0EEEEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SH_EEDaSA_SB_SC_SD_ |
411 | 154 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
412 | 154 | make_bool_variant(col_const[2]), make_bool_variant(col_const[3])); |
413 | 154 | block.replace_by_position(result, std::move(col_res)); |
414 | 154 | return Status::OK(); |
415 | 154 | } |
416 | | |
417 | | private: |
418 | | template <bool origin_const, bool pos_const, bool len_const, bool insert_const> |
419 | | static void vector_ascii(const ColumnString* col_origin, int const* col_pos, int const* col_len, |
420 | | const ColumnString* col_insert, ColumnString::MutablePtr& col_res, |
421 | 79 | size_t input_rows_count) { |
422 | 79 | auto& col_res_chars = col_res->get_chars(); |
423 | 79 | auto& col_res_offsets = col_res->get_offsets(); |
424 | 79 | StringRef origin_str, insert_str; |
425 | 187 | for (size_t i = 0; i < input_rows_count; i++) { |
426 | 108 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); |
427 | | // pos is 1-based index,so we need to minus 1 |
428 | 108 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; |
429 | 108 | const auto len = col_len[index_check_const<len_const>(i)]; |
430 | 108 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); |
431 | 108 | const auto origin_size = origin_str.size; |
432 | 108 | if (pos >= origin_size || pos < 0) { |
433 | | // If pos is not within the length of the string, the original string is returned. |
434 | 26 | col_res->insert_data(origin_str.data, origin_str.size); |
435 | 26 | continue; |
436 | 26 | } |
437 | 82 | col_res_chars.insert(origin_str.data, |
438 | 82 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 |
439 | 82 | if (pos + len > origin_size || len < 0) { |
440 | 15 | col_res_chars.insert(insert_str.begin(), |
441 | 15 | insert_str.end()); // copy all of insert_str. |
442 | 67 | } else { |
443 | 67 | col_res_chars.insert(insert_str.begin(), |
444 | 67 | insert_str.end()); // copy all of insert_str. |
445 | 67 | col_res_chars.insert( |
446 | 67 | origin_str.data + pos + len, |
447 | 67 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. |
448 | 67 | } |
449 | 82 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); |
450 | 82 | col_res_offsets.push_back(col_res_chars.size()); |
451 | 82 | } |
452 | 79 | } _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 421 | 72 | size_t input_rows_count) { | 422 | 72 | auto& col_res_chars = col_res->get_chars(); | 423 | 72 | auto& col_res_offsets = col_res->get_offsets(); | 424 | 72 | StringRef origin_str, insert_str; | 425 | 144 | for (size_t i = 0; i < input_rows_count; i++) { | 426 | 72 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 427 | | // pos is 1-based index,so we need to minus 1 | 428 | 72 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 429 | 72 | const auto len = col_len[index_check_const<len_const>(i)]; | 430 | 72 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 431 | 72 | const auto origin_size = origin_str.size; | 432 | 72 | if (pos >= origin_size || pos < 0) { | 433 | | // If pos is not within the length of the string, the original string is returned. | 434 | 18 | col_res->insert_data(origin_str.data, origin_str.size); | 435 | 18 | continue; | 436 | 18 | } | 437 | 54 | col_res_chars.insert(origin_str.data, | 438 | 54 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 | 439 | 54 | if (pos + len > origin_size || len < 0) { | 440 | 11 | col_res_chars.insert(insert_str.begin(), | 441 | 11 | insert_str.end()); // copy all of insert_str. | 442 | 43 | } else { | 443 | 43 | col_res_chars.insert(insert_str.begin(), | 444 | 43 | insert_str.end()); // copy all of insert_str. | 445 | 43 | col_res_chars.insert( | 446 | 43 | origin_str.data + pos + len, | 447 | 43 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 448 | 43 | } | 449 | 54 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 450 | 54 | col_res_offsets.push_back(col_res_chars.size()); | 451 | 54 | } | 452 | 72 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 421 | 6 | size_t input_rows_count) { | 422 | 6 | auto& col_res_chars = col_res->get_chars(); | 423 | 6 | auto& col_res_offsets = col_res->get_offsets(); | 424 | 6 | StringRef origin_str, insert_str; | 425 | 12 | for (size_t i = 0; i < input_rows_count; i++) { | 426 | 6 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 427 | | // pos is 1-based index,so we need to minus 1 | 428 | 6 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 429 | 6 | const auto len = col_len[index_check_const<len_const>(i)]; | 430 | 6 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 431 | 6 | const auto origin_size = origin_str.size; | 432 | 6 | if (pos >= origin_size || pos < 0) { | 433 | | // If pos is not within the length of the string, the original string is returned. | 434 | 3 | col_res->insert_data(origin_str.data, origin_str.size); | 435 | 3 | continue; | 436 | 3 | } | 437 | 3 | col_res_chars.insert(origin_str.data, | 438 | 3 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 | 439 | 3 | if (pos + len > origin_size || len < 0) { | 440 | 1 | col_res_chars.insert(insert_str.begin(), | 441 | 1 | insert_str.end()); // copy all of insert_str. | 442 | 2 | } else { | 443 | 2 | col_res_chars.insert(insert_str.begin(), | 444 | 2 | insert_str.end()); // copy all of insert_str. | 445 | 2 | col_res_chars.insert( | 446 | 2 | origin_str.data + pos + len, | 447 | 2 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 448 | 2 | } | 449 | 3 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 450 | 3 | col_res_offsets.push_back(col_res_chars.size()); | 451 | 3 | } | 452 | 6 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 421 | 1 | size_t input_rows_count) { | 422 | 1 | auto& col_res_chars = col_res->get_chars(); | 423 | 1 | auto& col_res_offsets = col_res->get_offsets(); | 424 | 1 | StringRef origin_str, insert_str; | 425 | 31 | for (size_t i = 0; i < input_rows_count; i++) { | 426 | 30 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 427 | | // pos is 1-based index,so we need to minus 1 | 428 | 30 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 429 | 30 | const auto len = col_len[index_check_const<len_const>(i)]; | 430 | 30 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 431 | 30 | const auto origin_size = origin_str.size; | 432 | 30 | if (pos >= origin_size || pos < 0) { | 433 | | // If pos is not within the length of the string, the original string is returned. | 434 | 5 | col_res->insert_data(origin_str.data, origin_str.size); | 435 | 5 | continue; | 436 | 5 | } | 437 | 25 | col_res_chars.insert(origin_str.data, | 438 | 25 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 | 439 | 25 | if (pos + len > origin_size || len < 0) { | 440 | 3 | col_res_chars.insert(insert_str.begin(), | 441 | 3 | insert_str.end()); // copy all of insert_str. | 442 | 22 | } else { | 443 | 22 | col_res_chars.insert(insert_str.begin(), | 444 | 22 | insert_str.end()); // copy all of insert_str. | 445 | 22 | col_res_chars.insert( | 446 | 22 | origin_str.data + pos + len, | 447 | 22 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 448 | 22 | } | 449 | 25 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 450 | 25 | col_res_offsets.push_back(col_res_chars.size()); | 451 | 25 | } | 452 | 1 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm |
453 | | |
454 | | template <bool origin_const, bool pos_const, bool len_const, bool insert_const> |
455 | | NO_SANITIZE_UNDEFINED static void vector_utf8(const ColumnString* col_origin, |
456 | | int const* col_pos, int const* col_len, |
457 | | const ColumnString* col_insert, |
458 | | ColumnString::MutablePtr& col_res, |
459 | 75 | size_t input_rows_count) { |
460 | 75 | auto& col_res_chars = col_res->get_chars(); |
461 | 75 | auto& col_res_offsets = col_res->get_offsets(); |
462 | 75 | StringRef origin_str, insert_str; |
463 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. |
464 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. |
465 | 75 | std::vector<size_t> utf8_origin_offsets; |
466 | 252 | for (size_t i = 0; i < input_rows_count; i++) { |
467 | 177 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); |
468 | | // pos is 1-based index,so we need to minus 1 |
469 | 177 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; |
470 | 177 | const auto len = col_len[index_check_const<len_const>(i)]; |
471 | 177 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); |
472 | 177 | utf8_origin_offsets.clear(); |
473 | | |
474 | 1.10k | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { |
475 | 924 | utf8_origin_offsets.push_back(ni); |
476 | 924 | char_size = get_utf8_byte_length(origin_str.data[ni]); |
477 | 924 | } |
478 | | |
479 | 177 | const size_t utf8_origin_size = utf8_origin_offsets.size(); |
480 | | |
481 | 177 | if (pos >= utf8_origin_size || pos < 0) { |
482 | | // If pos is not within the length of the string, the original string is returned. |
483 | 38 | col_res->insert_data(origin_str.data, origin_str.size); |
484 | 38 | continue; |
485 | 38 | } |
486 | 139 | col_res_chars.insert( |
487 | 139 | origin_str.data, |
488 | 139 | origin_str.data + |
489 | 139 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 |
490 | 139 | if (pos + len >= utf8_origin_size || len < 0) { |
491 | 35 | col_res_chars.insert(insert_str.begin(), |
492 | 35 | insert_str.end()); // copy all of insert_str. |
493 | 104 | } else { |
494 | 104 | col_res_chars.insert(insert_str.begin(), |
495 | 104 | insert_str.end()); // copy all of insert_str. |
496 | 104 | col_res_chars.insert( |
497 | 104 | origin_str.data + utf8_origin_offsets[pos + len], |
498 | 104 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. |
499 | 104 | } |
500 | 139 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); |
501 | 139 | col_res_offsets.push_back(col_res_chars.size()); |
502 | 139 | } |
503 | 75 | } _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 459 | 67 | size_t input_rows_count) { | 460 | 67 | auto& col_res_chars = col_res->get_chars(); | 461 | 67 | auto& col_res_offsets = col_res->get_offsets(); | 462 | 67 | StringRef origin_str, insert_str; | 463 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 464 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. | 465 | 67 | std::vector<size_t> utf8_origin_offsets; | 466 | 178 | for (size_t i = 0; i < input_rows_count; i++) { | 467 | 111 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 468 | | // pos is 1-based index,so we need to minus 1 | 469 | 111 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 470 | 111 | const auto len = col_len[index_check_const<len_const>(i)]; | 471 | 111 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 472 | 111 | utf8_origin_offsets.clear(); | 473 | | | 474 | 639 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 475 | 528 | utf8_origin_offsets.push_back(ni); | 476 | 528 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 477 | 528 | } | 478 | | | 479 | 111 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 480 | | | 481 | 111 | if (pos >= utf8_origin_size || pos < 0) { | 482 | | // If pos is not within the length of the string, the original string is returned. | 483 | 22 | col_res->insert_data(origin_str.data, origin_str.size); | 484 | 22 | continue; | 485 | 22 | } | 486 | 89 | col_res_chars.insert( | 487 | 89 | origin_str.data, | 488 | 89 | origin_str.data + | 489 | 89 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 490 | 89 | if (pos + len >= utf8_origin_size || len < 0) { | 491 | 23 | col_res_chars.insert(insert_str.begin(), | 492 | 23 | insert_str.end()); // copy all of insert_str. | 493 | 66 | } else { | 494 | 66 | col_res_chars.insert(insert_str.begin(), | 495 | 66 | insert_str.end()); // copy all of insert_str. | 496 | 66 | col_res_chars.insert( | 497 | 66 | origin_str.data + utf8_origin_offsets[pos + len], | 498 | 66 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 499 | 66 | } | 500 | 89 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 501 | 89 | col_res_offsets.push_back(col_res_chars.size()); | 502 | 89 | } | 503 | 67 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 459 | 1 | size_t input_rows_count) { | 460 | 1 | auto& col_res_chars = col_res->get_chars(); | 461 | 1 | auto& col_res_offsets = col_res->get_offsets(); | 462 | 1 | StringRef origin_str, insert_str; | 463 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 464 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. | 465 | 1 | std::vector<size_t> utf8_origin_offsets; | 466 | 31 | for (size_t i = 0; i < input_rows_count; i++) { | 467 | 30 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 468 | | // pos is 1-based index,so we need to minus 1 | 469 | 30 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 470 | 30 | const auto len = col_len[index_check_const<len_const>(i)]; | 471 | 30 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 472 | 30 | utf8_origin_offsets.clear(); | 473 | | | 474 | 209 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 475 | 179 | utf8_origin_offsets.push_back(ni); | 476 | 179 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 477 | 179 | } | 478 | | | 479 | 30 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 480 | | | 481 | 30 | if (pos >= utf8_origin_size || pos < 0) { | 482 | | // If pos is not within the length of the string, the original string is returned. | 483 | 9 | col_res->insert_data(origin_str.data, origin_str.size); | 484 | 9 | continue; | 485 | 9 | } | 486 | 21 | col_res_chars.insert( | 487 | 21 | origin_str.data, | 488 | 21 | origin_str.data + | 489 | 21 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 490 | 21 | if (pos + len >= utf8_origin_size || len < 0) { | 491 | 3 | col_res_chars.insert(insert_str.begin(), | 492 | 3 | insert_str.end()); // copy all of insert_str. | 493 | 18 | } else { | 494 | 18 | col_res_chars.insert(insert_str.begin(), | 495 | 18 | insert_str.end()); // copy all of insert_str. | 496 | 18 | col_res_chars.insert( | 497 | 18 | origin_str.data + utf8_origin_offsets[pos + len], | 498 | 18 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 499 | 18 | } | 500 | 21 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 501 | 21 | col_res_offsets.push_back(col_res_chars.size()); | 502 | 21 | } | 503 | 1 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 459 | 1 | size_t input_rows_count) { | 460 | 1 | auto& col_res_chars = col_res->get_chars(); | 461 | 1 | auto& col_res_offsets = col_res->get_offsets(); | 462 | 1 | StringRef origin_str, insert_str; | 463 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 464 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. | 465 | 1 | std::vector<size_t> utf8_origin_offsets; | 466 | 31 | for (size_t i = 0; i < input_rows_count; i++) { | 467 | 30 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 468 | | // pos is 1-based index,so we need to minus 1 | 469 | 30 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 470 | 30 | const auto len = col_len[index_check_const<len_const>(i)]; | 471 | 30 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 472 | 30 | utf8_origin_offsets.clear(); | 473 | | | 474 | 209 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 475 | 179 | utf8_origin_offsets.push_back(ni); | 476 | 179 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 477 | 179 | } | 478 | | | 479 | 30 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 480 | | | 481 | 30 | if (pos >= utf8_origin_size || pos < 0) { | 482 | | // If pos is not within the length of the string, the original string is returned. | 483 | 4 | col_res->insert_data(origin_str.data, origin_str.size); | 484 | 4 | continue; | 485 | 4 | } | 486 | 26 | col_res_chars.insert( | 487 | 26 | origin_str.data, | 488 | 26 | origin_str.data + | 489 | 26 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 490 | 26 | if (pos + len >= utf8_origin_size || len < 0) { | 491 | 8 | col_res_chars.insert(insert_str.begin(), | 492 | 8 | insert_str.end()); // copy all of insert_str. | 493 | 18 | } else { | 494 | 18 | col_res_chars.insert(insert_str.begin(), | 495 | 18 | insert_str.end()); // copy all of insert_str. | 496 | 18 | col_res_chars.insert( | 497 | 18 | origin_str.data + utf8_origin_offsets[pos + len], | 498 | 18 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 499 | 18 | } | 500 | 26 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 501 | 26 | col_res_offsets.push_back(col_res_chars.size()); | 502 | 26 | } | 503 | 1 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 459 | 6 | size_t input_rows_count) { | 460 | 6 | auto& col_res_chars = col_res->get_chars(); | 461 | 6 | auto& col_res_offsets = col_res->get_offsets(); | 462 | 6 | StringRef origin_str, insert_str; | 463 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 464 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. | 465 | 6 | std::vector<size_t> utf8_origin_offsets; | 466 | 12 | for (size_t i = 0; i < input_rows_count; i++) { | 467 | 6 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 468 | | // pos is 1-based index,so we need to minus 1 | 469 | 6 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 470 | 6 | const auto len = col_len[index_check_const<len_const>(i)]; | 471 | 6 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 472 | 6 | utf8_origin_offsets.clear(); | 473 | | | 474 | 44 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 475 | 38 | utf8_origin_offsets.push_back(ni); | 476 | 38 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 477 | 38 | } | 478 | | | 479 | 6 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 480 | | | 481 | 6 | if (pos >= utf8_origin_size || pos < 0) { | 482 | | // If pos is not within the length of the string, the original string is returned. | 483 | 3 | col_res->insert_data(origin_str.data, origin_str.size); | 484 | 3 | continue; | 485 | 3 | } | 486 | 3 | col_res_chars.insert( | 487 | 3 | origin_str.data, | 488 | 3 | origin_str.data + | 489 | 3 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 490 | 3 | if (pos + len >= utf8_origin_size || len < 0) { | 491 | 1 | col_res_chars.insert(insert_str.begin(), | 492 | 1 | insert_str.end()); // copy all of insert_str. | 493 | 2 | } else { | 494 | 2 | col_res_chars.insert(insert_str.begin(), | 495 | 2 | insert_str.end()); // copy all of insert_str. | 496 | 2 | col_res_chars.insert( | 497 | 2 | origin_str.data + utf8_origin_offsets[pos + len], | 498 | 2 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 499 | 2 | } | 500 | 3 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 501 | 3 | col_res_offsets.push_back(col_res_chars.size()); | 502 | 3 | } | 503 | 6 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm |
504 | | }; |
505 | | |
506 | | #include "common/compile_check_avoid_end.h" |
507 | | } // namespace doris |