be/src/exprs/function/function_string_replace.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <cstddef> |
21 | | #include <cstring> |
22 | | #include <string> |
23 | | #include <string_view> |
24 | | |
25 | | #include "common/compiler_util.h" |
26 | | #include "common/status.h" |
27 | | #include "core/assert_cast.h" |
28 | | #include "core/block/block.h" |
29 | | #include "core/block/column_numbers.h" |
30 | | #include "core/column/column_const.h" |
31 | | #include "core/column/column_nullable.h" |
32 | | #include "core/column/column_string.h" |
33 | | #include "core/column/column_vector.h" |
34 | | #include "core/data_type/data_type_nullable.h" |
35 | | #include "core/data_type/data_type_number.h" |
36 | | #include "core/data_type/data_type_string.h" |
37 | | #include "core/string_ref.h" |
38 | | #include "exec/common/string_searcher.h" |
39 | | #include "exec/common/stringop_substring.h" |
40 | | #include "exec/common/template_helpers.hpp" |
41 | | #include "exprs/function/function.h" |
42 | | #include "exprs/function/function_helpers.h" |
43 | | #include "exprs/function_context.h" |
44 | | #include "util/simd/vstring_function.h" |
45 | | |
46 | | namespace doris { |
47 | | #include "common/compile_check_avoid_begin.h" |
48 | | |
49 | | struct ReplaceImpl { |
50 | | static constexpr auto name = "replace"; |
51 | | }; |
52 | | |
53 | | struct ReplaceEmptyImpl { |
54 | | static constexpr auto name = "replace_empty"; |
55 | | }; |
56 | | |
57 | | template <typename Impl, bool empty> |
58 | | class FunctionReplace : public IFunction { |
59 | | public: |
60 | | static constexpr auto name = Impl::name; |
61 | 6.23k | static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }_ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE6createEv Line | Count | Source | 61 | 4.70k | static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); } |
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE6createEv Line | Count | Source | 61 | 1.52k | static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); } |
|
62 | 2 | String get_name() const override { return name; }_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE8get_nameB5cxx11Ev Line | Count | Source | 62 | 1 | String get_name() const override { return name; } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE8get_nameB5cxx11Ev Line | Count | Source | 62 | 1 | String get_name() const override { return name; } |
|
63 | 6.21k | size_t get_number_of_arguments() const override { return 3; }_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE23get_number_of_argumentsEv Line | Count | Source | 63 | 4.69k | size_t get_number_of_arguments() const override { return 3; } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE23get_number_of_argumentsEv Line | Count | Source | 63 | 1.52k | size_t get_number_of_arguments() const override { return 3; } |
|
64 | | |
65 | 6.21k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
66 | 6.21k | return std::make_shared<DataTypeString>(); |
67 | 6.21k | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 65 | 4.69k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 66 | 4.69k | return std::make_shared<DataTypeString>(); | 67 | 4.69k | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 65 | 1.52k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 66 | 1.52k | return std::make_shared<DataTypeString>(); | 67 | 1.52k | } |
|
68 | | |
69 | 16 | DataTypes get_variadic_argument_types_impl() const override { |
70 | 16 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
71 | 16 | std::make_shared<DataTypeString>()}; |
72 | 16 | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE32get_variadic_argument_types_implEv Line | Count | Source | 69 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 70 | 8 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 71 | 8 | std::make_shared<DataTypeString>()}; | 72 | 8 | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE32get_variadic_argument_types_implEv Line | Count | Source | 69 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 70 | 8 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 71 | 8 | std::make_shared<DataTypeString>()}; | 72 | 8 | } |
|
73 | | |
74 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
75 | 4.85k | uint32_t result, size_t input_rows_count) const override { |
76 | | // We need a local variable to hold a reference to the converted column. |
77 | | // So that the converted column will not be released before we use it. |
78 | 4.85k | ColumnPtr col[3]; |
79 | 4.85k | bool col_const[3]; |
80 | 19.4k | for (size_t i = 0; i < 3; ++i) { |
81 | 14.5k | std::tie(col[i], col_const[i]) = |
82 | 14.5k | unpack_if_const(block.get_by_position(arguments[i]).column); |
83 | 14.5k | } |
84 | | |
85 | 4.85k | const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get()); |
86 | 4.85k | const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get()); |
87 | 4.85k | const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get()); |
88 | | |
89 | 4.85k | ColumnString::MutablePtr col_res = ColumnString::create(); |
90 | | |
91 | | // Fast path: when old_str and new_str are both constant and old_str is |
92 | | // non-empty (the common case for replace(col, 'literal', 'literal')). |
93 | | // Works directly on ColumnString chars/offsets to avoid per-row |
94 | | // std::string allocation and copy overhead. |
95 | | // Applies to both replace (empty=true) and replace_empty (empty=false): |
96 | | // when old_str is non-empty the two variants behave identically. |
97 | 4.85k | if (col_const[1] && col_const[2]) { |
98 | 3.31k | StringRef old_ref = col_old_str->get_data_at(0); |
99 | 3.31k | StringRef new_ref = col_new_str->get_data_at(0); |
100 | 3.31k | if (old_ref.size > 0) { |
101 | 3.26k | _replace_const_pattern(*col_origin_str, old_ref, new_ref, *col_res, |
102 | 3.26k | input_rows_count, col_const[0]); |
103 | 3.26k | block.replace_by_position(result, std::move(col_res)); |
104 | 3.26k | return Status::OK(); |
105 | 3.26k | } |
106 | 3.31k | } |
107 | | |
108 | 1.59k | std::visit( |
109 | 1.59k | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { |
110 | 3.65k | for (int i = 0; i < input_rows_count; ++i) { |
111 | 2.05k | StringRef origin_str = |
112 | 2.05k | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); |
113 | 2.05k | StringRef old_str = |
114 | 2.05k | col_old_str->get_data_at(index_check_const<old_str_const>(i)); |
115 | 2.05k | StringRef new_str = |
116 | 2.05k | col_new_str->get_data_at(index_check_const<new_str_const>(i)); |
117 | | |
118 | 2.05k | std::string result = |
119 | 2.05k | replace(origin_str.to_string(), old_str.to_string_view(), |
120 | 2.05k | new_str.to_string_view()); |
121 | | |
122 | 2.05k | col_res->insert_data(result.data(), result.length()); |
123 | 2.05k | } |
124 | 1.59k | }, _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 109 | 163 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 575 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 412 | StringRef origin_str = | 112 | 412 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 412 | StringRef old_str = | 114 | 412 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 412 | StringRef new_str = | 116 | 412 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 412 | std::string result = | 119 | 412 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 412 | new_str.to_string_view()); | 121 | | | 122 | 412 | col_res->insert_data(result.data(), result.length()); | 123 | 412 | } | 124 | 163 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 109 | 25 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 50 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 25 | StringRef origin_str = | 112 | 25 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 25 | StringRef old_str = | 114 | 25 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 25 | StringRef new_str = | 116 | 25 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 25 | std::string result = | 119 | 25 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 25 | new_str.to_string_view()); | 121 | | | 122 | 25 | col_res->insert_data(result.data(), result.length()); | 123 | 25 | } | 124 | 25 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 109 | 132 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 479 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 347 | StringRef origin_str = | 112 | 347 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 347 | StringRef old_str = | 114 | 347 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 347 | StringRef new_str = | 116 | 347 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 347 | std::string result = | 119 | 347 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 347 | new_str.to_string_view()); | 121 | | | 122 | 347 | col_res->insert_data(result.data(), result.length()); | 123 | 347 | } | 124 | 132 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 109 | 25 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 50 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 25 | StringRef origin_str = | 112 | 25 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 25 | StringRef old_str = | 114 | 25 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 25 | StringRef new_str = | 116 | 25 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 25 | std::string result = | 119 | 25 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 25 | new_str.to_string_view()); | 121 | | | 122 | 25 | col_res->insert_data(result.data(), result.length()); | 123 | 25 | } | 124 | 25 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 109 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 125 | StringRef origin_str = | 112 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 125 | StringRef old_str = | 114 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 125 | StringRef new_str = | 116 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 125 | std::string result = | 119 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 125 | new_str.to_string_view()); | 121 | | | 122 | 125 | col_res->insert_data(result.data(), result.length()); | 123 | 125 | } | 124 | 125 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ |
125 | 1.59k | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
126 | 1.59k | make_bool_variant(col_const[2])); |
127 | | |
128 | 1.59k | block.replace_by_position(result, std::move(col_res)); |
129 | 1.59k | return Status::OK(); |
130 | 4.85k | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 75 | 3.97k | uint32_t result, size_t input_rows_count) const override { | 76 | | // We need a local variable to hold a reference to the converted column. | 77 | | // So that the converted column will not be released before we use it. | 78 | 3.97k | ColumnPtr col[3]; | 79 | 3.97k | bool col_const[3]; | 80 | 15.8k | for (size_t i = 0; i < 3; ++i) { | 81 | 11.9k | std::tie(col[i], col_const[i]) = | 82 | 11.9k | unpack_if_const(block.get_by_position(arguments[i]).column); | 83 | 11.9k | } | 84 | | | 85 | 3.97k | const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get()); | 86 | 3.97k | const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get()); | 87 | 3.97k | const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get()); | 88 | | | 89 | 3.97k | ColumnString::MutablePtr col_res = ColumnString::create(); | 90 | | | 91 | | // Fast path: when old_str and new_str are both constant and old_str is | 92 | | // non-empty (the common case for replace(col, 'literal', 'literal')). | 93 | | // Works directly on ColumnString chars/offsets to avoid per-row | 94 | | // std::string allocation and copy overhead. | 95 | | // Applies to both replace (empty=true) and replace_empty (empty=false): | 96 | | // when old_str is non-empty the two variants behave identically. | 97 | 3.97k | if (col_const[1] && col_const[2]) { | 98 | 3.18k | StringRef old_ref = col_old_str->get_data_at(0); | 99 | 3.18k | StringRef new_ref = col_new_str->get_data_at(0); | 100 | 3.18k | if (old_ref.size > 0) { | 101 | 3.16k | _replace_const_pattern(*col_origin_str, old_ref, new_ref, *col_res, | 102 | 3.16k | input_rows_count, col_const[0]); | 103 | 3.16k | block.replace_by_position(result, std::move(col_res)); | 104 | 3.16k | return Status::OK(); | 105 | 3.16k | } | 106 | 3.18k | } | 107 | | | 108 | 813 | std::visit( | 109 | 813 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 813 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 813 | StringRef origin_str = | 112 | 813 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 813 | StringRef old_str = | 114 | 813 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 813 | StringRef new_str = | 116 | 813 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 813 | std::string result = | 119 | 813 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 813 | new_str.to_string_view()); | 121 | | | 122 | 813 | col_res->insert_data(result.data(), result.length()); | 123 | 813 | } | 124 | 813 | }, | 125 | 813 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 126 | 813 | make_bool_variant(col_const[2])); | 127 | | | 128 | 813 | block.replace_by_position(result, std::move(col_res)); | 129 | 813 | return Status::OK(); | 130 | 3.97k | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 75 | 882 | uint32_t result, size_t input_rows_count) const override { | 76 | | // We need a local variable to hold a reference to the converted column. | 77 | | // So that the converted column will not be released before we use it. | 78 | 882 | ColumnPtr col[3]; | 79 | 882 | bool col_const[3]; | 80 | 3.52k | for (size_t i = 0; i < 3; ++i) { | 81 | 2.64k | std::tie(col[i], col_const[i]) = | 82 | 2.64k | unpack_if_const(block.get_by_position(arguments[i]).column); | 83 | 2.64k | } | 84 | | | 85 | 882 | const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get()); | 86 | 882 | const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get()); | 87 | 882 | const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get()); | 88 | | | 89 | 882 | ColumnString::MutablePtr col_res = ColumnString::create(); | 90 | | | 91 | | // Fast path: when old_str and new_str are both constant and old_str is | 92 | | // non-empty (the common case for replace(col, 'literal', 'literal')). | 93 | | // Works directly on ColumnString chars/offsets to avoid per-row | 94 | | // std::string allocation and copy overhead. | 95 | | // Applies to both replace (empty=true) and replace_empty (empty=false): | 96 | | // when old_str is non-empty the two variants behave identically. | 97 | 882 | if (col_const[1] && col_const[2]) { | 98 | 125 | StringRef old_ref = col_old_str->get_data_at(0); | 99 | 125 | StringRef new_ref = col_new_str->get_data_at(0); | 100 | 125 | if (old_ref.size > 0) { | 101 | 100 | _replace_const_pattern(*col_origin_str, old_ref, new_ref, *col_res, | 102 | 100 | input_rows_count, col_const[0]); | 103 | 100 | block.replace_by_position(result, std::move(col_res)); | 104 | 100 | return Status::OK(); | 105 | 100 | } | 106 | 125 | } | 107 | | | 108 | 782 | std::visit( | 109 | 782 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 110 | 782 | for (int i = 0; i < input_rows_count; ++i) { | 111 | 782 | StringRef origin_str = | 112 | 782 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 113 | 782 | StringRef old_str = | 114 | 782 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 115 | 782 | StringRef new_str = | 116 | 782 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 117 | | | 118 | 782 | std::string result = | 119 | 782 | replace(origin_str.to_string(), old_str.to_string_view(), | 120 | 782 | new_str.to_string_view()); | 121 | | | 122 | 782 | col_res->insert_data(result.data(), result.length()); | 123 | 782 | } | 124 | 782 | }, | 125 | 782 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 126 | 782 | make_bool_variant(col_const[2])); | 127 | | | 128 | 782 | block.replace_by_position(result, std::move(col_res)); | 129 | 782 | return Status::OK(); | 130 | 882 | } |
|
131 | | |
132 | | private: |
133 | | // Optimized replace path for constant old_str (non-empty) and constant new_str. |
134 | | // Avoids per-row std::string allocation by working directly on ColumnString |
135 | | // chars/offsets. Two-level search strategy: |
136 | | // 1. memchr (glibc AVX512) scans for the needle's first byte. If absent, |
137 | | // the row is guaranteed no-match and is bulk-copied with a single memcpy. |
138 | | // 2. When the first byte is present, ASCIICaseSensitiveStringSearcher |
139 | | // (SSE4.1, prebuilt once outside the row loop) does the full needle scan. |
140 | | static void _replace_const_pattern(const ColumnString& src, StringRef old_ref, |
141 | | StringRef new_ref, ColumnString& dst, |
142 | 3.26k | size_t input_rows_count, bool src_const) { |
143 | 3.26k | auto& dst_chars = dst.get_chars(); |
144 | 3.26k | auto& dst_offsets = dst.get_offsets(); |
145 | | |
146 | 3.26k | dst_chars.reserve(src_const ? (src.get_data_at(0).size * input_rows_count) |
147 | 3.26k | : src.get_chars().size()); |
148 | 3.26k | dst_offsets.resize(input_rows_count); |
149 | | |
150 | | // Build SSE4.1 searcher once — first+second byte masks precomputed here. |
151 | 3.26k | ASCIICaseSensitiveStringSearcher searcher(old_ref.data, old_ref.size); |
152 | 3.26k | const size_t needle_size = old_ref.size; |
153 | 3.26k | const size_t replacement_size = new_ref.size; |
154 | 3.26k | const char* replacement_data = new_ref.data; |
155 | 3.26k | const auto needle_first = static_cast<unsigned char>(old_ref.data[0]); |
156 | | |
157 | 12.9k | for (size_t i = 0; i < input_rows_count; ++i) { |
158 | 9.72k | StringRef row = src.get_data_at(src_const ? 0 : i); |
159 | 9.72k | const char* const row_end = row.data + row.size; |
160 | | |
161 | | // Level-1: memchr for needle's first byte (glibc uses AVX512 internally). |
162 | | // If the first byte is absent the entire row cannot contain the needle; |
163 | | // bulk-copy it and move to the next row without entering the SSE4.1 loop. |
164 | 9.72k | if (memchr(row.data, needle_first, row.size) == nullptr) { |
165 | 9.32k | StringOP::push_value_string({row.data, row.size}, i, dst_chars, dst_offsets); |
166 | 9.32k | continue; |
167 | 9.32k | } |
168 | | |
169 | | // Level-2: SSE4.1 searcher handles needle matching for this row. |
170 | 397 | const char* pos = row.data; |
171 | 1.14k | while (pos < row_end) { |
172 | 1.09k | const char* match = searcher.search(pos, row_end); |
173 | | // Copy prefix before match |
174 | 1.09k | size_t prefix_len = static_cast<size_t>(match - pos); |
175 | 1.09k | if (prefix_len > 0) { |
176 | 1.03k | size_t old_size = dst_chars.size(); |
177 | 1.03k | ColumnString::check_chars_length(old_size + prefix_len, i + 1); |
178 | 1.03k | dst_chars.resize(old_size + prefix_len); |
179 | 1.03k | memcpy(&dst_chars[old_size], pos, prefix_len); |
180 | 1.03k | } |
181 | 1.09k | if (match == row_end) { |
182 | 343 | break; |
183 | 343 | } |
184 | | // Copy replacement |
185 | 750 | if (replacement_size > 0) { |
186 | 734 | size_t old_size = dst_chars.size(); |
187 | 734 | ColumnString::check_chars_length(old_size + replacement_size, i + 1); |
188 | 734 | dst_chars.resize(old_size + replacement_size); |
189 | 734 | memcpy(&dst_chars[old_size], replacement_data, replacement_size); |
190 | 734 | } |
191 | 750 | pos = match + needle_size; |
192 | 750 | } |
193 | 397 | StringOP::push_empty_string(i, dst_chars, dst_offsets); |
194 | 397 | } |
195 | 3.26k | } _ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE22_replace_const_patternERKNS_9ColumnStrIjEENS_9StringRefES7_RS4_mb Line | Count | Source | 142 | 3.16k | size_t input_rows_count, bool src_const) { | 143 | 3.16k | auto& dst_chars = dst.get_chars(); | 144 | 3.16k | auto& dst_offsets = dst.get_offsets(); | 145 | | | 146 | 3.16k | dst_chars.reserve(src_const ? (src.get_data_at(0).size * input_rows_count) | 147 | 3.16k | : src.get_chars().size()); | 148 | 3.16k | dst_offsets.resize(input_rows_count); | 149 | | | 150 | | // Build SSE4.1 searcher once — first+second byte masks precomputed here. | 151 | 3.16k | ASCIICaseSensitiveStringSearcher searcher(old_ref.data, old_ref.size); | 152 | 3.16k | const size_t needle_size = old_ref.size; | 153 | 3.16k | const size_t replacement_size = new_ref.size; | 154 | 3.16k | const char* replacement_data = new_ref.data; | 155 | 3.16k | const auto needle_first = static_cast<unsigned char>(old_ref.data[0]); | 156 | | | 157 | 12.7k | for (size_t i = 0; i < input_rows_count; ++i) { | 158 | 9.62k | StringRef row = src.get_data_at(src_const ? 0 : i); | 159 | 9.62k | const char* const row_end = row.data + row.size; | 160 | | | 161 | | // Level-1: memchr for needle's first byte (glibc uses AVX512 internally). | 162 | | // If the first byte is absent the entire row cannot contain the needle; | 163 | | // bulk-copy it and move to the next row without entering the SSE4.1 loop. | 164 | 9.62k | if (memchr(row.data, needle_first, row.size) == nullptr) { | 165 | 9.26k | StringOP::push_value_string({row.data, row.size}, i, dst_chars, dst_offsets); | 166 | 9.26k | continue; | 167 | 9.26k | } | 168 | | | 169 | | // Level-2: SSE4.1 searcher handles needle matching for this row. | 170 | 357 | const char* pos = row.data; | 171 | 1.06k | while (pos < row_end) { | 172 | 1.03k | const char* match = searcher.search(pos, row_end); | 173 | | // Copy prefix before match | 174 | 1.03k | size_t prefix_len = static_cast<size_t>(match - pos); | 175 | 1.03k | if (prefix_len > 0) { | 176 | 1.00k | size_t old_size = dst_chars.size(); | 177 | 1.00k | ColumnString::check_chars_length(old_size + prefix_len, i + 1); | 178 | 1.00k | dst_chars.resize(old_size + prefix_len); | 179 | 1.00k | memcpy(&dst_chars[old_size], pos, prefix_len); | 180 | 1.00k | } | 181 | 1.03k | if (match == row_end) { | 182 | 328 | break; | 183 | 328 | } | 184 | | // Copy replacement | 185 | 710 | if (replacement_size > 0) { | 186 | 702 | size_t old_size = dst_chars.size(); | 187 | 702 | ColumnString::check_chars_length(old_size + replacement_size, i + 1); | 188 | 702 | dst_chars.resize(old_size + replacement_size); | 189 | 702 | memcpy(&dst_chars[old_size], replacement_data, replacement_size); | 190 | 702 | } | 191 | 710 | pos = match + needle_size; | 192 | 710 | } | 193 | 357 | StringOP::push_empty_string(i, dst_chars, dst_offsets); | 194 | 357 | } | 195 | 3.16k | } |
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE22_replace_const_patternERKNS_9ColumnStrIjEENS_9StringRefES7_RS4_mb Line | Count | Source | 142 | 100 | size_t input_rows_count, bool src_const) { | 143 | 100 | auto& dst_chars = dst.get_chars(); | 144 | 100 | auto& dst_offsets = dst.get_offsets(); | 145 | | | 146 | 100 | dst_chars.reserve(src_const ? (src.get_data_at(0).size * input_rows_count) | 147 | 100 | : src.get_chars().size()); | 148 | 100 | dst_offsets.resize(input_rows_count); | 149 | | | 150 | | // Build SSE4.1 searcher once — first+second byte masks precomputed here. | 151 | 100 | ASCIICaseSensitiveStringSearcher searcher(old_ref.data, old_ref.size); | 152 | 100 | const size_t needle_size = old_ref.size; | 153 | 100 | const size_t replacement_size = new_ref.size; | 154 | 100 | const char* replacement_data = new_ref.data; | 155 | 100 | const auto needle_first = static_cast<unsigned char>(old_ref.data[0]); | 156 | | | 157 | 200 | for (size_t i = 0; i < input_rows_count; ++i) { | 158 | 100 | StringRef row = src.get_data_at(src_const ? 0 : i); | 159 | 100 | const char* const row_end = row.data + row.size; | 160 | | | 161 | | // Level-1: memchr for needle's first byte (glibc uses AVX512 internally). | 162 | | // If the first byte is absent the entire row cannot contain the needle; | 163 | | // bulk-copy it and move to the next row without entering the SSE4.1 loop. | 164 | 100 | if (memchr(row.data, needle_first, row.size) == nullptr) { | 165 | 60 | StringOP::push_value_string({row.data, row.size}, i, dst_chars, dst_offsets); | 166 | 60 | continue; | 167 | 60 | } | 168 | | | 169 | | // Level-2: SSE4.1 searcher handles needle matching for this row. | 170 | 40 | const char* pos = row.data; | 171 | 80 | while (pos < row_end) { | 172 | 55 | const char* match = searcher.search(pos, row_end); | 173 | | // Copy prefix before match | 174 | 55 | size_t prefix_len = static_cast<size_t>(match - pos); | 175 | 55 | if (prefix_len > 0) { | 176 | 30 | size_t old_size = dst_chars.size(); | 177 | 30 | ColumnString::check_chars_length(old_size + prefix_len, i + 1); | 178 | 30 | dst_chars.resize(old_size + prefix_len); | 179 | 30 | memcpy(&dst_chars[old_size], pos, prefix_len); | 180 | 30 | } | 181 | 55 | if (match == row_end) { | 182 | 15 | break; | 183 | 15 | } | 184 | | // Copy replacement | 185 | 40 | if (replacement_size > 0) { | 186 | 32 | size_t old_size = dst_chars.size(); | 187 | 32 | ColumnString::check_chars_length(old_size + replacement_size, i + 1); | 188 | 32 | dst_chars.resize(old_size + replacement_size); | 189 | 32 | memcpy(&dst_chars[old_size], replacement_data, replacement_size); | 190 | 32 | } | 191 | 40 | pos = match + needle_size; | 192 | 40 | } | 193 | 40 | StringOP::push_empty_string(i, dst_chars, dst_offsets); | 194 | 40 | } | 195 | 100 | } |
|
196 | | |
197 | 2.05k | std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const { |
198 | 2.05k | if (old_str.empty()) { |
199 | 503 | if constexpr (empty) { |
200 | 252 | return str; |
201 | 252 | } else { |
202 | | // Different from "Replace" only when the search string is empty. |
203 | | // it will insert `new_str` in front of every character and at the end of the old str. |
204 | 251 | if (new_str.empty()) { |
205 | 59 | return str; |
206 | 59 | } |
207 | 192 | if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { |
208 | 190 | std::string result; |
209 | 190 | ColumnString::check_chars_length( |
210 | 190 | str.length() * (new_str.length() + 1) + new_str.length(), 0); |
211 | 190 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); |
212 | 651 | for (char c : str) { |
213 | 651 | result += new_str; |
214 | 651 | result += c; |
215 | 651 | } |
216 | 190 | result += new_str; |
217 | 190 | return result; |
218 | 190 | } else { |
219 | 2 | std::string result; |
220 | 2 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); |
221 | 11 | for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { |
222 | 9 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; |
223 | 9 | result += new_str; |
224 | 9 | result.append(&str[i], utf8_char_len); |
225 | 9 | } |
226 | 2 | result += new_str; |
227 | 2 | ColumnString::check_chars_length(result.size(), 0); |
228 | 2 | return result; |
229 | 2 | } |
230 | 192 | } |
231 | 1.55k | } else { |
232 | 1.55k | std::string::size_type pos = 0; |
233 | 1.55k | std::string::size_type oldLen = old_str.size(); |
234 | 1.55k | std::string::size_type newLen = new_str.size(); |
235 | 2.20k | while ((pos = str.find(old_str, pos)) != std::string::npos) { |
236 | 649 | str.replace(pos, oldLen, new_str); |
237 | 649 | pos += newLen; |
238 | 649 | } |
239 | 1.55k | return str; |
240 | 1.55k | } |
241 | 2.05k | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_ Line | Count | Source | 197 | 1.06k | std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const { | 198 | 1.06k | if (old_str.empty()) { | 199 | 252 | if constexpr (empty) { | 200 | 252 | return str; | 201 | | } else { | 202 | | // Different from "Replace" only when the search string is empty. | 203 | | // it will insert `new_str` in front of every character and at the end of the old str. | 204 | | if (new_str.empty()) { | 205 | | return str; | 206 | | } | 207 | | if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { | 208 | | std::string result; | 209 | | ColumnString::check_chars_length( | 210 | | str.length() * (new_str.length() + 1) + new_str.length(), 0); | 211 | | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 212 | | for (char c : str) { | 213 | | result += new_str; | 214 | | result += c; | 215 | | } | 216 | | result += new_str; | 217 | | return result; | 218 | | } else { | 219 | | std::string result; | 220 | | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 221 | | for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { | 222 | | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; | 223 | | result += new_str; | 224 | | result.append(&str[i], utf8_char_len); | 225 | | } | 226 | | result += new_str; | 227 | | ColumnString::check_chars_length(result.size(), 0); | 228 | | return result; | 229 | | } | 230 | | } | 231 | 810 | } else { | 232 | 810 | std::string::size_type pos = 0; | 233 | 810 | std::string::size_type oldLen = old_str.size(); | 234 | 810 | std::string::size_type newLen = new_str.size(); | 235 | 1.16k | while ((pos = str.find(old_str, pos)) != std::string::npos) { | 236 | 357 | str.replace(pos, oldLen, new_str); | 237 | 357 | pos += newLen; | 238 | 357 | } | 239 | 810 | return str; | 240 | 810 | } | 241 | 1.06k | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_ Line | Count | Source | 197 | 997 | std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const { | 198 | 997 | if (old_str.empty()) { | 199 | | if constexpr (empty) { | 200 | | return str; | 201 | 251 | } else { | 202 | | // Different from "Replace" only when the search string is empty. | 203 | | // it will insert `new_str` in front of every character and at the end of the old str. | 204 | 251 | if (new_str.empty()) { | 205 | 59 | return str; | 206 | 59 | } | 207 | 192 | if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { | 208 | 190 | std::string result; | 209 | 190 | ColumnString::check_chars_length( | 210 | 190 | str.length() * (new_str.length() + 1) + new_str.length(), 0); | 211 | 190 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 212 | 651 | for (char c : str) { | 213 | 651 | result += new_str; | 214 | 651 | result += c; | 215 | 651 | } | 216 | 190 | result += new_str; | 217 | 190 | return result; | 218 | 190 | } else { | 219 | 2 | std::string result; | 220 | 2 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 221 | 11 | for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { | 222 | 9 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; | 223 | 9 | result += new_str; | 224 | 9 | result.append(&str[i], utf8_char_len); | 225 | 9 | } | 226 | 2 | result += new_str; | 227 | 2 | ColumnString::check_chars_length(result.size(), 0); | 228 | 2 | return result; | 229 | 2 | } | 230 | 192 | } | 231 | 746 | } else { | 232 | 746 | std::string::size_type pos = 0; | 233 | 746 | std::string::size_type oldLen = old_str.size(); | 234 | 746 | std::string::size_type newLen = new_str.size(); | 235 | 1.03k | while ((pos = str.find(old_str, pos)) != std::string::npos) { | 236 | 292 | str.replace(pos, oldLen, new_str); | 237 | 292 | pos += newLen; | 238 | 292 | } | 239 | 746 | return str; | 240 | 746 | } | 241 | 997 | } |
|
242 | | }; |
243 | | |
244 | | struct ReverseImpl { |
245 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
246 | 72 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { |
247 | 72 | auto rows_count = offsets.size(); |
248 | 72 | res_offsets.resize(rows_count); |
249 | 72 | res_data.reserve(data.size()); |
250 | 215 | for (ssize_t i = 0; i < rows_count; ++i) { |
251 | 143 | auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
252 | 143 | int64_t src_len = offsets[i] - offsets[i - 1]; |
253 | 143 | std::string dst; |
254 | 143 | dst.resize(src_len); |
255 | 143 | simd::VStringFunctions::reverse(StringRef((uint8_t*)src_str, src_len), &dst); |
256 | 143 | StringOP::push_value_string(std::string_view(dst.data(), src_len), i, res_data, |
257 | 143 | res_offsets); |
258 | 143 | } |
259 | 72 | return Status::OK(); |
260 | 72 | } |
261 | | }; |
262 | | |
263 | | template <typename Impl> |
264 | | class FunctionSubReplace : public IFunction { |
265 | | public: |
266 | | static constexpr auto name = "sub_replace"; |
267 | | |
268 | 89 | static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }_ZN5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE6createEv Line | Count | Source | 268 | 39 | static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); } |
_ZN5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE6createEv Line | Count | Source | 268 | 50 | static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); } |
|
269 | | |
270 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE8get_nameB5cxx11Ev |
271 | | |
272 | 71 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
273 | 71 | return make_nullable(std::make_shared<DataTypeString>()); |
274 | 71 | } _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 272 | 30 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 273 | 30 | return make_nullable(std::make_shared<DataTypeString>()); | 274 | 30 | } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 272 | 41 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 273 | 41 | return make_nullable(std::make_shared<DataTypeString>()); | 274 | 41 | } |
|
275 | | |
276 | 73 | bool is_variadic() const override { return true; }_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE11is_variadicEv Line | Count | Source | 276 | 31 | bool is_variadic() const override { return true; } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE11is_variadicEv Line | Count | Source | 276 | 42 | bool is_variadic() const override { return true; } |
|
277 | | |
278 | 16 | DataTypes get_variadic_argument_types_impl() const override { |
279 | 16 | return Impl::get_variadic_argument_types(); |
280 | 16 | } _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE32get_variadic_argument_types_implEv Line | Count | Source | 278 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 279 | 8 | return Impl::get_variadic_argument_types(); | 280 | 8 | } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE32get_variadic_argument_types_implEv Line | Count | Source | 278 | 8 | DataTypes get_variadic_argument_types_impl() const override { | 279 | 8 | return Impl::get_variadic_argument_types(); | 280 | 8 | } |
|
281 | | |
282 | 0 | size_t get_number_of_arguments() const override { |
283 | 0 | return get_variadic_argument_types_impl().size(); |
284 | 0 | } Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE23get_number_of_argumentsEv |
285 | | |
286 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
287 | 93 | uint32_t result, size_t input_rows_count) const override { |
288 | 93 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
289 | 93 | } _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 287 | 41 | uint32_t result, size_t input_rows_count) const override { | 288 | 41 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); | 289 | 41 | } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 287 | 52 | uint32_t result, size_t input_rows_count) const override { | 288 | 52 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); | 289 | 52 | } |
|
290 | | }; |
291 | | |
292 | | struct SubReplaceImpl { |
293 | | static Status replace_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, |
294 | 94 | size_t input_rows_count) { |
295 | 94 | auto res_column = ColumnString::create(); |
296 | 94 | auto* result_column = assert_cast<ColumnString*>(res_column.get()); |
297 | 94 | auto args_null_map = ColumnUInt8::create(input_rows_count, 0); |
298 | 94 | ColumnPtr argument_columns[4]; |
299 | 94 | bool col_const[4]; |
300 | 470 | for (int i = 0; i < 4; ++i) { |
301 | 376 | std::tie(argument_columns[i], col_const[i]) = |
302 | 376 | unpack_if_const(block.get_by_position(arguments[i]).column); |
303 | 376 | } |
304 | 94 | const auto* data_column = assert_cast<const ColumnString*>(argument_columns[0].get()); |
305 | 94 | const auto* mask_column = assert_cast<const ColumnString*>(argument_columns[1].get()); |
306 | 94 | const auto* start_column = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
307 | 94 | const auto* length_column = assert_cast<const ColumnInt32*>(argument_columns[3].get()); |
308 | | |
309 | 94 | std::visit( |
310 | 94 | [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) { |
311 | 94 | if (data_column->is_ascii()) { |
312 | 70 | vector_ascii<origin_str_const, new_str_const, start_const, len_const>( |
313 | 70 | data_column, mask_column, start_column->get_data(), |
314 | 70 | length_column->get_data(), args_null_map->get_data(), result_column, |
315 | 70 | input_rows_count); |
316 | 70 | } else { |
317 | 24 | vector_utf8<origin_str_const, new_str_const, start_const, len_const>( |
318 | 24 | data_column, mask_column, start_column->get_data(), |
319 | 24 | length_column->get_data(), args_null_map->get_data(), result_column, |
320 | 24 | input_rows_count); |
321 | 24 | } |
322 | 94 | }, _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 310 | 94 | [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) { | 311 | 94 | if (data_column->is_ascii()) { | 312 | 70 | vector_ascii<origin_str_const, new_str_const, start_const, len_const>( | 313 | 70 | data_column, mask_column, start_column->get_data(), | 314 | 70 | length_column->get_data(), args_null_map->get_data(), result_column, | 315 | 70 | input_rows_count); | 316 | 70 | } else { | 317 | 24 | vector_utf8<origin_str_const, new_str_const, start_const, len_const>( | 318 | 24 | data_column, mask_column, start_column->get_data(), | 319 | 24 | length_column->get_data(), args_null_map->get_data(), result_column, | 320 | 24 | input_rows_count); | 321 | 24 | } | 322 | 94 | }, |
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_ |
323 | 94 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
324 | 94 | make_bool_variant(col_const[2]), make_bool_variant(col_const[3])); |
325 | 94 | block.get_by_position(result).column = |
326 | 94 | ColumnNullable::create(std::move(res_column), std::move(args_null_map)); |
327 | 94 | return Status::OK(); |
328 | 94 | } |
329 | | |
330 | | private: |
331 | | template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const> |
332 | | static void vector_ascii(const ColumnString* data_column, const ColumnString* mask_column, |
333 | | const PaddedPODArray<Int32>& args_start, |
334 | | const PaddedPODArray<Int32>& args_length, NullMap& args_null_map, |
335 | 70 | ColumnString* result_column, size_t input_rows_count) { |
336 | 70 | ColumnString::Chars& res_chars = result_column->get_chars(); |
337 | 70 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); |
338 | 10.4k | for (size_t row = 0; row < input_rows_count; ++row) { |
339 | 10.3k | StringRef origin_str = |
340 | 10.3k | data_column->get_data_at(index_check_const<origin_str_const>(row)); |
341 | 10.3k | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); |
342 | 10.3k | const auto start = args_start[index_check_const<start_const>(row)]; |
343 | 10.3k | const auto length = args_length[index_check_const<len_const>(row)]; |
344 | 10.3k | const size_t origin_str_len = origin_str.size; |
345 | | //input is null, start < 0, len < 0, str_size <= start. return NULL |
346 | 10.3k | if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) { |
347 | 10.2k | res_offsets.push_back(res_chars.size()); |
348 | 10.2k | args_null_map[row] = 1; |
349 | 10.2k | } else { |
350 | 92 | std::string_view replace_str = new_str.to_string_view(); |
351 | 92 | std::string result = origin_str.to_string(); |
352 | 92 | result.replace(start, length, replace_str); |
353 | 92 | result_column->insert_data(result.data(), result.length()); |
354 | 92 | } |
355 | 10.3k | } |
356 | 70 | } _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Line | Count | Source | 335 | 70 | ColumnString* result_column, size_t input_rows_count) { | 336 | 70 | ColumnString::Chars& res_chars = result_column->get_chars(); | 337 | 70 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); | 338 | 10.4k | for (size_t row = 0; row < input_rows_count; ++row) { | 339 | 10.3k | StringRef origin_str = | 340 | 10.3k | data_column->get_data_at(index_check_const<origin_str_const>(row)); | 341 | 10.3k | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); | 342 | 10.3k | const auto start = args_start[index_check_const<start_const>(row)]; | 343 | 10.3k | const auto length = args_length[index_check_const<len_const>(row)]; | 344 | 10.3k | const size_t origin_str_len = origin_str.size; | 345 | | //input is null, start < 0, len < 0, str_size <= start. return NULL | 346 | 10.3k | if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) { | 347 | 10.2k | res_offsets.push_back(res_chars.size()); | 348 | 10.2k | args_null_map[row] = 1; | 349 | 10.2k | } else { | 350 | 92 | std::string_view replace_str = new_str.to_string_view(); | 351 | 92 | std::string result = origin_str.to_string(); | 352 | 92 | result.replace(start, length, replace_str); | 353 | 92 | result_column->insert_data(result.data(), result.length()); | 354 | 92 | } | 355 | 10.3k | } | 356 | 70 | } |
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m |
357 | | |
358 | | template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const> |
359 | | static void vector_utf8(const ColumnString* data_column, const ColumnString* mask_column, |
360 | | const PaddedPODArray<Int32>& args_start, |
361 | | const PaddedPODArray<Int32>& args_length, NullMap& args_null_map, |
362 | 24 | ColumnString* result_column, size_t input_rows_count) { |
363 | 24 | ColumnString::Chars& res_chars = result_column->get_chars(); |
364 | 24 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); |
365 | | |
366 | 48 | for (size_t row = 0; row < input_rows_count; ++row) { |
367 | 24 | StringRef origin_str = |
368 | 24 | data_column->get_data_at(index_check_const<origin_str_const>(row)); |
369 | 24 | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); |
370 | 24 | const auto start = args_start[index_check_const<start_const>(row)]; |
371 | 24 | const auto length = args_length[index_check_const<len_const>(row)]; |
372 | | //input is null, start < 0, len < 0 return NULL |
373 | 24 | if (args_null_map[row] || start < 0 || length < 0) { |
374 | 2 | res_offsets.push_back(res_chars.size()); |
375 | 2 | args_null_map[row] = 1; |
376 | 2 | continue; |
377 | 2 | } |
378 | | |
379 | 22 | const auto [start_byte_len, start_char_len] = |
380 | 22 | simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(), |
381 | 22 | origin_str.end(), start); |
382 | | |
383 | | // start >= orgin.size |
384 | 22 | DCHECK(start_char_len <= start); |
385 | 22 | if (start_byte_len == origin_str.size) { |
386 | 8 | res_offsets.push_back(res_chars.size()); |
387 | 8 | args_null_map[row] = 1; |
388 | 8 | continue; |
389 | 8 | } |
390 | | |
391 | 14 | auto [end_byte_len, end_char_len] = |
392 | 14 | simd::VStringFunctions::iterate_utf8_with_limit_length( |
393 | 14 | origin_str.begin() + start_byte_len, origin_str.end(), length); |
394 | 14 | DCHECK(end_char_len <= length); |
395 | 14 | std::string_view replace_str = new_str.to_string_view(); |
396 | 14 | std::string result = origin_str.to_string(); |
397 | 14 | result.replace(start_byte_len, end_byte_len, replace_str); |
398 | 14 | result_column->insert_data(result.data(), result.length()); |
399 | 14 | } |
400 | 24 | } _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Line | Count | Source | 362 | 24 | ColumnString* result_column, size_t input_rows_count) { | 363 | 24 | ColumnString::Chars& res_chars = result_column->get_chars(); | 364 | 24 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); | 365 | | | 366 | 48 | for (size_t row = 0; row < input_rows_count; ++row) { | 367 | 24 | StringRef origin_str = | 368 | 24 | data_column->get_data_at(index_check_const<origin_str_const>(row)); | 369 | 24 | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); | 370 | 24 | const auto start = args_start[index_check_const<start_const>(row)]; | 371 | 24 | const auto length = args_length[index_check_const<len_const>(row)]; | 372 | | //input is null, start < 0, len < 0 return NULL | 373 | 24 | if (args_null_map[row] || start < 0 || length < 0) { | 374 | 2 | res_offsets.push_back(res_chars.size()); | 375 | 2 | args_null_map[row] = 1; | 376 | 2 | continue; | 377 | 2 | } | 378 | | | 379 | 22 | const auto [start_byte_len, start_char_len] = | 380 | 22 | simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(), | 381 | 22 | origin_str.end(), start); | 382 | | | 383 | | // start >= orgin.size | 384 | 22 | DCHECK(start_char_len <= start); | 385 | 22 | if (start_byte_len == origin_str.size) { | 386 | 8 | res_offsets.push_back(res_chars.size()); | 387 | 8 | args_null_map[row] = 1; | 388 | 8 | continue; | 389 | 8 | } | 390 | | | 391 | 14 | auto [end_byte_len, end_char_len] = | 392 | 14 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 393 | 14 | origin_str.begin() + start_byte_len, origin_str.end(), length); | 394 | | DCHECK(end_char_len <= length); | 395 | 14 | std::string_view replace_str = new_str.to_string_view(); | 396 | 14 | std::string result = origin_str.to_string(); | 397 | 14 | result.replace(start_byte_len, end_byte_len, replace_str); | 398 | 14 | result_column->insert_data(result.data(), result.length()); | 399 | 14 | } | 400 | 24 | } |
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m |
401 | | }; |
402 | | |
403 | | struct SubReplaceThreeImpl { |
404 | 8 | static DataTypes get_variadic_argument_types() { |
405 | 8 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
406 | 8 | std::make_shared<DataTypeInt32>()}; |
407 | 8 | } |
408 | | |
409 | | static Status execute_impl(FunctionContext* context, Block& block, |
410 | | const ColumnNumbers& arguments, uint32_t result, |
411 | 41 | size_t input_rows_count) { |
412 | 41 | auto params = ColumnInt32::create(input_rows_count); |
413 | 41 | auto& strlen_data = params->get_data(); |
414 | | |
415 | 41 | auto str_col = |
416 | 41 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
417 | 41 | if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) { |
418 | 0 | str_col = nullable->get_nested_column_ptr(); |
419 | 0 | } |
420 | 41 | const auto* str_column = assert_cast<const ColumnString*>(str_col.get()); |
421 | | // use utf8 len |
422 | 116 | for (int i = 0; i < input_rows_count; ++i) { |
423 | 75 | StringRef str_ref = str_column->get_data_at(i); |
424 | 75 | strlen_data[i] = simd::VStringFunctions::get_char_len(str_ref.data, str_ref.size); |
425 | 75 | } |
426 | | |
427 | 41 | block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"}); |
428 | 41 | ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2], |
429 | 41 | block.columns() - 1}; |
430 | 41 | return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count); |
431 | 41 | } |
432 | | }; |
433 | | |
434 | | struct SubReplaceFourImpl { |
435 | 8 | static DataTypes get_variadic_argument_types() { |
436 | 8 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
437 | 8 | std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()}; |
438 | 8 | } |
439 | | |
440 | | static Status execute_impl(FunctionContext* context, Block& block, |
441 | | const ColumnNumbers& arguments, uint32_t result, |
442 | 52 | size_t input_rows_count) { |
443 | 52 | return SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count); |
444 | 52 | } |
445 | | }; |
446 | | |
447 | | class FunctionOverlay : public IFunction { |
448 | | public: |
449 | | static constexpr auto name = "overlay"; |
450 | 179 | static FunctionPtr create() { return std::make_shared<FunctionOverlay>(); } |
451 | 1 | String get_name() const override { return name; } |
452 | 170 | size_t get_number_of_arguments() const override { return 4; } |
453 | | |
454 | 170 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
455 | 170 | return std::make_shared<DataTypeString>(); |
456 | 170 | } |
457 | | |
458 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
459 | 154 | uint32_t result, size_t input_rows_count) const override { |
460 | 154 | DCHECK_EQ(arguments.size(), 4); |
461 | | |
462 | 154 | bool col_const[4]; |
463 | 154 | ColumnPtr argument_columns[4]; |
464 | 770 | for (int i = 0; i < 4; ++i) { |
465 | 616 | std::tie(argument_columns[i], col_const[i]) = |
466 | 616 | unpack_if_const(block.get_by_position(arguments[i]).column); |
467 | 616 | } |
468 | | |
469 | 154 | const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get()); |
470 | | |
471 | 154 | const auto* col_pos = |
472 | 154 | assert_cast<const ColumnInt32*>(argument_columns[1].get())->get_data().data(); |
473 | 154 | const auto* col_len = |
474 | 154 | assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_data().data(); |
475 | 154 | const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get()); |
476 | | |
477 | 154 | ColumnString::MutablePtr col_res = ColumnString::create(); |
478 | | |
479 | | // if all input string is ascii, we can use ascii function to handle it |
480 | 154 | const bool is_all_ascii = col_origin->is_ascii() && col_insert->is_ascii(); |
481 | 154 | std::visit( |
482 | 154 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { |
483 | 154 | if (is_all_ascii) { |
484 | 79 | vector_ascii<origin_const, pos_const, len_const, insert_const>( |
485 | 79 | col_origin, col_pos, col_len, col_insert, col_res, |
486 | 79 | input_rows_count); |
487 | 79 | } else { |
488 | 75 | vector_utf8<origin_const, pos_const, len_const, insert_const>( |
489 | 75 | col_origin, col_pos, col_len, col_insert, col_res, |
490 | 75 | input_rows_count); |
491 | 75 | } |
492 | 154 | }, _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SH_EEDaSA_SB_SC_SD_ Line | Count | Source | 482 | 139 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 483 | 139 | if (is_all_ascii) { | 484 | 72 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 485 | 72 | col_origin, col_pos, col_len, col_insert, col_res, | 486 | 72 | input_rows_count); | 487 | 72 | } else { | 488 | 67 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 489 | 67 | col_origin, col_pos, col_len, col_insert, col_res, | 490 | 67 | input_rows_count); | 491 | 67 | } | 492 | 139 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SG_IbLb1EEEEDaSA_SB_SC_SD_ _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESH_EEDaSA_SB_SC_SD_ Line | Count | Source | 482 | 1 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 483 | 1 | if (is_all_ascii) { | 484 | 0 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 485 | 0 | col_origin, col_pos, col_len, col_insert, col_res, | 486 | 0 | input_rows_count); | 487 | 1 | } else { | 488 | 1 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 489 | 1 | col_origin, col_pos, col_len, col_insert, col_res, | 490 | 1 | input_rows_count); | 491 | 1 | } | 492 | 1 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SH_EEDaSA_SB_SC_SD_ _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SI_EEDaSA_SB_SC_SD_ Line | Count | Source | 482 | 1 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 483 | 1 | if (is_all_ascii) { | 484 | 0 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 485 | 0 | col_origin, col_pos, col_len, col_insert, col_res, | 486 | 0 | input_rows_count); | 487 | 1 | } else { | 488 | 1 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 489 | 1 | col_origin, col_pos, col_len, col_insert, col_res, | 490 | 1 | input_rows_count); | 491 | 1 | } | 492 | 1 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SH_EEDaSA_SB_SC_SD_ _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SI_EEDaSA_SB_SC_SD_ Line | Count | Source | 482 | 12 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 483 | 12 | if (is_all_ascii) { | 484 | 6 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 485 | 6 | col_origin, col_pos, col_len, col_insert, col_res, | 486 | 6 | input_rows_count); | 487 | 6 | } else { | 488 | 6 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 489 | 6 | col_origin, col_pos, col_len, col_insert, col_res, | 490 | 6 | input_rows_count); | 491 | 6 | } | 492 | 12 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SH_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SI_EEDaSA_SB_SC_SD_ _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SH_EEDaSA_SB_SC_SD_ Line | Count | Source | 482 | 1 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 483 | 1 | if (is_all_ascii) { | 484 | 1 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 485 | 1 | col_origin, col_pos, col_len, col_insert, col_res, | 486 | 1 | input_rows_count); | 487 | 1 | } else { | 488 | 0 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 489 | 0 | col_origin, col_pos, col_len, col_insert, col_res, | 490 | 0 | input_rows_count); | 491 | 0 | } | 492 | 1 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESH_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SG_IbLb0EEEEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SH_EEDaSA_SB_SC_SD_ |
493 | 154 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
494 | 154 | make_bool_variant(col_const[2]), make_bool_variant(col_const[3])); |
495 | 154 | block.replace_by_position(result, std::move(col_res)); |
496 | 154 | return Status::OK(); |
497 | 154 | } |
498 | | |
499 | | private: |
500 | | template <bool origin_const, bool pos_const, bool len_const, bool insert_const> |
501 | | static void vector_ascii(const ColumnString* col_origin, int const* col_pos, int const* col_len, |
502 | | const ColumnString* col_insert, ColumnString::MutablePtr& col_res, |
503 | 79 | size_t input_rows_count) { |
504 | 79 | auto& col_res_chars = col_res->get_chars(); |
505 | 79 | auto& col_res_offsets = col_res->get_offsets(); |
506 | 79 | StringRef origin_str, insert_str; |
507 | 187 | for (size_t i = 0; i < input_rows_count; i++) { |
508 | 108 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); |
509 | | // pos is 1-based index,so we need to minus 1 |
510 | 108 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; |
511 | 108 | const auto len = col_len[index_check_const<len_const>(i)]; |
512 | 108 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); |
513 | 108 | const auto origin_size = origin_str.size; |
514 | 108 | if (pos >= origin_size || pos < 0) { |
515 | | // If pos is not within the length of the string, the original string is returned. |
516 | 26 | col_res->insert_data(origin_str.data, origin_str.size); |
517 | 26 | continue; |
518 | 26 | } |
519 | 82 | col_res_chars.insert(origin_str.data, |
520 | 82 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 |
521 | 82 | if (pos + len > origin_size || len < 0) { |
522 | 15 | col_res_chars.insert(insert_str.begin(), |
523 | 15 | insert_str.end()); // copy all of insert_str. |
524 | 67 | } else { |
525 | 67 | col_res_chars.insert(insert_str.begin(), |
526 | 67 | insert_str.end()); // copy all of insert_str. |
527 | 67 | col_res_chars.insert( |
528 | 67 | origin_str.data + pos + len, |
529 | 67 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. |
530 | 67 | } |
531 | 82 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); |
532 | 82 | col_res_offsets.push_back(col_res_chars.size()); |
533 | 82 | } |
534 | 79 | } _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 503 | 72 | size_t input_rows_count) { | 504 | 72 | auto& col_res_chars = col_res->get_chars(); | 505 | 72 | auto& col_res_offsets = col_res->get_offsets(); | 506 | 72 | StringRef origin_str, insert_str; | 507 | 144 | for (size_t i = 0; i < input_rows_count; i++) { | 508 | 72 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 509 | | // pos is 1-based index,so we need to minus 1 | 510 | 72 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 511 | 72 | const auto len = col_len[index_check_const<len_const>(i)]; | 512 | 72 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 513 | 72 | const auto origin_size = origin_str.size; | 514 | 72 | if (pos >= origin_size || pos < 0) { | 515 | | // If pos is not within the length of the string, the original string is returned. | 516 | 18 | col_res->insert_data(origin_str.data, origin_str.size); | 517 | 18 | continue; | 518 | 18 | } | 519 | 54 | col_res_chars.insert(origin_str.data, | 520 | 54 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 | 521 | 54 | if (pos + len > origin_size || len < 0) { | 522 | 11 | col_res_chars.insert(insert_str.begin(), | 523 | 11 | insert_str.end()); // copy all of insert_str. | 524 | 43 | } else { | 525 | 43 | col_res_chars.insert(insert_str.begin(), | 526 | 43 | insert_str.end()); // copy all of insert_str. | 527 | 43 | col_res_chars.insert( | 528 | 43 | origin_str.data + pos + len, | 529 | 43 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 530 | 43 | } | 531 | 54 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 532 | 54 | col_res_offsets.push_back(col_res_chars.size()); | 533 | 54 | } | 534 | 72 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 503 | 6 | size_t input_rows_count) { | 504 | 6 | auto& col_res_chars = col_res->get_chars(); | 505 | 6 | auto& col_res_offsets = col_res->get_offsets(); | 506 | 6 | StringRef origin_str, insert_str; | 507 | 12 | for (size_t i = 0; i < input_rows_count; i++) { | 508 | 6 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 509 | | // pos is 1-based index,so we need to minus 1 | 510 | 6 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 511 | 6 | const auto len = col_len[index_check_const<len_const>(i)]; | 512 | 6 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 513 | 6 | const auto origin_size = origin_str.size; | 514 | 6 | if (pos >= origin_size || pos < 0) { | 515 | | // If pos is not within the length of the string, the original string is returned. | 516 | 3 | col_res->insert_data(origin_str.data, origin_str.size); | 517 | 3 | continue; | 518 | 3 | } | 519 | 3 | col_res_chars.insert(origin_str.data, | 520 | 3 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 | 521 | 3 | if (pos + len > origin_size || len < 0) { | 522 | 1 | col_res_chars.insert(insert_str.begin(), | 523 | 1 | insert_str.end()); // copy all of insert_str. | 524 | 2 | } else { | 525 | 2 | col_res_chars.insert(insert_str.begin(), | 526 | 2 | insert_str.end()); // copy all of insert_str. | 527 | 2 | col_res_chars.insert( | 528 | 2 | origin_str.data + pos + len, | 529 | 2 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 530 | 2 | } | 531 | 3 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 532 | 3 | col_res_offsets.push_back(col_res_chars.size()); | 533 | 3 | } | 534 | 6 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 503 | 1 | size_t input_rows_count) { | 504 | 1 | auto& col_res_chars = col_res->get_chars(); | 505 | 1 | auto& col_res_offsets = col_res->get_offsets(); | 506 | 1 | StringRef origin_str, insert_str; | 507 | 31 | for (size_t i = 0; i < input_rows_count; i++) { | 508 | 30 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 509 | | // pos is 1-based index,so we need to minus 1 | 510 | 30 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 511 | 30 | const auto len = col_len[index_check_const<len_const>(i)]; | 512 | 30 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 513 | 30 | const auto origin_size = origin_str.size; | 514 | 30 | if (pos >= origin_size || pos < 0) { | 515 | | // If pos is not within the length of the string, the original string is returned. | 516 | 5 | col_res->insert_data(origin_str.data, origin_str.size); | 517 | 5 | continue; | 518 | 5 | } | 519 | 25 | col_res_chars.insert(origin_str.data, | 520 | 25 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 | 521 | 25 | if (pos + len > origin_size || len < 0) { | 522 | 3 | col_res_chars.insert(insert_str.begin(), | 523 | 3 | insert_str.end()); // copy all of insert_str. | 524 | 22 | } else { | 525 | 22 | col_res_chars.insert(insert_str.begin(), | 526 | 22 | insert_str.end()); // copy all of insert_str. | 527 | 22 | col_res_chars.insert( | 528 | 22 | origin_str.data + pos + len, | 529 | 22 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 530 | 22 | } | 531 | 25 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 532 | 25 | col_res_offsets.push_back(col_res_chars.size()); | 533 | 25 | } | 534 | 1 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm |
535 | | |
536 | | template <bool origin_const, bool pos_const, bool len_const, bool insert_const> |
537 | | NO_SANITIZE_UNDEFINED static void vector_utf8(const ColumnString* col_origin, |
538 | | int const* col_pos, int const* col_len, |
539 | | const ColumnString* col_insert, |
540 | | ColumnString::MutablePtr& col_res, |
541 | 75 | size_t input_rows_count) { |
542 | 75 | auto& col_res_chars = col_res->get_chars(); |
543 | 75 | auto& col_res_offsets = col_res->get_offsets(); |
544 | 75 | StringRef origin_str, insert_str; |
545 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. |
546 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. |
547 | 75 | std::vector<size_t> utf8_origin_offsets; |
548 | 252 | for (size_t i = 0; i < input_rows_count; i++) { |
549 | 177 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); |
550 | | // pos is 1-based index,so we need to minus 1 |
551 | 177 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; |
552 | 177 | const auto len = col_len[index_check_const<len_const>(i)]; |
553 | 177 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); |
554 | 177 | utf8_origin_offsets.clear(); |
555 | | |
556 | 1.10k | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { |
557 | 924 | utf8_origin_offsets.push_back(ni); |
558 | 924 | char_size = get_utf8_byte_length(origin_str.data[ni]); |
559 | 924 | } |
560 | | |
561 | 177 | const size_t utf8_origin_size = utf8_origin_offsets.size(); |
562 | | |
563 | 177 | if (pos >= utf8_origin_size || pos < 0) { |
564 | | // If pos is not within the length of the string, the original string is returned. |
565 | 38 | col_res->insert_data(origin_str.data, origin_str.size); |
566 | 38 | continue; |
567 | 38 | } |
568 | 139 | col_res_chars.insert( |
569 | 139 | origin_str.data, |
570 | 139 | origin_str.data + |
571 | 139 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 |
572 | 139 | if (pos + len >= utf8_origin_size || len < 0) { |
573 | 35 | col_res_chars.insert(insert_str.begin(), |
574 | 35 | insert_str.end()); // copy all of insert_str. |
575 | 104 | } else { |
576 | 104 | col_res_chars.insert(insert_str.begin(), |
577 | 104 | insert_str.end()); // copy all of insert_str. |
578 | 104 | col_res_chars.insert( |
579 | 104 | origin_str.data + utf8_origin_offsets[pos + len], |
580 | 104 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. |
581 | 104 | } |
582 | 139 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); |
583 | 139 | col_res_offsets.push_back(col_res_chars.size()); |
584 | 139 | } |
585 | 75 | } _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 541 | 67 | size_t input_rows_count) { | 542 | 67 | auto& col_res_chars = col_res->get_chars(); | 543 | 67 | auto& col_res_offsets = col_res->get_offsets(); | 544 | 67 | StringRef origin_str, insert_str; | 545 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 546 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. | 547 | 67 | std::vector<size_t> utf8_origin_offsets; | 548 | 178 | for (size_t i = 0; i < input_rows_count; i++) { | 549 | 111 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 550 | | // pos is 1-based index,so we need to minus 1 | 551 | 111 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 552 | 111 | const auto len = col_len[index_check_const<len_const>(i)]; | 553 | 111 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 554 | 111 | utf8_origin_offsets.clear(); | 555 | | | 556 | 639 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 557 | 528 | utf8_origin_offsets.push_back(ni); | 558 | 528 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 559 | 528 | } | 560 | | | 561 | 111 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 562 | | | 563 | 111 | if (pos >= utf8_origin_size || pos < 0) { | 564 | | // If pos is not within the length of the string, the original string is returned. | 565 | 22 | col_res->insert_data(origin_str.data, origin_str.size); | 566 | 22 | continue; | 567 | 22 | } | 568 | 89 | col_res_chars.insert( | 569 | 89 | origin_str.data, | 570 | 89 | origin_str.data + | 571 | 89 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 572 | 89 | if (pos + len >= utf8_origin_size || len < 0) { | 573 | 23 | col_res_chars.insert(insert_str.begin(), | 574 | 23 | insert_str.end()); // copy all of insert_str. | 575 | 66 | } else { | 576 | 66 | col_res_chars.insert(insert_str.begin(), | 577 | 66 | insert_str.end()); // copy all of insert_str. | 578 | 66 | col_res_chars.insert( | 579 | 66 | origin_str.data + utf8_origin_offsets[pos + len], | 580 | 66 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 581 | 66 | } | 582 | 89 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 583 | 89 | col_res_offsets.push_back(col_res_chars.size()); | 584 | 89 | } | 585 | 67 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 541 | 1 | size_t input_rows_count) { | 542 | 1 | auto& col_res_chars = col_res->get_chars(); | 543 | 1 | auto& col_res_offsets = col_res->get_offsets(); | 544 | 1 | StringRef origin_str, insert_str; | 545 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 546 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. | 547 | 1 | std::vector<size_t> utf8_origin_offsets; | 548 | 31 | for (size_t i = 0; i < input_rows_count; i++) { | 549 | 30 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 550 | | // pos is 1-based index,so we need to minus 1 | 551 | 30 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 552 | 30 | const auto len = col_len[index_check_const<len_const>(i)]; | 553 | 30 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 554 | 30 | utf8_origin_offsets.clear(); | 555 | | | 556 | 209 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 557 | 179 | utf8_origin_offsets.push_back(ni); | 558 | 179 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 559 | 179 | } | 560 | | | 561 | 30 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 562 | | | 563 | 30 | if (pos >= utf8_origin_size || pos < 0) { | 564 | | // If pos is not within the length of the string, the original string is returned. | 565 | 9 | col_res->insert_data(origin_str.data, origin_str.size); | 566 | 9 | continue; | 567 | 9 | } | 568 | 21 | col_res_chars.insert( | 569 | 21 | origin_str.data, | 570 | 21 | origin_str.data + | 571 | 21 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 572 | 21 | if (pos + len >= utf8_origin_size || len < 0) { | 573 | 3 | col_res_chars.insert(insert_str.begin(), | 574 | 3 | insert_str.end()); // copy all of insert_str. | 575 | 18 | } else { | 576 | 18 | col_res_chars.insert(insert_str.begin(), | 577 | 18 | insert_str.end()); // copy all of insert_str. | 578 | 18 | col_res_chars.insert( | 579 | 18 | origin_str.data + utf8_origin_offsets[pos + len], | 580 | 18 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 581 | 18 | } | 582 | 21 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 583 | 21 | col_res_offsets.push_back(col_res_chars.size()); | 584 | 21 | } | 585 | 1 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 541 | 1 | size_t input_rows_count) { | 542 | 1 | auto& col_res_chars = col_res->get_chars(); | 543 | 1 | auto& col_res_offsets = col_res->get_offsets(); | 544 | 1 | StringRef origin_str, insert_str; | 545 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 546 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. | 547 | 1 | std::vector<size_t> utf8_origin_offsets; | 548 | 31 | for (size_t i = 0; i < input_rows_count; i++) { | 549 | 30 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 550 | | // pos is 1-based index,so we need to minus 1 | 551 | 30 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 552 | 30 | const auto len = col_len[index_check_const<len_const>(i)]; | 553 | 30 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 554 | 30 | utf8_origin_offsets.clear(); | 555 | | | 556 | 209 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 557 | 179 | utf8_origin_offsets.push_back(ni); | 558 | 179 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 559 | 179 | } | 560 | | | 561 | 30 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 562 | | | 563 | 30 | if (pos >= utf8_origin_size || pos < 0) { | 564 | | // If pos is not within the length of the string, the original string is returned. | 565 | 4 | col_res->insert_data(origin_str.data, origin_str.size); | 566 | 4 | continue; | 567 | 4 | } | 568 | 26 | col_res_chars.insert( | 569 | 26 | origin_str.data, | 570 | 26 | origin_str.data + | 571 | 26 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 572 | 26 | if (pos + len >= utf8_origin_size || len < 0) { | 573 | 8 | col_res_chars.insert(insert_str.begin(), | 574 | 8 | insert_str.end()); // copy all of insert_str. | 575 | 18 | } else { | 576 | 18 | col_res_chars.insert(insert_str.begin(), | 577 | 18 | insert_str.end()); // copy all of insert_str. | 578 | 18 | col_res_chars.insert( | 579 | 18 | origin_str.data + utf8_origin_offsets[pos + len], | 580 | 18 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 581 | 18 | } | 582 | 26 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 583 | 26 | col_res_offsets.push_back(col_res_chars.size()); | 584 | 26 | } | 585 | 1 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 541 | 6 | size_t input_rows_count) { | 542 | 6 | auto& col_res_chars = col_res->get_chars(); | 543 | 6 | auto& col_res_offsets = col_res->get_offsets(); | 544 | 6 | StringRef origin_str, insert_str; | 545 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 546 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. | 547 | 6 | std::vector<size_t> utf8_origin_offsets; | 548 | 12 | for (size_t i = 0; i < input_rows_count; i++) { | 549 | 6 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 550 | | // pos is 1-based index,so we need to minus 1 | 551 | 6 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 552 | 6 | const auto len = col_len[index_check_const<len_const>(i)]; | 553 | 6 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 554 | 6 | utf8_origin_offsets.clear(); | 555 | | | 556 | 44 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 557 | 38 | utf8_origin_offsets.push_back(ni); | 558 | 38 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 559 | 38 | } | 560 | | | 561 | 6 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 562 | | | 563 | 6 | if (pos >= utf8_origin_size || pos < 0) { | 564 | | // If pos is not within the length of the string, the original string is returned. | 565 | 3 | col_res->insert_data(origin_str.data, origin_str.size); | 566 | 3 | continue; | 567 | 3 | } | 568 | 3 | col_res_chars.insert( | 569 | 3 | origin_str.data, | 570 | 3 | origin_str.data + | 571 | 3 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 572 | 3 | if (pos + len >= utf8_origin_size || len < 0) { | 573 | 1 | col_res_chars.insert(insert_str.begin(), | 574 | 1 | insert_str.end()); // copy all of insert_str. | 575 | 2 | } else { | 576 | 2 | col_res_chars.insert(insert_str.begin(), | 577 | 2 | insert_str.end()); // copy all of insert_str. | 578 | 2 | col_res_chars.insert( | 579 | 2 | origin_str.data + utf8_origin_offsets[pos + len], | 580 | 2 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 581 | 2 | } | 582 | 3 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 583 | 3 | col_res_offsets.push_back(col_res_chars.size()); | 584 | 3 | } | 585 | 6 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm |
586 | | }; |
587 | | |
588 | | #include "common/compile_check_avoid_end.h" |
589 | | } // namespace doris |