Coverage Report

Created: 2026-04-18 09:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string_replace.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <cstring>
22
#include <string>
23
#include <string_view>
24
25
#include "common/compiler_util.h"
26
#include "common/status.h"
27
#include "core/assert_cast.h"
28
#include "core/block/block.h"
29
#include "core/block/column_numbers.h"
30
#include "core/column/column_const.h"
31
#include "core/column/column_nullable.h"
32
#include "core/column/column_string.h"
33
#include "core/column/column_vector.h"
34
#include "core/data_type/data_type_nullable.h"
35
#include "core/data_type/data_type_number.h"
36
#include "core/data_type/data_type_string.h"
37
#include "core/string_ref.h"
38
#include "exec/common/string_searcher.h"
39
#include "exec/common/stringop_substring.h"
40
#include "exec/common/template_helpers.hpp"
41
#include "exprs/function/function.h"
42
#include "exprs/function/function_helpers.h"
43
#include "exprs/function_context.h"
44
#include "util/simd/vstring_function.h"
45
46
namespace doris {
47
#include "common/compile_check_avoid_begin.h"
48
49
struct ReplaceImpl {
50
    static constexpr auto name = "replace";
51
};
52
53
struct ReplaceEmptyImpl {
54
    static constexpr auto name = "replace_empty";
55
};
56
57
template <typename Impl, bool empty>
58
class FunctionReplace : public IFunction {
59
public:
60
    static constexpr auto name = Impl::name;
61
6.23k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE6createEv
Line
Count
Source
61
4.70k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE6createEv
Line
Count
Source
61
1.52k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
62
2
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE8get_nameB5cxx11Ev
Line
Count
Source
62
1
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE8get_nameB5cxx11Ev
Line
Count
Source
62
1
    String get_name() const override { return name; }
63
6.21k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE23get_number_of_argumentsEv
Line
Count
Source
63
4.69k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE23get_number_of_argumentsEv
Line
Count
Source
63
1.52k
    size_t get_number_of_arguments() const override { return 3; }
64
65
6.21k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
66
6.21k
        return std::make_shared<DataTypeString>();
67
6.21k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
65
4.69k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
66
4.69k
        return std::make_shared<DataTypeString>();
67
4.69k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
65
1.52k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
66
1.52k
        return std::make_shared<DataTypeString>();
67
1.52k
    }
68
69
16
    DataTypes get_variadic_argument_types_impl() const override {
70
16
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
71
16
                std::make_shared<DataTypeString>()};
72
16
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE32get_variadic_argument_types_implEv
Line
Count
Source
69
8
    DataTypes get_variadic_argument_types_impl() const override {
70
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
71
8
                std::make_shared<DataTypeString>()};
72
8
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE32get_variadic_argument_types_implEv
Line
Count
Source
69
8
    DataTypes get_variadic_argument_types_impl() const override {
70
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
71
8
                std::make_shared<DataTypeString>()};
72
8
    }
73
74
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
75
4.85k
                        uint32_t result, size_t input_rows_count) const override {
76
        // We need a local variable to hold a reference to the converted column.
77
        // So that the converted column will not be released before we use it.
78
4.85k
        ColumnPtr col[3];
79
4.85k
        bool col_const[3];
80
19.4k
        for (size_t i = 0; i < 3; ++i) {
81
14.5k
            std::tie(col[i], col_const[i]) =
82
14.5k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
83
14.5k
        }
84
85
4.85k
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
86
4.85k
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
87
4.85k
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
88
89
4.85k
        ColumnString::MutablePtr col_res = ColumnString::create();
90
91
        // Fast path: when old_str and new_str are both constant and old_str is
92
        // non-empty (the common case for replace(col, 'literal', 'literal')).
93
        // Works directly on ColumnString chars/offsets to avoid per-row
94
        // std::string allocation and copy overhead.
95
        // Applies to both replace (empty=true) and replace_empty (empty=false):
96
        // when old_str is non-empty the two variants behave identically.
97
4.85k
        if (col_const[1] && col_const[2]) {
98
3.31k
            StringRef old_ref = col_old_str->get_data_at(0);
99
3.31k
            StringRef new_ref = col_new_str->get_data_at(0);
100
3.31k
            if (old_ref.size > 0) {
101
3.26k
                _replace_const_pattern(*col_origin_str, old_ref, new_ref, *col_res,
102
3.26k
                                       input_rows_count, col_const[0]);
103
3.26k
                block.replace_by_position(result, std::move(col_res));
104
3.26k
                return Status::OK();
105
3.26k
            }
106
3.31k
        }
107
108
1.59k
        std::visit(
109
1.59k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
3.65k
                    for (int i = 0; i < input_rows_count; ++i) {
111
2.05k
                        StringRef origin_str =
112
2.05k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
2.05k
                        StringRef old_str =
114
2.05k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
2.05k
                        StringRef new_str =
116
2.05k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
2.05k
                        std::string result =
119
2.05k
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
2.05k
                                        new_str.to_string_view());
121
122
2.05k
                        col_res->insert_data(result.data(), result.length());
123
2.05k
                    }
124
1.59k
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
109
163
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
575
                    for (int i = 0; i < input_rows_count; ++i) {
111
412
                        StringRef origin_str =
112
412
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
412
                        StringRef old_str =
114
412
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
412
                        StringRef new_str =
116
412
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
412
                        std::string result =
119
412
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
412
                                        new_str.to_string_view());
121
122
412
                        col_res->insert_data(result.data(), result.length());
123
412
                    }
124
163
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
109
25
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
50
                    for (int i = 0; i < input_rows_count; ++i) {
111
25
                        StringRef origin_str =
112
25
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
25
                        StringRef old_str =
114
25
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
25
                        StringRef new_str =
116
25
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
25
                        std::string result =
119
25
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
25
                                        new_str.to_string_view());
121
122
25
                        col_res->insert_data(result.data(), result.length());
123
25
                    }
124
25
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
109
132
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
479
                    for (int i = 0; i < input_rows_count; ++i) {
111
347
                        StringRef origin_str =
112
347
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
347
                        StringRef old_str =
114
347
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
347
                        StringRef new_str =
116
347
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
347
                        std::string result =
119
347
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
347
                                        new_str.to_string_view());
121
122
347
                        col_res->insert_data(result.data(), result.length());
123
347
                    }
124
132
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
109
25
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
50
                    for (int i = 0; i < input_rows_count; ++i) {
111
25
                        StringRef origin_str =
112
25
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
25
                        StringRef old_str =
114
25
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
25
                        StringRef new_str =
116
25
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
25
                        std::string result =
119
25
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
25
                                        new_str.to_string_view());
121
122
25
                        col_res->insert_data(result.data(), result.length());
123
25
                    }
124
25
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
109
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
250
                    for (int i = 0; i < input_rows_count; ++i) {
111
125
                        StringRef origin_str =
112
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
125
                        StringRef old_str =
114
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
125
                        StringRef new_str =
116
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
125
                        std::string result =
119
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
125
                                        new_str.to_string_view());
121
122
125
                        col_res->insert_data(result.data(), result.length());
123
125
                    }
124
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
125
1.59k
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
126
1.59k
                make_bool_variant(col_const[2]));
127
128
1.59k
        block.replace_by_position(result, std::move(col_res));
129
1.59k
        return Status::OK();
130
4.85k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
75
3.97k
                        uint32_t result, size_t input_rows_count) const override {
76
        // We need a local variable to hold a reference to the converted column.
77
        // So that the converted column will not be released before we use it.
78
3.97k
        ColumnPtr col[3];
79
3.97k
        bool col_const[3];
80
15.8k
        for (size_t i = 0; i < 3; ++i) {
81
11.9k
            std::tie(col[i], col_const[i]) =
82
11.9k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
83
11.9k
        }
84
85
3.97k
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
86
3.97k
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
87
3.97k
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
88
89
3.97k
        ColumnString::MutablePtr col_res = ColumnString::create();
90
91
        // Fast path: when old_str and new_str are both constant and old_str is
92
        // non-empty (the common case for replace(col, 'literal', 'literal')).
93
        // Works directly on ColumnString chars/offsets to avoid per-row
94
        // std::string allocation and copy overhead.
95
        // Applies to both replace (empty=true) and replace_empty (empty=false):
96
        // when old_str is non-empty the two variants behave identically.
97
3.97k
        if (col_const[1] && col_const[2]) {
98
3.18k
            StringRef old_ref = col_old_str->get_data_at(0);
99
3.18k
            StringRef new_ref = col_new_str->get_data_at(0);
100
3.18k
            if (old_ref.size > 0) {
101
3.16k
                _replace_const_pattern(*col_origin_str, old_ref, new_ref, *col_res,
102
3.16k
                                       input_rows_count, col_const[0]);
103
3.16k
                block.replace_by_position(result, std::move(col_res));
104
3.16k
                return Status::OK();
105
3.16k
            }
106
3.18k
        }
107
108
813
        std::visit(
109
813
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
813
                    for (int i = 0; i < input_rows_count; ++i) {
111
813
                        StringRef origin_str =
112
813
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
813
                        StringRef old_str =
114
813
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
813
                        StringRef new_str =
116
813
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
813
                        std::string result =
119
813
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
813
                                        new_str.to_string_view());
121
122
813
                        col_res->insert_data(result.data(), result.length());
123
813
                    }
124
813
                },
125
813
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
126
813
                make_bool_variant(col_const[2]));
127
128
813
        block.replace_by_position(result, std::move(col_res));
129
813
        return Status::OK();
130
3.97k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
75
882
                        uint32_t result, size_t input_rows_count) const override {
76
        // We need a local variable to hold a reference to the converted column.
77
        // So that the converted column will not be released before we use it.
78
882
        ColumnPtr col[3];
79
882
        bool col_const[3];
80
3.52k
        for (size_t i = 0; i < 3; ++i) {
81
2.64k
            std::tie(col[i], col_const[i]) =
82
2.64k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
83
2.64k
        }
84
85
882
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
86
882
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
87
882
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
88
89
882
        ColumnString::MutablePtr col_res = ColumnString::create();
90
91
        // Fast path: when old_str and new_str are both constant and old_str is
92
        // non-empty (the common case for replace(col, 'literal', 'literal')).
93
        // Works directly on ColumnString chars/offsets to avoid per-row
94
        // std::string allocation and copy overhead.
95
        // Applies to both replace (empty=true) and replace_empty (empty=false):
96
        // when old_str is non-empty the two variants behave identically.
97
882
        if (col_const[1] && col_const[2]) {
98
125
            StringRef old_ref = col_old_str->get_data_at(0);
99
125
            StringRef new_ref = col_new_str->get_data_at(0);
100
125
            if (old_ref.size > 0) {
101
100
                _replace_const_pattern(*col_origin_str, old_ref, new_ref, *col_res,
102
100
                                       input_rows_count, col_const[0]);
103
100
                block.replace_by_position(result, std::move(col_res));
104
100
                return Status::OK();
105
100
            }
106
125
        }
107
108
782
        std::visit(
109
782
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
110
782
                    for (int i = 0; i < input_rows_count; ++i) {
111
782
                        StringRef origin_str =
112
782
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
113
782
                        StringRef old_str =
114
782
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
115
782
                        StringRef new_str =
116
782
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
117
118
782
                        std::string result =
119
782
                                replace(origin_str.to_string(), old_str.to_string_view(),
120
782
                                        new_str.to_string_view());
121
122
782
                        col_res->insert_data(result.data(), result.length());
123
782
                    }
124
782
                },
125
782
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
126
782
                make_bool_variant(col_const[2]));
127
128
782
        block.replace_by_position(result, std::move(col_res));
129
782
        return Status::OK();
130
882
    }
131
132
private:
133
    // Optimized replace path for constant old_str (non-empty) and constant new_str.
134
    // Avoids per-row std::string allocation by working directly on ColumnString
135
    // chars/offsets.  Two-level search strategy:
136
    //  1. memchr (glibc AVX512) scans for the needle's first byte.  If absent,
137
    //     the row is guaranteed no-match and is bulk-copied with a single memcpy.
138
    //  2. When the first byte is present, ASCIICaseSensitiveStringSearcher
139
    //     (SSE4.1, prebuilt once outside the row loop) does the full needle scan.
140
    static void _replace_const_pattern(const ColumnString& src, StringRef old_ref,
141
                                       StringRef new_ref, ColumnString& dst,
142
3.26k
                                       size_t input_rows_count, bool src_const) {
143
3.26k
        auto& dst_chars = dst.get_chars();
144
3.26k
        auto& dst_offsets = dst.get_offsets();
145
146
3.26k
        dst_chars.reserve(src_const ? (src.get_data_at(0).size * input_rows_count)
147
3.26k
                                    : src.get_chars().size());
148
3.26k
        dst_offsets.resize(input_rows_count);
149
150
        // Build SSE4.1 searcher once — first+second byte masks precomputed here.
151
3.26k
        ASCIICaseSensitiveStringSearcher searcher(old_ref.data, old_ref.size);
152
3.26k
        const size_t needle_size = old_ref.size;
153
3.26k
        const size_t replacement_size = new_ref.size;
154
3.26k
        const char* replacement_data = new_ref.data;
155
3.26k
        const auto needle_first = static_cast<unsigned char>(old_ref.data[0]);
156
157
12.9k
        for (size_t i = 0; i < input_rows_count; ++i) {
158
9.72k
            StringRef row = src.get_data_at(src_const ? 0 : i);
159
9.72k
            const char* const row_end = row.data + row.size;
160
161
            // Level-1: memchr for needle's first byte (glibc uses AVX512 internally).
162
            // If the first byte is absent the entire row cannot contain the needle;
163
            // bulk-copy it and move to the next row without entering the SSE4.1 loop.
164
9.72k
            if (memchr(row.data, needle_first, row.size) == nullptr) {
165
9.32k
                StringOP::push_value_string({row.data, row.size}, i, dst_chars, dst_offsets);
166
9.32k
                continue;
167
9.32k
            }
168
169
            // Level-2: SSE4.1 searcher handles needle matching for this row.
170
397
            const char* pos = row.data;
171
1.14k
            while (pos < row_end) {
172
1.09k
                const char* match = searcher.search(pos, row_end);
173
                // Copy prefix before match
174
1.09k
                size_t prefix_len = static_cast<size_t>(match - pos);
175
1.09k
                if (prefix_len > 0) {
176
1.03k
                    size_t old_size = dst_chars.size();
177
1.03k
                    ColumnString::check_chars_length(old_size + prefix_len, i + 1);
178
1.03k
                    dst_chars.resize(old_size + prefix_len);
179
1.03k
                    memcpy(&dst_chars[old_size], pos, prefix_len);
180
1.03k
                }
181
1.09k
                if (match == row_end) {
182
343
                    break;
183
343
                }
184
                // Copy replacement
185
750
                if (replacement_size > 0) {
186
734
                    size_t old_size = dst_chars.size();
187
734
                    ColumnString::check_chars_length(old_size + replacement_size, i + 1);
188
734
                    dst_chars.resize(old_size + replacement_size);
189
734
                    memcpy(&dst_chars[old_size], replacement_data, replacement_size);
190
734
                }
191
750
                pos = match + needle_size;
192
750
            }
193
397
            StringOP::push_empty_string(i, dst_chars, dst_offsets);
194
397
        }
195
3.26k
    }
_ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE22_replace_const_patternERKNS_9ColumnStrIjEENS_9StringRefES7_RS4_mb
Line
Count
Source
142
3.16k
                                       size_t input_rows_count, bool src_const) {
143
3.16k
        auto& dst_chars = dst.get_chars();
144
3.16k
        auto& dst_offsets = dst.get_offsets();
145
146
3.16k
        dst_chars.reserve(src_const ? (src.get_data_at(0).size * input_rows_count)
147
3.16k
                                    : src.get_chars().size());
148
3.16k
        dst_offsets.resize(input_rows_count);
149
150
        // Build SSE4.1 searcher once — first+second byte masks precomputed here.
151
3.16k
        ASCIICaseSensitiveStringSearcher searcher(old_ref.data, old_ref.size);
152
3.16k
        const size_t needle_size = old_ref.size;
153
3.16k
        const size_t replacement_size = new_ref.size;
154
3.16k
        const char* replacement_data = new_ref.data;
155
3.16k
        const auto needle_first = static_cast<unsigned char>(old_ref.data[0]);
156
157
12.7k
        for (size_t i = 0; i < input_rows_count; ++i) {
158
9.62k
            StringRef row = src.get_data_at(src_const ? 0 : i);
159
9.62k
            const char* const row_end = row.data + row.size;
160
161
            // Level-1: memchr for needle's first byte (glibc uses AVX512 internally).
162
            // If the first byte is absent the entire row cannot contain the needle;
163
            // bulk-copy it and move to the next row without entering the SSE4.1 loop.
164
9.62k
            if (memchr(row.data, needle_first, row.size) == nullptr) {
165
9.26k
                StringOP::push_value_string({row.data, row.size}, i, dst_chars, dst_offsets);
166
9.26k
                continue;
167
9.26k
            }
168
169
            // Level-2: SSE4.1 searcher handles needle matching for this row.
170
357
            const char* pos = row.data;
171
1.06k
            while (pos < row_end) {
172
1.03k
                const char* match = searcher.search(pos, row_end);
173
                // Copy prefix before match
174
1.03k
                size_t prefix_len = static_cast<size_t>(match - pos);
175
1.03k
                if (prefix_len > 0) {
176
1.00k
                    size_t old_size = dst_chars.size();
177
1.00k
                    ColumnString::check_chars_length(old_size + prefix_len, i + 1);
178
1.00k
                    dst_chars.resize(old_size + prefix_len);
179
1.00k
                    memcpy(&dst_chars[old_size], pos, prefix_len);
180
1.00k
                }
181
1.03k
                if (match == row_end) {
182
328
                    break;
183
328
                }
184
                // Copy replacement
185
710
                if (replacement_size > 0) {
186
702
                    size_t old_size = dst_chars.size();
187
702
                    ColumnString::check_chars_length(old_size + replacement_size, i + 1);
188
702
                    dst_chars.resize(old_size + replacement_size);
189
702
                    memcpy(&dst_chars[old_size], replacement_data, replacement_size);
190
702
                }
191
710
                pos = match + needle_size;
192
710
            }
193
357
            StringOP::push_empty_string(i, dst_chars, dst_offsets);
194
357
        }
195
3.16k
    }
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE22_replace_const_patternERKNS_9ColumnStrIjEENS_9StringRefES7_RS4_mb
Line
Count
Source
142
100
                                       size_t input_rows_count, bool src_const) {
143
100
        auto& dst_chars = dst.get_chars();
144
100
        auto& dst_offsets = dst.get_offsets();
145
146
100
        dst_chars.reserve(src_const ? (src.get_data_at(0).size * input_rows_count)
147
100
                                    : src.get_chars().size());
148
100
        dst_offsets.resize(input_rows_count);
149
150
        // Build SSE4.1 searcher once — first+second byte masks precomputed here.
151
100
        ASCIICaseSensitiveStringSearcher searcher(old_ref.data, old_ref.size);
152
100
        const size_t needle_size = old_ref.size;
153
100
        const size_t replacement_size = new_ref.size;
154
100
        const char* replacement_data = new_ref.data;
155
100
        const auto needle_first = static_cast<unsigned char>(old_ref.data[0]);
156
157
200
        for (size_t i = 0; i < input_rows_count; ++i) {
158
100
            StringRef row = src.get_data_at(src_const ? 0 : i);
159
100
            const char* const row_end = row.data + row.size;
160
161
            // Level-1: memchr for needle's first byte (glibc uses AVX512 internally).
162
            // If the first byte is absent the entire row cannot contain the needle;
163
            // bulk-copy it and move to the next row without entering the SSE4.1 loop.
164
100
            if (memchr(row.data, needle_first, row.size) == nullptr) {
165
60
                StringOP::push_value_string({row.data, row.size}, i, dst_chars, dst_offsets);
166
60
                continue;
167
60
            }
168
169
            // Level-2: SSE4.1 searcher handles needle matching for this row.
170
40
            const char* pos = row.data;
171
80
            while (pos < row_end) {
172
55
                const char* match = searcher.search(pos, row_end);
173
                // Copy prefix before match
174
55
                size_t prefix_len = static_cast<size_t>(match - pos);
175
55
                if (prefix_len > 0) {
176
30
                    size_t old_size = dst_chars.size();
177
30
                    ColumnString::check_chars_length(old_size + prefix_len, i + 1);
178
30
                    dst_chars.resize(old_size + prefix_len);
179
30
                    memcpy(&dst_chars[old_size], pos, prefix_len);
180
30
                }
181
55
                if (match == row_end) {
182
15
                    break;
183
15
                }
184
                // Copy replacement
185
40
                if (replacement_size > 0) {
186
32
                    size_t old_size = dst_chars.size();
187
32
                    ColumnString::check_chars_length(old_size + replacement_size, i + 1);
188
32
                    dst_chars.resize(old_size + replacement_size);
189
32
                    memcpy(&dst_chars[old_size], replacement_data, replacement_size);
190
32
                }
191
40
                pos = match + needle_size;
192
40
            }
193
40
            StringOP::push_empty_string(i, dst_chars, dst_offsets);
194
40
        }
195
100
    }
196
197
2.05k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
198
2.05k
        if (old_str.empty()) {
199
503
            if constexpr (empty) {
200
252
                return str;
201
252
            } else {
202
                // Different from "Replace" only when the search string is empty.
203
                // it will insert `new_str` in front of every character and at the end of the old str.
204
251
                if (new_str.empty()) {
205
59
                    return str;
206
59
                }
207
192
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
208
190
                    std::string result;
209
190
                    ColumnString::check_chars_length(
210
190
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
211
190
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
212
651
                    for (char c : str) {
213
651
                        result += new_str;
214
651
                        result += c;
215
651
                    }
216
190
                    result += new_str;
217
190
                    return result;
218
190
                } else {
219
2
                    std::string result;
220
2
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
221
11
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
222
9
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
223
9
                        result += new_str;
224
9
                        result.append(&str[i], utf8_char_len);
225
9
                    }
226
2
                    result += new_str;
227
2
                    ColumnString::check_chars_length(result.size(), 0);
228
2
                    return result;
229
2
                }
230
192
            }
231
1.55k
        } else {
232
1.55k
            std::string::size_type pos = 0;
233
1.55k
            std::string::size_type oldLen = old_str.size();
234
1.55k
            std::string::size_type newLen = new_str.size();
235
2.20k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
236
649
                str.replace(pos, oldLen, new_str);
237
649
                pos += newLen;
238
649
            }
239
1.55k
            return str;
240
1.55k
        }
241
2.05k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
197
1.06k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
198
1.06k
        if (old_str.empty()) {
199
252
            if constexpr (empty) {
200
252
                return str;
201
            } else {
202
                // Different from "Replace" only when the search string is empty.
203
                // it will insert `new_str` in front of every character and at the end of the old str.
204
                if (new_str.empty()) {
205
                    return str;
206
                }
207
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
208
                    std::string result;
209
                    ColumnString::check_chars_length(
210
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
211
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
212
                    for (char c : str) {
213
                        result += new_str;
214
                        result += c;
215
                    }
216
                    result += new_str;
217
                    return result;
218
                } else {
219
                    std::string result;
220
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
221
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
222
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
223
                        result += new_str;
224
                        result.append(&str[i], utf8_char_len);
225
                    }
226
                    result += new_str;
227
                    ColumnString::check_chars_length(result.size(), 0);
228
                    return result;
229
                }
230
            }
231
810
        } else {
232
810
            std::string::size_type pos = 0;
233
810
            std::string::size_type oldLen = old_str.size();
234
810
            std::string::size_type newLen = new_str.size();
235
1.16k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
236
357
                str.replace(pos, oldLen, new_str);
237
357
                pos += newLen;
238
357
            }
239
810
            return str;
240
810
        }
241
1.06k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
197
997
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
198
997
        if (old_str.empty()) {
199
            if constexpr (empty) {
200
                return str;
201
251
            } else {
202
                // Different from "Replace" only when the search string is empty.
203
                // it will insert `new_str` in front of every character and at the end of the old str.
204
251
                if (new_str.empty()) {
205
59
                    return str;
206
59
                }
207
192
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
208
190
                    std::string result;
209
190
                    ColumnString::check_chars_length(
210
190
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
211
190
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
212
651
                    for (char c : str) {
213
651
                        result += new_str;
214
651
                        result += c;
215
651
                    }
216
190
                    result += new_str;
217
190
                    return result;
218
190
                } else {
219
2
                    std::string result;
220
2
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
221
11
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
222
9
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
223
9
                        result += new_str;
224
9
                        result.append(&str[i], utf8_char_len);
225
9
                    }
226
2
                    result += new_str;
227
2
                    ColumnString::check_chars_length(result.size(), 0);
228
2
                    return result;
229
2
                }
230
192
            }
231
746
        } else {
232
746
            std::string::size_type pos = 0;
233
746
            std::string::size_type oldLen = old_str.size();
234
746
            std::string::size_type newLen = new_str.size();
235
1.03k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
236
292
                str.replace(pos, oldLen, new_str);
237
292
                pos += newLen;
238
292
            }
239
746
            return str;
240
746
        }
241
997
    }
242
};
243
244
struct ReverseImpl {
245
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
246
72
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
247
72
        auto rows_count = offsets.size();
248
72
        res_offsets.resize(rows_count);
249
72
        res_data.reserve(data.size());
250
215
        for (ssize_t i = 0; i < rows_count; ++i) {
251
143
            auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
252
143
            int64_t src_len = offsets[i] - offsets[i - 1];
253
143
            std::string dst;
254
143
            dst.resize(src_len);
255
143
            simd::VStringFunctions::reverse(StringRef((uint8_t*)src_str, src_len), &dst);
256
143
            StringOP::push_value_string(std::string_view(dst.data(), src_len), i, res_data,
257
143
                                        res_offsets);
258
143
        }
259
72
        return Status::OK();
260
72
    }
261
};
262
263
template <typename Impl>
264
class FunctionSubReplace : public IFunction {
265
public:
266
    static constexpr auto name = "sub_replace";
267
268
89
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE6createEv
Line
Count
Source
268
39
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE6createEv
Line
Count
Source
268
50
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
269
270
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE8get_nameB5cxx11Ev
271
272
71
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
273
71
        return make_nullable(std::make_shared<DataTypeString>());
274
71
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
272
30
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
273
30
        return make_nullable(std::make_shared<DataTypeString>());
274
30
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
272
41
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
273
41
        return make_nullable(std::make_shared<DataTypeString>());
274
41
    }
275
276
73
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE11is_variadicEv
Line
Count
Source
276
31
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE11is_variadicEv
Line
Count
Source
276
42
    bool is_variadic() const override { return true; }
277
278
16
    DataTypes get_variadic_argument_types_impl() const override {
279
16
        return Impl::get_variadic_argument_types();
280
16
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE32get_variadic_argument_types_implEv
Line
Count
Source
278
8
    DataTypes get_variadic_argument_types_impl() const override {
279
8
        return Impl::get_variadic_argument_types();
280
8
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE32get_variadic_argument_types_implEv
Line
Count
Source
278
8
    DataTypes get_variadic_argument_types_impl() const override {
279
8
        return Impl::get_variadic_argument_types();
280
8
    }
281
282
0
    size_t get_number_of_arguments() const override {
283
0
        return get_variadic_argument_types_impl().size();
284
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE23get_number_of_argumentsEv
285
286
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
287
93
                        uint32_t result, size_t input_rows_count) const override {
288
93
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
289
93
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
287
41
                        uint32_t result, size_t input_rows_count) const override {
288
41
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
289
41
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
287
52
                        uint32_t result, size_t input_rows_count) const override {
288
52
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
289
52
    }
290
};
291
292
struct SubReplaceImpl {
293
    static Status replace_execute(Block& block, const ColumnNumbers& arguments, uint32_t result,
294
94
                                  size_t input_rows_count) {
295
94
        auto res_column = ColumnString::create();
296
94
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
297
94
        auto args_null_map = ColumnUInt8::create(input_rows_count, 0);
298
94
        ColumnPtr argument_columns[4];
299
94
        bool col_const[4];
300
470
        for (int i = 0; i < 4; ++i) {
301
376
            std::tie(argument_columns[i], col_const[i]) =
302
376
                    unpack_if_const(block.get_by_position(arguments[i]).column);
303
376
        }
304
94
        const auto* data_column = assert_cast<const ColumnString*>(argument_columns[0].get());
305
94
        const auto* mask_column = assert_cast<const ColumnString*>(argument_columns[1].get());
306
94
        const auto* start_column = assert_cast<const ColumnInt32*>(argument_columns[2].get());
307
94
        const auto* length_column = assert_cast<const ColumnInt32*>(argument_columns[3].get());
308
309
94
        std::visit(
310
94
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
311
94
                    if (data_column->is_ascii()) {
312
70
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
313
70
                                data_column, mask_column, start_column->get_data(),
314
70
                                length_column->get_data(), args_null_map->get_data(), result_column,
315
70
                                input_rows_count);
316
70
                    } else {
317
24
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
318
24
                                data_column, mask_column, start_column->get_data(),
319
24
                                length_column->get_data(), args_null_map->get_data(), result_column,
320
24
                                input_rows_count);
321
24
                    }
322
94
                },
_ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
310
94
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
311
94
                    if (data_column->is_ascii()) {
312
70
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
313
70
                                data_column, mask_column, start_column->get_data(),
314
70
                                length_column->get_data(), args_null_map->get_data(), result_column,
315
70
                                input_rows_count);
316
70
                    } else {
317
24
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
318
24
                                data_column, mask_column, start_column->get_data(),
319
24
                                length_column->get_data(), args_null_map->get_data(), result_column,
320
24
                                input_rows_count);
321
24
                    }
322
94
                },
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_
323
94
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
324
94
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
325
94
        block.get_by_position(result).column =
326
94
                ColumnNullable::create(std::move(res_column), std::move(args_null_map));
327
94
        return Status::OK();
328
94
    }
329
330
private:
331
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
332
    static void vector_ascii(const ColumnString* data_column, const ColumnString* mask_column,
333
                             const PaddedPODArray<Int32>& args_start,
334
                             const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
335
70
                             ColumnString* result_column, size_t input_rows_count) {
336
70
        ColumnString::Chars& res_chars = result_column->get_chars();
337
70
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
338
10.4k
        for (size_t row = 0; row < input_rows_count; ++row) {
339
10.3k
            StringRef origin_str =
340
10.3k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
341
10.3k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
342
10.3k
            const auto start = args_start[index_check_const<start_const>(row)];
343
10.3k
            const auto length = args_length[index_check_const<len_const>(row)];
344
10.3k
            const size_t origin_str_len = origin_str.size;
345
            //input is null, start < 0, len < 0, str_size <= start. return NULL
346
10.3k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
347
10.2k
                res_offsets.push_back(res_chars.size());
348
10.2k
                args_null_map[row] = 1;
349
10.2k
            } else {
350
92
                std::string_view replace_str = new_str.to_string_view();
351
92
                std::string result = origin_str.to_string();
352
92
                result.replace(start, length, replace_str);
353
92
                result_column->insert_data(result.data(), result.length());
354
92
            }
355
10.3k
        }
356
70
    }
_ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Line
Count
Source
335
70
                             ColumnString* result_column, size_t input_rows_count) {
336
70
        ColumnString::Chars& res_chars = result_column->get_chars();
337
70
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
338
10.4k
        for (size_t row = 0; row < input_rows_count; ++row) {
339
10.3k
            StringRef origin_str =
340
10.3k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
341
10.3k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
342
10.3k
            const auto start = args_start[index_check_const<start_const>(row)];
343
10.3k
            const auto length = args_length[index_check_const<len_const>(row)];
344
10.3k
            const size_t origin_str_len = origin_str.size;
345
            //input is null, start < 0, len < 0, str_size <= start. return NULL
346
10.3k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
347
10.2k
                res_offsets.push_back(res_chars.size());
348
10.2k
                args_null_map[row] = 1;
349
10.2k
            } else {
350
92
                std::string_view replace_str = new_str.to_string_view();
351
92
                std::string result = origin_str.to_string();
352
92
                result.replace(start, length, replace_str);
353
92
                result_column->insert_data(result.data(), result.length());
354
92
            }
355
10.3k
        }
356
70
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
357
358
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
359
    static void vector_utf8(const ColumnString* data_column, const ColumnString* mask_column,
360
                            const PaddedPODArray<Int32>& args_start,
361
                            const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
362
24
                            ColumnString* result_column, size_t input_rows_count) {
363
24
        ColumnString::Chars& res_chars = result_column->get_chars();
364
24
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
365
366
48
        for (size_t row = 0; row < input_rows_count; ++row) {
367
24
            StringRef origin_str =
368
24
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
369
24
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
370
24
            const auto start = args_start[index_check_const<start_const>(row)];
371
24
            const auto length = args_length[index_check_const<len_const>(row)];
372
            //input is null, start < 0, len < 0 return NULL
373
24
            if (args_null_map[row] || start < 0 || length < 0) {
374
2
                res_offsets.push_back(res_chars.size());
375
2
                args_null_map[row] = 1;
376
2
                continue;
377
2
            }
378
379
22
            const auto [start_byte_len, start_char_len] =
380
22
                    simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(),
381
22
                                                                           origin_str.end(), start);
382
383
            // start >= orgin.size
384
22
            DCHECK(start_char_len <= start);
385
22
            if (start_byte_len == origin_str.size) {
386
8
                res_offsets.push_back(res_chars.size());
387
8
                args_null_map[row] = 1;
388
8
                continue;
389
8
            }
390
391
14
            auto [end_byte_len, end_char_len] =
392
14
                    simd::VStringFunctions::iterate_utf8_with_limit_length(
393
14
                            origin_str.begin() + start_byte_len, origin_str.end(), length);
394
14
            DCHECK(end_char_len <= length);
395
14
            std::string_view replace_str = new_str.to_string_view();
396
14
            std::string result = origin_str.to_string();
397
14
            result.replace(start_byte_len, end_byte_len, replace_str);
398
14
            result_column->insert_data(result.data(), result.length());
399
14
        }
400
24
    }
_ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Line
Count
Source
362
24
                            ColumnString* result_column, size_t input_rows_count) {
363
24
        ColumnString::Chars& res_chars = result_column->get_chars();
364
24
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
365
366
48
        for (size_t row = 0; row < input_rows_count; ++row) {
367
24
            StringRef origin_str =
368
24
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
369
24
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
370
24
            const auto start = args_start[index_check_const<start_const>(row)];
371
24
            const auto length = args_length[index_check_const<len_const>(row)];
372
            //input is null, start < 0, len < 0 return NULL
373
24
            if (args_null_map[row] || start < 0 || length < 0) {
374
2
                res_offsets.push_back(res_chars.size());
375
2
                args_null_map[row] = 1;
376
2
                continue;
377
2
            }
378
379
22
            const auto [start_byte_len, start_char_len] =
380
22
                    simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(),
381
22
                                                                           origin_str.end(), start);
382
383
            // start >= orgin.size
384
22
            DCHECK(start_char_len <= start);
385
22
            if (start_byte_len == origin_str.size) {
386
8
                res_offsets.push_back(res_chars.size());
387
8
                args_null_map[row] = 1;
388
8
                continue;
389
8
            }
390
391
14
            auto [end_byte_len, end_char_len] =
392
14
                    simd::VStringFunctions::iterate_utf8_with_limit_length(
393
14
                            origin_str.begin() + start_byte_len, origin_str.end(), length);
394
            DCHECK(end_char_len <= length);
395
14
            std::string_view replace_str = new_str.to_string_view();
396
14
            std::string result = origin_str.to_string();
397
14
            result.replace(start_byte_len, end_byte_len, replace_str);
398
14
            result_column->insert_data(result.data(), result.length());
399
14
        }
400
24
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
401
};
402
403
struct SubReplaceThreeImpl {
404
8
    static DataTypes get_variadic_argument_types() {
405
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
406
8
                std::make_shared<DataTypeInt32>()};
407
8
    }
408
409
    static Status execute_impl(FunctionContext* context, Block& block,
410
                               const ColumnNumbers& arguments, uint32_t result,
411
41
                               size_t input_rows_count) {
412
41
        auto params = ColumnInt32::create(input_rows_count);
413
41
        auto& strlen_data = params->get_data();
414
415
41
        auto str_col =
416
41
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
417
41
        if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
418
0
            str_col = nullable->get_nested_column_ptr();
419
0
        }
420
41
        const auto* str_column = assert_cast<const ColumnString*>(str_col.get());
421
        // use utf8 len
422
116
        for (int i = 0; i < input_rows_count; ++i) {
423
75
            StringRef str_ref = str_column->get_data_at(i);
424
75
            strlen_data[i] = simd::VStringFunctions::get_char_len(str_ref.data, str_ref.size);
425
75
        }
426
427
41
        block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"});
428
41
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2],
429
41
                                        block.columns() - 1};
430
41
        return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count);
431
41
    }
432
};
433
434
struct SubReplaceFourImpl {
435
8
    static DataTypes get_variadic_argument_types() {
436
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
437
8
                std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()};
438
8
    }
439
440
    static Status execute_impl(FunctionContext* context, Block& block,
441
                               const ColumnNumbers& arguments, uint32_t result,
442
52
                               size_t input_rows_count) {
443
52
        return SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count);
444
52
    }
445
};
446
447
class FunctionOverlay : public IFunction {
448
public:
449
    static constexpr auto name = "overlay";
450
179
    static FunctionPtr create() { return std::make_shared<FunctionOverlay>(); }
451
1
    String get_name() const override { return name; }
452
170
    size_t get_number_of_arguments() const override { return 4; }
453
454
170
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
455
170
        return std::make_shared<DataTypeString>();
456
170
    }
457
458
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
459
154
                        uint32_t result, size_t input_rows_count) const override {
460
154
        DCHECK_EQ(arguments.size(), 4);
461
462
154
        bool col_const[4];
463
154
        ColumnPtr argument_columns[4];
464
770
        for (int i = 0; i < 4; ++i) {
465
616
            std::tie(argument_columns[i], col_const[i]) =
466
616
                    unpack_if_const(block.get_by_position(arguments[i]).column);
467
616
        }
468
469
154
        const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get());
470
471
154
        const auto* col_pos =
472
154
                assert_cast<const ColumnInt32*>(argument_columns[1].get())->get_data().data();
473
154
        const auto* col_len =
474
154
                assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_data().data();
475
154
        const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get());
476
477
154
        ColumnString::MutablePtr col_res = ColumnString::create();
478
479
        // if all input string is ascii, we can use ascii function to handle it
480
154
        const bool is_all_ascii = col_origin->is_ascii() && col_insert->is_ascii();
481
154
        std::visit(
482
154
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
483
154
                    if (is_all_ascii) {
484
79
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
485
79
                                col_origin, col_pos, col_len, col_insert, col_res,
486
79
                                input_rows_count);
487
79
                    } else {
488
75
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
489
75
                                col_origin, col_pos, col_len, col_insert, col_res,
490
75
                                input_rows_count);
491
75
                    }
492
154
                },
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SH_EEDaSA_SB_SC_SD_
Line
Count
Source
482
139
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
483
139
                    if (is_all_ascii) {
484
72
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
485
72
                                col_origin, col_pos, col_len, col_insert, col_res,
486
72
                                input_rows_count);
487
72
                    } else {
488
67
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
489
67
                                col_origin, col_pos, col_len, col_insert, col_res,
490
67
                                input_rows_count);
491
67
                    }
492
139
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SG_IbLb1EEEEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESH_EEDaSA_SB_SC_SD_
Line
Count
Source
482
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
483
1
                    if (is_all_ascii) {
484
0
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
485
0
                                col_origin, col_pos, col_len, col_insert, col_res,
486
0
                                input_rows_count);
487
1
                    } else {
488
1
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
489
1
                                col_origin, col_pos, col_len, col_insert, col_res,
490
1
                                input_rows_count);
491
1
                    }
492
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SH_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SI_EEDaSA_SB_SC_SD_
Line
Count
Source
482
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
483
1
                    if (is_all_ascii) {
484
0
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
485
0
                                col_origin, col_pos, col_len, col_insert, col_res,
486
0
                                input_rows_count);
487
1
                    } else {
488
1
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
489
1
                                col_origin, col_pos, col_len, col_insert, col_res,
490
1
                                input_rows_count);
491
1
                    }
492
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SH_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SI_EEDaSA_SB_SC_SD_
Line
Count
Source
482
12
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
483
12
                    if (is_all_ascii) {
484
6
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
485
6
                                col_origin, col_pos, col_len, col_insert, col_res,
486
6
                                input_rows_count);
487
6
                    } else {
488
6
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
489
6
                                col_origin, col_pos, col_len, col_insert, col_res,
490
6
                                input_rows_count);
491
6
                    }
492
12
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SI_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SH_EEDaSA_SB_SC_SD_
Line
Count
Source
482
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
483
1
                    if (is_all_ascii) {
484
1
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
485
1
                                col_origin, col_pos, col_len, col_insert, col_res,
486
1
                                input_rows_count);
487
1
                    } else {
488
0
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
489
0
                                col_origin, col_pos, col_len, col_insert, col_res,
490
0
                                input_rows_count);
491
0
                    }
492
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SG_IbLb0EEEEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SH_EEDaSA_SB_SC_SD_
493
154
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
494
154
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
495
154
        block.replace_by_position(result, std::move(col_res));
496
154
        return Status::OK();
497
154
    }
498
499
private:
500
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
501
    static void vector_ascii(const ColumnString* col_origin, int const* col_pos, int const* col_len,
502
                             const ColumnString* col_insert, ColumnString::MutablePtr& col_res,
503
79
                             size_t input_rows_count) {
504
79
        auto& col_res_chars = col_res->get_chars();
505
79
        auto& col_res_offsets = col_res->get_offsets();
506
79
        StringRef origin_str, insert_str;
507
187
        for (size_t i = 0; i < input_rows_count; i++) {
508
108
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
509
            // pos is 1-based index,so we need to minus 1
510
108
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
511
108
            const auto len = col_len[index_check_const<len_const>(i)];
512
108
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
513
108
            const auto origin_size = origin_str.size;
514
108
            if (pos >= origin_size || pos < 0) {
515
                // If pos is not within the length of the string, the original string is returned.
516
26
                col_res->insert_data(origin_str.data, origin_str.size);
517
26
                continue;
518
26
            }
519
82
            col_res_chars.insert(origin_str.data,
520
82
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
521
82
            if (pos + len > origin_size || len < 0) {
522
15
                col_res_chars.insert(insert_str.begin(),
523
15
                                     insert_str.end()); // copy all of insert_str.
524
67
            } else {
525
67
                col_res_chars.insert(insert_str.begin(),
526
67
                                     insert_str.end()); // copy all of insert_str.
527
67
                col_res_chars.insert(
528
67
                        origin_str.data + pos + len,
529
67
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
530
67
            }
531
82
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
532
82
            col_res_offsets.push_back(col_res_chars.size());
533
82
        }
534
79
    }
_ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
503
72
                             size_t input_rows_count) {
504
72
        auto& col_res_chars = col_res->get_chars();
505
72
        auto& col_res_offsets = col_res->get_offsets();
506
72
        StringRef origin_str, insert_str;
507
144
        for (size_t i = 0; i < input_rows_count; i++) {
508
72
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
509
            // pos is 1-based index,so we need to minus 1
510
72
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
511
72
            const auto len = col_len[index_check_const<len_const>(i)];
512
72
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
513
72
            const auto origin_size = origin_str.size;
514
72
            if (pos >= origin_size || pos < 0) {
515
                // If pos is not within the length of the string, the original string is returned.
516
18
                col_res->insert_data(origin_str.data, origin_str.size);
517
18
                continue;
518
18
            }
519
54
            col_res_chars.insert(origin_str.data,
520
54
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
521
54
            if (pos + len > origin_size || len < 0) {
522
11
                col_res_chars.insert(insert_str.begin(),
523
11
                                     insert_str.end()); // copy all of insert_str.
524
43
            } else {
525
43
                col_res_chars.insert(insert_str.begin(),
526
43
                                     insert_str.end()); // copy all of insert_str.
527
43
                col_res_chars.insert(
528
43
                        origin_str.data + pos + len,
529
43
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
530
43
            }
531
54
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
532
54
            col_res_offsets.push_back(col_res_chars.size());
533
54
        }
534
72
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
503
6
                             size_t input_rows_count) {
504
6
        auto& col_res_chars = col_res->get_chars();
505
6
        auto& col_res_offsets = col_res->get_offsets();
506
6
        StringRef origin_str, insert_str;
507
12
        for (size_t i = 0; i < input_rows_count; i++) {
508
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
509
            // pos is 1-based index,so we need to minus 1
510
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
511
6
            const auto len = col_len[index_check_const<len_const>(i)];
512
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
513
6
            const auto origin_size = origin_str.size;
514
6
            if (pos >= origin_size || pos < 0) {
515
                // If pos is not within the length of the string, the original string is returned.
516
3
                col_res->insert_data(origin_str.data, origin_str.size);
517
3
                continue;
518
3
            }
519
3
            col_res_chars.insert(origin_str.data,
520
3
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
521
3
            if (pos + len > origin_size || len < 0) {
522
1
                col_res_chars.insert(insert_str.begin(),
523
1
                                     insert_str.end()); // copy all of insert_str.
524
2
            } else {
525
2
                col_res_chars.insert(insert_str.begin(),
526
2
                                     insert_str.end()); // copy all of insert_str.
527
2
                col_res_chars.insert(
528
2
                        origin_str.data + pos + len,
529
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
530
2
            }
531
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
532
3
            col_res_offsets.push_back(col_res_chars.size());
533
3
        }
534
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
503
1
                             size_t input_rows_count) {
504
1
        auto& col_res_chars = col_res->get_chars();
505
1
        auto& col_res_offsets = col_res->get_offsets();
506
1
        StringRef origin_str, insert_str;
507
31
        for (size_t i = 0; i < input_rows_count; i++) {
508
30
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
509
            // pos is 1-based index,so we need to minus 1
510
30
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
511
30
            const auto len = col_len[index_check_const<len_const>(i)];
512
30
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
513
30
            const auto origin_size = origin_str.size;
514
30
            if (pos >= origin_size || pos < 0) {
515
                // If pos is not within the length of the string, the original string is returned.
516
5
                col_res->insert_data(origin_str.data, origin_str.size);
517
5
                continue;
518
5
            }
519
25
            col_res_chars.insert(origin_str.data,
520
25
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
521
25
            if (pos + len > origin_size || len < 0) {
522
3
                col_res_chars.insert(insert_str.begin(),
523
3
                                     insert_str.end()); // copy all of insert_str.
524
22
            } else {
525
22
                col_res_chars.insert(insert_str.begin(),
526
22
                                     insert_str.end()); // copy all of insert_str.
527
22
                col_res_chars.insert(
528
22
                        origin_str.data + pos + len,
529
22
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
530
22
            }
531
25
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
532
25
            col_res_offsets.push_back(col_res_chars.size());
533
25
        }
534
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
535
536
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
537
    NO_SANITIZE_UNDEFINED static void vector_utf8(const ColumnString* col_origin,
538
                                                  int const* col_pos, int const* col_len,
539
                                                  const ColumnString* col_insert,
540
                                                  ColumnString::MutablePtr& col_res,
541
75
                                                  size_t input_rows_count) {
542
75
        auto& col_res_chars = col_res->get_chars();
543
75
        auto& col_res_offsets = col_res->get_offsets();
544
75
        StringRef origin_str, insert_str;
545
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
546
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
547
75
        std::vector<size_t> utf8_origin_offsets;
548
252
        for (size_t i = 0; i < input_rows_count; i++) {
549
177
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
550
            // pos is 1-based index,so we need to minus 1
551
177
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
552
177
            const auto len = col_len[index_check_const<len_const>(i)];
553
177
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
554
177
            utf8_origin_offsets.clear();
555
556
1.10k
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
557
924
                utf8_origin_offsets.push_back(ni);
558
924
                char_size = get_utf8_byte_length(origin_str.data[ni]);
559
924
            }
560
561
177
            const size_t utf8_origin_size = utf8_origin_offsets.size();
562
563
177
            if (pos >= utf8_origin_size || pos < 0) {
564
                // If pos is not within the length of the string, the original string is returned.
565
38
                col_res->insert_data(origin_str.data, origin_str.size);
566
38
                continue;
567
38
            }
568
139
            col_res_chars.insert(
569
139
                    origin_str.data,
570
139
                    origin_str.data +
571
139
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
572
139
            if (pos + len >= utf8_origin_size || len < 0) {
573
35
                col_res_chars.insert(insert_str.begin(),
574
35
                                     insert_str.end()); // copy all of insert_str.
575
104
            } else {
576
104
                col_res_chars.insert(insert_str.begin(),
577
104
                                     insert_str.end()); // copy all of insert_str.
578
104
                col_res_chars.insert(
579
104
                        origin_str.data + utf8_origin_offsets[pos + len],
580
104
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
581
104
            }
582
139
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
583
139
            col_res_offsets.push_back(col_res_chars.size());
584
139
        }
585
75
    }
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
541
67
                                                  size_t input_rows_count) {
542
67
        auto& col_res_chars = col_res->get_chars();
543
67
        auto& col_res_offsets = col_res->get_offsets();
544
67
        StringRef origin_str, insert_str;
545
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
546
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
547
67
        std::vector<size_t> utf8_origin_offsets;
548
178
        for (size_t i = 0; i < input_rows_count; i++) {
549
111
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
550
            // pos is 1-based index,so we need to minus 1
551
111
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
552
111
            const auto len = col_len[index_check_const<len_const>(i)];
553
111
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
554
111
            utf8_origin_offsets.clear();
555
556
639
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
557
528
                utf8_origin_offsets.push_back(ni);
558
528
                char_size = get_utf8_byte_length(origin_str.data[ni]);
559
528
            }
560
561
111
            const size_t utf8_origin_size = utf8_origin_offsets.size();
562
563
111
            if (pos >= utf8_origin_size || pos < 0) {
564
                // If pos is not within the length of the string, the original string is returned.
565
22
                col_res->insert_data(origin_str.data, origin_str.size);
566
22
                continue;
567
22
            }
568
89
            col_res_chars.insert(
569
89
                    origin_str.data,
570
89
                    origin_str.data +
571
89
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
572
89
            if (pos + len >= utf8_origin_size || len < 0) {
573
23
                col_res_chars.insert(insert_str.begin(),
574
23
                                     insert_str.end()); // copy all of insert_str.
575
66
            } else {
576
66
                col_res_chars.insert(insert_str.begin(),
577
66
                                     insert_str.end()); // copy all of insert_str.
578
66
                col_res_chars.insert(
579
66
                        origin_str.data + utf8_origin_offsets[pos + len],
580
66
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
581
66
            }
582
89
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
583
89
            col_res_offsets.push_back(col_res_chars.size());
584
89
        }
585
67
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
541
1
                                                  size_t input_rows_count) {
542
1
        auto& col_res_chars = col_res->get_chars();
543
1
        auto& col_res_offsets = col_res->get_offsets();
544
1
        StringRef origin_str, insert_str;
545
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
546
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
547
1
        std::vector<size_t> utf8_origin_offsets;
548
31
        for (size_t i = 0; i < input_rows_count; i++) {
549
30
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
550
            // pos is 1-based index,so we need to minus 1
551
30
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
552
30
            const auto len = col_len[index_check_const<len_const>(i)];
553
30
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
554
30
            utf8_origin_offsets.clear();
555
556
209
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
557
179
                utf8_origin_offsets.push_back(ni);
558
179
                char_size = get_utf8_byte_length(origin_str.data[ni]);
559
179
            }
560
561
30
            const size_t utf8_origin_size = utf8_origin_offsets.size();
562
563
30
            if (pos >= utf8_origin_size || pos < 0) {
564
                // If pos is not within the length of the string, the original string is returned.
565
9
                col_res->insert_data(origin_str.data, origin_str.size);
566
9
                continue;
567
9
            }
568
21
            col_res_chars.insert(
569
21
                    origin_str.data,
570
21
                    origin_str.data +
571
21
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
572
21
            if (pos + len >= utf8_origin_size || len < 0) {
573
3
                col_res_chars.insert(insert_str.begin(),
574
3
                                     insert_str.end()); // copy all of insert_str.
575
18
            } else {
576
18
                col_res_chars.insert(insert_str.begin(),
577
18
                                     insert_str.end()); // copy all of insert_str.
578
18
                col_res_chars.insert(
579
18
                        origin_str.data + utf8_origin_offsets[pos + len],
580
18
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
581
18
            }
582
21
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
583
21
            col_res_offsets.push_back(col_res_chars.size());
584
21
        }
585
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
541
1
                                                  size_t input_rows_count) {
542
1
        auto& col_res_chars = col_res->get_chars();
543
1
        auto& col_res_offsets = col_res->get_offsets();
544
1
        StringRef origin_str, insert_str;
545
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
546
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
547
1
        std::vector<size_t> utf8_origin_offsets;
548
31
        for (size_t i = 0; i < input_rows_count; i++) {
549
30
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
550
            // pos is 1-based index,so we need to minus 1
551
30
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
552
30
            const auto len = col_len[index_check_const<len_const>(i)];
553
30
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
554
30
            utf8_origin_offsets.clear();
555
556
209
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
557
179
                utf8_origin_offsets.push_back(ni);
558
179
                char_size = get_utf8_byte_length(origin_str.data[ni]);
559
179
            }
560
561
30
            const size_t utf8_origin_size = utf8_origin_offsets.size();
562
563
30
            if (pos >= utf8_origin_size || pos < 0) {
564
                // If pos is not within the length of the string, the original string is returned.
565
4
                col_res->insert_data(origin_str.data, origin_str.size);
566
4
                continue;
567
4
            }
568
26
            col_res_chars.insert(
569
26
                    origin_str.data,
570
26
                    origin_str.data +
571
26
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
572
26
            if (pos + len >= utf8_origin_size || len < 0) {
573
8
                col_res_chars.insert(insert_str.begin(),
574
8
                                     insert_str.end()); // copy all of insert_str.
575
18
            } else {
576
18
                col_res_chars.insert(insert_str.begin(),
577
18
                                     insert_str.end()); // copy all of insert_str.
578
18
                col_res_chars.insert(
579
18
                        origin_str.data + utf8_origin_offsets[pos + len],
580
18
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
581
18
            }
582
26
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
583
26
            col_res_offsets.push_back(col_res_chars.size());
584
26
        }
585
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
541
6
                                                  size_t input_rows_count) {
542
6
        auto& col_res_chars = col_res->get_chars();
543
6
        auto& col_res_offsets = col_res->get_offsets();
544
6
        StringRef origin_str, insert_str;
545
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
546
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
547
6
        std::vector<size_t> utf8_origin_offsets;
548
12
        for (size_t i = 0; i < input_rows_count; i++) {
549
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
550
            // pos is 1-based index,so we need to minus 1
551
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
552
6
            const auto len = col_len[index_check_const<len_const>(i)];
553
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
554
6
            utf8_origin_offsets.clear();
555
556
44
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
557
38
                utf8_origin_offsets.push_back(ni);
558
38
                char_size = get_utf8_byte_length(origin_str.data[ni]);
559
38
            }
560
561
6
            const size_t utf8_origin_size = utf8_origin_offsets.size();
562
563
6
            if (pos >= utf8_origin_size || pos < 0) {
564
                // If pos is not within the length of the string, the original string is returned.
565
3
                col_res->insert_data(origin_str.data, origin_str.size);
566
3
                continue;
567
3
            }
568
3
            col_res_chars.insert(
569
3
                    origin_str.data,
570
3
                    origin_str.data +
571
3
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
572
3
            if (pos + len >= utf8_origin_size || len < 0) {
573
1
                col_res_chars.insert(insert_str.begin(),
574
1
                                     insert_str.end()); // copy all of insert_str.
575
2
            } else {
576
2
                col_res_chars.insert(insert_str.begin(),
577
2
                                     insert_str.end()); // copy all of insert_str.
578
2
                col_res_chars.insert(
579
2
                        origin_str.data + utf8_origin_offsets[pos + len],
580
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
581
2
            }
582
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
583
3
            col_res_offsets.push_back(col_res_chars.size());
584
3
        }
585
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
586
};
587
588
#include "common/compile_check_avoid_end.h"
589
} // namespace doris