Coverage Report

Created: 2026-04-10 12:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string_replace.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <cstring>
22
#include <string>
23
#include <string_view>
24
25
#include "common/compiler_util.h"
26
#include "common/status.h"
27
#include "core/assert_cast.h"
28
#include "core/block/block.h"
29
#include "core/block/column_numbers.h"
30
#include "core/column/column_const.h"
31
#include "core/column/column_nullable.h"
32
#include "core/column/column_string.h"
33
#include "core/column/column_vector.h"
34
#include "core/data_type/data_type_nullable.h"
35
#include "core/data_type/data_type_number.h"
36
#include "core/data_type/data_type_string.h"
37
#include "core/string_ref.h"
38
#include "exec/common/stringop_substring.h"
39
#include "exec/common/template_helpers.hpp"
40
#include "exprs/function/function.h"
41
#include "exprs/function/function_helpers.h"
42
#include "exprs/function_context.h"
43
#include "util/simd/vstring_function.h"
44
45
namespace doris {
46
#include "common/compile_check_avoid_begin.h"
47
48
struct ReplaceImpl {
49
    static constexpr auto name = "replace";
50
};
51
52
struct ReplaceEmptyImpl {
53
    static constexpr auto name = "replace_empty";
54
};
55
56
template <typename Impl, bool empty>
57
class FunctionReplace : public IFunction {
58
public:
59
    static constexpr auto name = Impl::name;
60
6.07k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE6createEv
Line
Count
Source
60
4.54k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE6createEv
Line
Count
Source
60
1.52k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
61
2
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE8get_nameB5cxx11Ev
Line
Count
Source
61
1
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE8get_nameB5cxx11Ev
Line
Count
Source
61
1
    String get_name() const override { return name; }
62
6.05k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE23get_number_of_argumentsEv
Line
Count
Source
62
4.53k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE23get_number_of_argumentsEv
Line
Count
Source
62
1.52k
    size_t get_number_of_arguments() const override { return 3; }
63
64
6.05k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
6.05k
        return std::make_shared<DataTypeString>();
66
6.05k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
64
4.53k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
4.53k
        return std::make_shared<DataTypeString>();
66
4.53k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
64
1.52k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
65
1.52k
        return std::make_shared<DataTypeString>();
66
1.52k
    }
67
68
12
    DataTypes get_variadic_argument_types_impl() const override {
69
12
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
70
12
                std::make_shared<DataTypeString>()};
71
12
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE32get_variadic_argument_types_implEv
Line
Count
Source
68
6
    DataTypes get_variadic_argument_types_impl() const override {
69
6
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
70
6
                std::make_shared<DataTypeString>()};
71
6
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE32get_variadic_argument_types_implEv
Line
Count
Source
68
6
    DataTypes get_variadic_argument_types_impl() const override {
69
6
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
70
6
                std::make_shared<DataTypeString>()};
71
6
    }
72
73
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
74
4.69k
                        uint32_t result, size_t input_rows_count) const override {
75
        // We need a local variable to hold a reference to the converted column.
76
        // So that the converted column will not be released before we use it.
77
4.69k
        ColumnPtr col[3];
78
4.69k
        bool col_const[3];
79
18.7k
        for (size_t i = 0; i < 3; ++i) {
80
14.0k
            std::tie(col[i], col_const[i]) =
81
14.0k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
82
14.0k
        }
83
84
4.69k
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
85
4.69k
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
86
4.69k
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
87
88
4.69k
        ColumnString::MutablePtr col_res = ColumnString::create();
89
90
4.69k
        std::visit(
91
4.69k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
16.2k
                    for (int i = 0; i < input_rows_count; ++i) {
93
11.5k
                        StringRef origin_str =
94
11.5k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
11.5k
                        StringRef old_str =
96
11.5k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
11.5k
                        StringRef new_str =
98
11.5k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
11.5k
                        std::string result =
101
11.5k
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
11.5k
                                        new_str.to_string_view());
103
104
11.5k
                        col_res->insert_data(result.data(), result.length());
105
11.5k
                    }
106
4.69k
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
91
163
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
575
                    for (int i = 0; i < input_rows_count; ++i) {
93
412
                        StringRef origin_str =
94
412
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
412
                        StringRef old_str =
96
412
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
412
                        StringRef new_str =
98
412
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
412
                        std::string result =
101
412
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
412
                                        new_str.to_string_view());
103
104
412
                        col_res->insert_data(result.data(), result.length());
105
412
                    }
106
163
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
91
3.02k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
12.4k
                    for (int i = 0; i < input_rows_count; ++i) {
93
9.39k
                        StringRef origin_str =
94
9.39k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
9.39k
                        StringRef old_str =
96
9.39k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
9.39k
                        StringRef new_str =
98
9.39k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
9.39k
                        std::string result =
101
9.39k
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
9.39k
                                        new_str.to_string_view());
103
104
9.39k
                        col_res->insert_data(result.data(), result.length());
105
9.39k
                    }
106
3.02k
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
91
132
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
479
                    for (int i = 0; i < input_rows_count; ++i) {
93
347
                        StringRef origin_str =
94
347
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
347
                        StringRef old_str =
96
347
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
347
                        StringRef new_str =
98
347
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
347
                        std::string result =
101
347
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
347
                                        new_str.to_string_view());
103
104
347
                        col_res->insert_data(result.data(), result.length());
105
347
                    }
106
132
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
91
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
250
                    for (int i = 0; i < input_rows_count; ++i) {
93
125
                        StringRef origin_str =
94
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
125
                        StringRef old_str =
96
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
125
                        StringRef new_str =
98
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
125
                        std::string result =
101
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
125
                                        new_str.to_string_view());
103
104
125
                        col_res->insert_data(result.data(), result.length());
105
125
                    }
106
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
107
4.69k
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
108
4.69k
                make_bool_variant(col_const[2]));
109
110
4.69k
        block.replace_by_position(result, std::move(col_res));
111
4.69k
        return Status::OK();
112
4.69k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
74
3.81k
                        uint32_t result, size_t input_rows_count) const override {
75
        // We need a local variable to hold a reference to the converted column.
76
        // So that the converted column will not be released before we use it.
77
3.81k
        ColumnPtr col[3];
78
3.81k
        bool col_const[3];
79
15.2k
        for (size_t i = 0; i < 3; ++i) {
80
11.4k
            std::tie(col[i], col_const[i]) =
81
11.4k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
82
11.4k
        }
83
84
3.81k
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
85
3.81k
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
86
3.81k
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
87
88
3.81k
        ColumnString::MutablePtr col_res = ColumnString::create();
89
90
3.81k
        std::visit(
91
3.81k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
3.81k
                    for (int i = 0; i < input_rows_count; ++i) {
93
3.81k
                        StringRef origin_str =
94
3.81k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
3.81k
                        StringRef old_str =
96
3.81k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
3.81k
                        StringRef new_str =
98
3.81k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
3.81k
                        std::string result =
101
3.81k
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
3.81k
                                        new_str.to_string_view());
103
104
3.81k
                        col_res->insert_data(result.data(), result.length());
105
3.81k
                    }
106
3.81k
                },
107
3.81k
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
108
3.81k
                make_bool_variant(col_const[2]));
109
110
3.81k
        block.replace_by_position(result, std::move(col_res));
111
3.81k
        return Status::OK();
112
3.81k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
74
882
                        uint32_t result, size_t input_rows_count) const override {
75
        // We need a local variable to hold a reference to the converted column.
76
        // So that the converted column will not be released before we use it.
77
882
        ColumnPtr col[3];
78
882
        bool col_const[3];
79
3.52k
        for (size_t i = 0; i < 3; ++i) {
80
2.64k
            std::tie(col[i], col_const[i]) =
81
2.64k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
82
2.64k
        }
83
84
882
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
85
882
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
86
882
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
87
88
882
        ColumnString::MutablePtr col_res = ColumnString::create();
89
90
882
        std::visit(
91
882
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
92
882
                    for (int i = 0; i < input_rows_count; ++i) {
93
882
                        StringRef origin_str =
94
882
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
95
882
                        StringRef old_str =
96
882
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
97
882
                        StringRef new_str =
98
882
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
99
100
882
                        std::string result =
101
882
                                replace(origin_str.to_string(), old_str.to_string_view(),
102
882
                                        new_str.to_string_view());
103
104
882
                        col_res->insert_data(result.data(), result.length());
105
882
                    }
106
882
                },
107
882
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
108
882
                make_bool_variant(col_const[2]));
109
110
882
        block.replace_by_position(result, std::move(col_res));
111
882
        return Status::OK();
112
882
    }
113
114
private:
115
11.5k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
116
11.5k
        if (old_str.empty()) {
117
503
            if constexpr (empty) {
118
252
                return str;
119
252
            } else {
120
                // Different from "Replace" only when the search string is empty.
121
                // it will insert `new_str` in front of every character and at the end of the old str.
122
251
                if (new_str.empty()) {
123
59
                    return str;
124
59
                }
125
192
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
126
190
                    std::string result;
127
190
                    ColumnString::check_chars_length(
128
190
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
129
190
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
130
651
                    for (char c : str) {
131
651
                        result += new_str;
132
651
                        result += c;
133
651
                    }
134
190
                    result += new_str;
135
190
                    return result;
136
190
                } else {
137
2
                    std::string result;
138
2
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
139
11
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
140
9
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
141
9
                        result += new_str;
142
9
                        result.append(&str[i], utf8_char_len);
143
9
                    }
144
2
                    result += new_str;
145
2
                    ColumnString::check_chars_length(result.size(), 0);
146
2
                    return result;
147
2
                }
148
192
            }
149
11.0k
        } else {
150
11.0k
            std::string::size_type pos = 0;
151
11.0k
            std::string::size_type oldLen = old_str.size();
152
11.0k
            std::string::size_type newLen = new_str.size();
153
12.3k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
154
1.35k
                str.replace(pos, oldLen, new_str);
155
1.35k
                pos += newLen;
156
1.35k
            }
157
11.0k
            return str;
158
11.0k
        }
159
11.5k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
115
10.4k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
116
10.4k
        if (old_str.empty()) {
117
252
            if constexpr (empty) {
118
252
                return str;
119
            } else {
120
                // Different from "Replace" only when the search string is empty.
121
                // it will insert `new_str` in front of every character and at the end of the old str.
122
                if (new_str.empty()) {
123
                    return str;
124
                }
125
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
126
                    std::string result;
127
                    ColumnString::check_chars_length(
128
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
129
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
130
                    for (char c : str) {
131
                        result += new_str;
132
                        result += c;
133
                    }
134
                    result += new_str;
135
                    return result;
136
                } else {
137
                    std::string result;
138
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
139
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
140
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
141
                        result += new_str;
142
                        result.append(&str[i], utf8_char_len);
143
                    }
144
                    result += new_str;
145
                    ColumnString::check_chars_length(result.size(), 0);
146
                    return result;
147
                }
148
            }
149
10.1k
        } else {
150
10.1k
            std::string::size_type pos = 0;
151
10.1k
            std::string::size_type oldLen = old_str.size();
152
10.1k
            std::string::size_type newLen = new_str.size();
153
11.1k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
154
1.01k
                str.replace(pos, oldLen, new_str);
155
1.01k
                pos += newLen;
156
1.01k
            }
157
10.1k
            return str;
158
10.1k
        }
159
10.4k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
115
1.09k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
116
1.09k
        if (old_str.empty()) {
117
            if constexpr (empty) {
118
                return str;
119
251
            } else {
120
                // Different from "Replace" only when the search string is empty.
121
                // it will insert `new_str` in front of every character and at the end of the old str.
122
251
                if (new_str.empty()) {
123
59
                    return str;
124
59
                }
125
192
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
126
190
                    std::string result;
127
190
                    ColumnString::check_chars_length(
128
190
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
129
190
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
130
651
                    for (char c : str) {
131
651
                        result += new_str;
132
651
                        result += c;
133
651
                    }
134
190
                    result += new_str;
135
190
                    return result;
136
190
                } else {
137
2
                    std::string result;
138
2
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
139
11
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
140
9
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
141
9
                        result += new_str;
142
9
                        result.append(&str[i], utf8_char_len);
143
9
                    }
144
2
                    result += new_str;
145
2
                    ColumnString::check_chars_length(result.size(), 0);
146
2
                    return result;
147
2
                }
148
192
            }
149
846
        } else {
150
846
            std::string::size_type pos = 0;
151
846
            std::string::size_type oldLen = old_str.size();
152
846
            std::string::size_type newLen = new_str.size();
153
1.17k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
154
332
                str.replace(pos, oldLen, new_str);
155
332
                pos += newLen;
156
332
            }
157
846
            return str;
158
846
        }
159
1.09k
    }
160
};
161
162
struct ReverseImpl {
163
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
164
72
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
165
72
        auto rows_count = offsets.size();
166
72
        res_offsets.resize(rows_count);
167
72
        res_data.reserve(data.size());
168
215
        for (ssize_t i = 0; i < rows_count; ++i) {
169
143
            auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
170
143
            int64_t src_len = offsets[i] - offsets[i - 1];
171
143
            std::string dst;
172
143
            dst.resize(src_len);
173
143
            simd::VStringFunctions::reverse(StringRef((uint8_t*)src_str, src_len), &dst);
174
143
            StringOP::push_value_string(std::string_view(dst.data(), src_len), i, res_data,
175
143
                                        res_offsets);
176
143
        }
177
72
        return Status::OK();
178
72
    }
179
};
180
181
template <typename Impl>
182
class FunctionSubReplace : public IFunction {
183
public:
184
    static constexpr auto name = "sub_replace";
185
186
85
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE6createEv
Line
Count
Source
186
37
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE6createEv
Line
Count
Source
186
48
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
187
188
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE8get_nameB5cxx11Ev
189
190
71
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
191
71
        return make_nullable(std::make_shared<DataTypeString>());
192
71
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
190
30
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
191
30
        return make_nullable(std::make_shared<DataTypeString>());
192
30
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
190
41
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
191
41
        return make_nullable(std::make_shared<DataTypeString>());
192
41
    }
193
194
73
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE11is_variadicEv
Line
Count
Source
194
31
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE11is_variadicEv
Line
Count
Source
194
42
    bool is_variadic() const override { return true; }
195
196
12
    DataTypes get_variadic_argument_types_impl() const override {
197
12
        return Impl::get_variadic_argument_types();
198
12
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE32get_variadic_argument_types_implEv
Line
Count
Source
196
6
    DataTypes get_variadic_argument_types_impl() const override {
197
6
        return Impl::get_variadic_argument_types();
198
6
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE32get_variadic_argument_types_implEv
Line
Count
Source
196
6
    DataTypes get_variadic_argument_types_impl() const override {
197
6
        return Impl::get_variadic_argument_types();
198
6
    }
199
200
0
    size_t get_number_of_arguments() const override {
201
0
        return get_variadic_argument_types_impl().size();
202
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE23get_number_of_argumentsEv
203
204
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
205
93
                        uint32_t result, size_t input_rows_count) const override {
206
93
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
207
93
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
205
41
                        uint32_t result, size_t input_rows_count) const override {
206
41
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
207
41
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
205
52
                        uint32_t result, size_t input_rows_count) const override {
206
52
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
207
52
    }
208
};
209
210
struct SubReplaceImpl {
211
    static Status replace_execute(Block& block, const ColumnNumbers& arguments, uint32_t result,
212
94
                                  size_t input_rows_count) {
213
94
        auto res_column = ColumnString::create();
214
94
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
215
94
        auto args_null_map = ColumnUInt8::create(input_rows_count, 0);
216
94
        ColumnPtr argument_columns[4];
217
94
        bool col_const[4];
218
470
        for (int i = 0; i < 4; ++i) {
219
376
            std::tie(argument_columns[i], col_const[i]) =
220
376
                    unpack_if_const(block.get_by_position(arguments[i]).column);
221
376
        }
222
94
        const auto* data_column = assert_cast<const ColumnString*>(argument_columns[0].get());
223
94
        const auto* mask_column = assert_cast<const ColumnString*>(argument_columns[1].get());
224
94
        const auto* start_column = assert_cast<const ColumnInt32*>(argument_columns[2].get());
225
94
        const auto* length_column = assert_cast<const ColumnInt32*>(argument_columns[3].get());
226
227
94
        std::visit(
228
94
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
229
94
                    if (data_column->is_ascii()) {
230
70
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
231
70
                                data_column, mask_column, start_column->get_data(),
232
70
                                length_column->get_data(), args_null_map->get_data(), result_column,
233
70
                                input_rows_count);
234
70
                    } else {
235
24
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
236
24
                                data_column, mask_column, start_column->get_data(),
237
24
                                length_column->get_data(), args_null_map->get_data(), result_column,
238
24
                                input_rows_count);
239
24
                    }
240
94
                },
_ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
228
94
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
229
94
                    if (data_column->is_ascii()) {
230
70
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
231
70
                                data_column, mask_column, start_column->get_data(),
232
70
                                length_column->get_data(), args_null_map->get_data(), result_column,
233
70
                                input_rows_count);
234
70
                    } else {
235
24
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
236
24
                                data_column, mask_column, start_column->get_data(),
237
24
                                length_column->get_data(), args_null_map->get_data(), result_column,
238
24
                                input_rows_count);
239
24
                    }
240
94
                },
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_
241
94
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
242
94
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
243
94
        block.get_by_position(result).column =
244
94
                ColumnNullable::create(std::move(res_column), std::move(args_null_map));
245
94
        return Status::OK();
246
94
    }
247
248
private:
249
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
250
    static void vector_ascii(const ColumnString* data_column, const ColumnString* mask_column,
251
                             const PaddedPODArray<Int32>& args_start,
252
                             const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
253
70
                             ColumnString* result_column, size_t input_rows_count) {
254
70
        ColumnString::Chars& res_chars = result_column->get_chars();
255
70
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
256
10.4k
        for (size_t row = 0; row < input_rows_count; ++row) {
257
10.3k
            StringRef origin_str =
258
10.3k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
259
10.3k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
260
10.3k
            const auto start = args_start[index_check_const<start_const>(row)];
261
10.3k
            const auto length = args_length[index_check_const<len_const>(row)];
262
10.3k
            const size_t origin_str_len = origin_str.size;
263
            //input is null, start < 0, len < 0, str_size <= start. return NULL
264
10.3k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
265
10.2k
                res_offsets.push_back(res_chars.size());
266
10.2k
                args_null_map[row] = 1;
267
10.2k
            } else {
268
92
                std::string_view replace_str = new_str.to_string_view();
269
92
                std::string result = origin_str.to_string();
270
92
                result.replace(start, length, replace_str);
271
92
                result_column->insert_data(result.data(), result.length());
272
92
            }
273
10.3k
        }
274
70
    }
_ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Line
Count
Source
253
70
                             ColumnString* result_column, size_t input_rows_count) {
254
70
        ColumnString::Chars& res_chars = result_column->get_chars();
255
70
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
256
10.4k
        for (size_t row = 0; row < input_rows_count; ++row) {
257
10.3k
            StringRef origin_str =
258
10.3k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
259
10.3k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
260
10.3k
            const auto start = args_start[index_check_const<start_const>(row)];
261
10.3k
            const auto length = args_length[index_check_const<len_const>(row)];
262
10.3k
            const size_t origin_str_len = origin_str.size;
263
            //input is null, start < 0, len < 0, str_size <= start. return NULL
264
10.3k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
265
10.2k
                res_offsets.push_back(res_chars.size());
266
10.2k
                args_null_map[row] = 1;
267
10.2k
            } else {
268
92
                std::string_view replace_str = new_str.to_string_view();
269
92
                std::string result = origin_str.to_string();
270
92
                result.replace(start, length, replace_str);
271
92
                result_column->insert_data(result.data(), result.length());
272
92
            }
273
10.3k
        }
274
70
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
275
276
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
277
    static void vector_utf8(const ColumnString* data_column, const ColumnString* mask_column,
278
                            const PaddedPODArray<Int32>& args_start,
279
                            const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
280
24
                            ColumnString* result_column, size_t input_rows_count) {
281
24
        ColumnString::Chars& res_chars = result_column->get_chars();
282
24
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
283
284
48
        for (size_t row = 0; row < input_rows_count; ++row) {
285
24
            StringRef origin_str =
286
24
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
287
24
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
288
24
            const auto start = args_start[index_check_const<start_const>(row)];
289
24
            const auto length = args_length[index_check_const<len_const>(row)];
290
            //input is null, start < 0, len < 0 return NULL
291
24
            if (args_null_map[row] || start < 0 || length < 0) {
292
2
                res_offsets.push_back(res_chars.size());
293
2
                args_null_map[row] = 1;
294
2
                continue;
295
2
            }
296
297
22
            const auto [start_byte_len, start_char_len] =
298
22
                    simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(),
299
22
                                                                           origin_str.end(), start);
300
301
            // start >= orgin.size
302
22
            DCHECK(start_char_len <= start);
303
22
            if (start_byte_len == origin_str.size) {
304
8
                res_offsets.push_back(res_chars.size());
305
8
                args_null_map[row] = 1;
306
8
                continue;
307
8
            }
308
309
14
            auto [end_byte_len, end_char_len] =
310
14
                    simd::VStringFunctions::iterate_utf8_with_limit_length(
311
14
                            origin_str.begin() + start_byte_len, origin_str.end(), length);
312
14
            DCHECK(end_char_len <= length);
313
14
            std::string_view replace_str = new_str.to_string_view();
314
14
            std::string result = origin_str.to_string();
315
14
            result.replace(start_byte_len, end_byte_len, replace_str);
316
14
            result_column->insert_data(result.data(), result.length());
317
14
        }
318
24
    }
_ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Line
Count
Source
280
24
                            ColumnString* result_column, size_t input_rows_count) {
281
24
        ColumnString::Chars& res_chars = result_column->get_chars();
282
24
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
283
284
48
        for (size_t row = 0; row < input_rows_count; ++row) {
285
24
            StringRef origin_str =
286
24
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
287
24
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
288
24
            const auto start = args_start[index_check_const<start_const>(row)];
289
24
            const auto length = args_length[index_check_const<len_const>(row)];
290
            //input is null, start < 0, len < 0 return NULL
291
24
            if (args_null_map[row] || start < 0 || length < 0) {
292
2
                res_offsets.push_back(res_chars.size());
293
2
                args_null_map[row] = 1;
294
2
                continue;
295
2
            }
296
297
22
            const auto [start_byte_len, start_char_len] =
298
22
                    simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(),
299
22
                                                                           origin_str.end(), start);
300
301
            // start >= orgin.size
302
22
            DCHECK(start_char_len <= start);
303
22
            if (start_byte_len == origin_str.size) {
304
8
                res_offsets.push_back(res_chars.size());
305
8
                args_null_map[row] = 1;
306
8
                continue;
307
8
            }
308
309
14
            auto [end_byte_len, end_char_len] =
310
14
                    simd::VStringFunctions::iterate_utf8_with_limit_length(
311
14
                            origin_str.begin() + start_byte_len, origin_str.end(), length);
312
            DCHECK(end_char_len <= length);
313
14
            std::string_view replace_str = new_str.to_string_view();
314
14
            std::string result = origin_str.to_string();
315
14
            result.replace(start_byte_len, end_byte_len, replace_str);
316
14
            result_column->insert_data(result.data(), result.length());
317
14
        }
318
24
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
319
};
320
321
struct SubReplaceThreeImpl {
322
6
    static DataTypes get_variadic_argument_types() {
323
6
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
324
6
                std::make_shared<DataTypeInt32>()};
325
6
    }
326
327
    static Status execute_impl(FunctionContext* context, Block& block,
328
                               const ColumnNumbers& arguments, uint32_t result,
329
41
                               size_t input_rows_count) {
330
41
        auto params = ColumnInt32::create(input_rows_count);
331
41
        auto& strlen_data = params->get_data();
332
333
41
        auto str_col =
334
41
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
335
41
        if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
336
0
            str_col = nullable->get_nested_column_ptr();
337
0
        }
338
41
        const auto* str_column = assert_cast<const ColumnString*>(str_col.get());
339
        // use utf8 len
340
116
        for (int i = 0; i < input_rows_count; ++i) {
341
75
            StringRef str_ref = str_column->get_data_at(i);
342
75
            strlen_data[i] = simd::VStringFunctions::get_char_len(str_ref.data, str_ref.size);
343
75
        }
344
345
41
        block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"});
346
41
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2],
347
41
                                        block.columns() - 1};
348
41
        return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count);
349
41
    }
350
};
351
352
struct SubReplaceFourImpl {
353
6
    static DataTypes get_variadic_argument_types() {
354
6
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
355
6
                std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()};
356
6
    }
357
358
    static Status execute_impl(FunctionContext* context, Block& block,
359
                               const ColumnNumbers& arguments, uint32_t result,
360
52
                               size_t input_rows_count) {
361
52
        return SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count);
362
52
    }
363
};
364
365
class FunctionOverlay : public IFunction {
366
public:
367
    static constexpr auto name = "overlay";
368
177
    static FunctionPtr create() { return std::make_shared<FunctionOverlay>(); }
369
1
    String get_name() const override { return name; }
370
170
    size_t get_number_of_arguments() const override { return 4; }
371
372
170
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
373
170
        return std::make_shared<DataTypeString>();
374
170
    }
375
376
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
377
154
                        uint32_t result, size_t input_rows_count) const override {
378
154
        DCHECK_EQ(arguments.size(), 4);
379
380
154
        bool col_const[4];
381
154
        ColumnPtr argument_columns[4];
382
770
        for (int i = 0; i < 4; ++i) {
383
616
            std::tie(argument_columns[i], col_const[i]) =
384
616
                    unpack_if_const(block.get_by_position(arguments[i]).column);
385
616
        }
386
387
154
        const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get());
388
389
154
        const auto* col_pos =
390
154
                assert_cast<const ColumnInt32*>(argument_columns[1].get())->get_data().data();
391
154
        const auto* col_len =
392
154
                assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_data().data();
393
154
        const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get());
394
395
154
        ColumnString::MutablePtr col_res = ColumnString::create();
396
397
        // if all input string is ascii, we can use ascii function to handle it
398
154
        const bool is_all_ascii = col_origin->is_ascii() && col_insert->is_ascii();
399
154
        std::visit(
400
154
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
401
154
                    if (is_all_ascii) {
402
79
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
403
79
                                col_origin, col_pos, col_len, col_insert, col_res,
404
79
                                input_rows_count);
405
79
                    } else {
406
75
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
407
75
                                col_origin, col_pos, col_len, col_insert, col_res,
408
75
                                input_rows_count);
409
75
                    }
410
154
                },
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SH_EEDaSA_SB_SC_SD_
Line
Count
Source
400
139
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
401
139
                    if (is_all_ascii) {
402
72
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
403
72
                                col_origin, col_pos, col_len, col_insert, col_res,
404
72
                                input_rows_count);
405
72
                    } else {
406
67
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
407
67
                                col_origin, col_pos, col_len, col_insert, col_res,
408
67
                                input_rows_count);
409
67
                    }
410
139
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SG_IbLb1EEEEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESH_EEDaSA_SB_SC_SD_
Line
Count
Source
400
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
401
1
                    if (is_all_ascii) {
402
0
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
403
0
                                col_origin, col_pos, col_len, col_insert, col_res,
404
0
                                input_rows_count);
405
1
                    } else {
406
1
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
407
1
                                col_origin, col_pos, col_len, col_insert, col_res,
408
1
                                input_rows_count);
409
1
                    }
410
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SH_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SI_EEDaSA_SB_SC_SD_
Line
Count
Source
400
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
401
1
                    if (is_all_ascii) {
402
0
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
403
0
                                col_origin, col_pos, col_len, col_insert, col_res,
404
0
                                input_rows_count);
405
1
                    } else {
406
1
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
407
1
                                col_origin, col_pos, col_len, col_insert, col_res,
408
1
                                input_rows_count);
409
1
                    }
410
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SH_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SI_EEDaSA_SB_SC_SD_
Line
Count
Source
400
12
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
401
12
                    if (is_all_ascii) {
402
6
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
403
6
                                col_origin, col_pos, col_len, col_insert, col_res,
404
6
                                input_rows_count);
405
6
                    } else {
406
6
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
407
6
                                col_origin, col_pos, col_len, col_insert, col_res,
408
6
                                input_rows_count);
409
6
                    }
410
12
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SI_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SH_EEDaSA_SB_SC_SD_
Line
Count
Source
400
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
401
1
                    if (is_all_ascii) {
402
1
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
403
1
                                col_origin, col_pos, col_len, col_insert, col_res,
404
1
                                input_rows_count);
405
1
                    } else {
406
0
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
407
0
                                col_origin, col_pos, col_len, col_insert, col_res,
408
0
                                input_rows_count);
409
0
                    }
410
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SG_IbLb0EEEEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SH_EEDaSA_SB_SC_SD_
411
154
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
412
154
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
413
154
        block.replace_by_position(result, std::move(col_res));
414
154
        return Status::OK();
415
154
    }
416
417
private:
418
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
419
    static void vector_ascii(const ColumnString* col_origin, int const* col_pos, int const* col_len,
420
                             const ColumnString* col_insert, ColumnString::MutablePtr& col_res,
421
79
                             size_t input_rows_count) {
422
79
        auto& col_res_chars = col_res->get_chars();
423
79
        auto& col_res_offsets = col_res->get_offsets();
424
79
        StringRef origin_str, insert_str;
425
187
        for (size_t i = 0; i < input_rows_count; i++) {
426
108
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
427
            // pos is 1-based index,so we need to minus 1
428
108
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
429
108
            const auto len = col_len[index_check_const<len_const>(i)];
430
108
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
431
108
            const auto origin_size = origin_str.size;
432
108
            if (pos >= origin_size || pos < 0) {
433
                // If pos is not within the length of the string, the original string is returned.
434
26
                col_res->insert_data(origin_str.data, origin_str.size);
435
26
                continue;
436
26
            }
437
82
            col_res_chars.insert(origin_str.data,
438
82
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
439
82
            if (pos + len > origin_size || len < 0) {
440
15
                col_res_chars.insert(insert_str.begin(),
441
15
                                     insert_str.end()); // copy all of insert_str.
442
67
            } else {
443
67
                col_res_chars.insert(insert_str.begin(),
444
67
                                     insert_str.end()); // copy all of insert_str.
445
67
                col_res_chars.insert(
446
67
                        origin_str.data + pos + len,
447
67
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
448
67
            }
449
82
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
450
82
            col_res_offsets.push_back(col_res_chars.size());
451
82
        }
452
79
    }
_ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
421
72
                             size_t input_rows_count) {
422
72
        auto& col_res_chars = col_res->get_chars();
423
72
        auto& col_res_offsets = col_res->get_offsets();
424
72
        StringRef origin_str, insert_str;
425
144
        for (size_t i = 0; i < input_rows_count; i++) {
426
72
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
427
            // pos is 1-based index,so we need to minus 1
428
72
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
429
72
            const auto len = col_len[index_check_const<len_const>(i)];
430
72
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
431
72
            const auto origin_size = origin_str.size;
432
72
            if (pos >= origin_size || pos < 0) {
433
                // If pos is not within the length of the string, the original string is returned.
434
18
                col_res->insert_data(origin_str.data, origin_str.size);
435
18
                continue;
436
18
            }
437
54
            col_res_chars.insert(origin_str.data,
438
54
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
439
54
            if (pos + len > origin_size || len < 0) {
440
11
                col_res_chars.insert(insert_str.begin(),
441
11
                                     insert_str.end()); // copy all of insert_str.
442
43
            } else {
443
43
                col_res_chars.insert(insert_str.begin(),
444
43
                                     insert_str.end()); // copy all of insert_str.
445
43
                col_res_chars.insert(
446
43
                        origin_str.data + pos + len,
447
43
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
448
43
            }
449
54
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
450
54
            col_res_offsets.push_back(col_res_chars.size());
451
54
        }
452
72
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
421
6
                             size_t input_rows_count) {
422
6
        auto& col_res_chars = col_res->get_chars();
423
6
        auto& col_res_offsets = col_res->get_offsets();
424
6
        StringRef origin_str, insert_str;
425
12
        for (size_t i = 0; i < input_rows_count; i++) {
426
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
427
            // pos is 1-based index,so we need to minus 1
428
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
429
6
            const auto len = col_len[index_check_const<len_const>(i)];
430
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
431
6
            const auto origin_size = origin_str.size;
432
6
            if (pos >= origin_size || pos < 0) {
433
                // If pos is not within the length of the string, the original string is returned.
434
3
                col_res->insert_data(origin_str.data, origin_str.size);
435
3
                continue;
436
3
            }
437
3
            col_res_chars.insert(origin_str.data,
438
3
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
439
3
            if (pos + len > origin_size || len < 0) {
440
1
                col_res_chars.insert(insert_str.begin(),
441
1
                                     insert_str.end()); // copy all of insert_str.
442
2
            } else {
443
2
                col_res_chars.insert(insert_str.begin(),
444
2
                                     insert_str.end()); // copy all of insert_str.
445
2
                col_res_chars.insert(
446
2
                        origin_str.data + pos + len,
447
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
448
2
            }
449
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
450
3
            col_res_offsets.push_back(col_res_chars.size());
451
3
        }
452
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
421
1
                             size_t input_rows_count) {
422
1
        auto& col_res_chars = col_res->get_chars();
423
1
        auto& col_res_offsets = col_res->get_offsets();
424
1
        StringRef origin_str, insert_str;
425
31
        for (size_t i = 0; i < input_rows_count; i++) {
426
30
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
427
            // pos is 1-based index,so we need to minus 1
428
30
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
429
30
            const auto len = col_len[index_check_const<len_const>(i)];
430
30
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
431
30
            const auto origin_size = origin_str.size;
432
30
            if (pos >= origin_size || pos < 0) {
433
                // If pos is not within the length of the string, the original string is returned.
434
5
                col_res->insert_data(origin_str.data, origin_str.size);
435
5
                continue;
436
5
            }
437
25
            col_res_chars.insert(origin_str.data,
438
25
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
439
25
            if (pos + len > origin_size || len < 0) {
440
3
                col_res_chars.insert(insert_str.begin(),
441
3
                                     insert_str.end()); // copy all of insert_str.
442
22
            } else {
443
22
                col_res_chars.insert(insert_str.begin(),
444
22
                                     insert_str.end()); // copy all of insert_str.
445
22
                col_res_chars.insert(
446
22
                        origin_str.data + pos + len,
447
22
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
448
22
            }
449
25
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
450
25
            col_res_offsets.push_back(col_res_chars.size());
451
25
        }
452
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
453
454
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
455
    NO_SANITIZE_UNDEFINED static void vector_utf8(const ColumnString* col_origin,
456
                                                  int const* col_pos, int const* col_len,
457
                                                  const ColumnString* col_insert,
458
                                                  ColumnString::MutablePtr& col_res,
459
75
                                                  size_t input_rows_count) {
460
75
        auto& col_res_chars = col_res->get_chars();
461
75
        auto& col_res_offsets = col_res->get_offsets();
462
75
        StringRef origin_str, insert_str;
463
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
464
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
465
75
        std::vector<size_t> utf8_origin_offsets;
466
252
        for (size_t i = 0; i < input_rows_count; i++) {
467
177
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
468
            // pos is 1-based index,so we need to minus 1
469
177
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
470
177
            const auto len = col_len[index_check_const<len_const>(i)];
471
177
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
472
177
            utf8_origin_offsets.clear();
473
474
1.10k
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
475
924
                utf8_origin_offsets.push_back(ni);
476
924
                char_size = get_utf8_byte_length(origin_str.data[ni]);
477
924
            }
478
479
177
            const size_t utf8_origin_size = utf8_origin_offsets.size();
480
481
177
            if (pos >= utf8_origin_size || pos < 0) {
482
                // If pos is not within the length of the string, the original string is returned.
483
38
                col_res->insert_data(origin_str.data, origin_str.size);
484
38
                continue;
485
38
            }
486
139
            col_res_chars.insert(
487
139
                    origin_str.data,
488
139
                    origin_str.data +
489
139
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
490
139
            if (pos + len >= utf8_origin_size || len < 0) {
491
35
                col_res_chars.insert(insert_str.begin(),
492
35
                                     insert_str.end()); // copy all of insert_str.
493
104
            } else {
494
104
                col_res_chars.insert(insert_str.begin(),
495
104
                                     insert_str.end()); // copy all of insert_str.
496
104
                col_res_chars.insert(
497
104
                        origin_str.data + utf8_origin_offsets[pos + len],
498
104
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
499
104
            }
500
139
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
501
139
            col_res_offsets.push_back(col_res_chars.size());
502
139
        }
503
75
    }
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
459
67
                                                  size_t input_rows_count) {
460
67
        auto& col_res_chars = col_res->get_chars();
461
67
        auto& col_res_offsets = col_res->get_offsets();
462
67
        StringRef origin_str, insert_str;
463
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
464
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
465
67
        std::vector<size_t> utf8_origin_offsets;
466
178
        for (size_t i = 0; i < input_rows_count; i++) {
467
111
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
468
            // pos is 1-based index,so we need to minus 1
469
111
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
470
111
            const auto len = col_len[index_check_const<len_const>(i)];
471
111
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
472
111
            utf8_origin_offsets.clear();
473
474
639
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
475
528
                utf8_origin_offsets.push_back(ni);
476
528
                char_size = get_utf8_byte_length(origin_str.data[ni]);
477
528
            }
478
479
111
            const size_t utf8_origin_size = utf8_origin_offsets.size();
480
481
111
            if (pos >= utf8_origin_size || pos < 0) {
482
                // If pos is not within the length of the string, the original string is returned.
483
22
                col_res->insert_data(origin_str.data, origin_str.size);
484
22
                continue;
485
22
            }
486
89
            col_res_chars.insert(
487
89
                    origin_str.data,
488
89
                    origin_str.data +
489
89
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
490
89
            if (pos + len >= utf8_origin_size || len < 0) {
491
23
                col_res_chars.insert(insert_str.begin(),
492
23
                                     insert_str.end()); // copy all of insert_str.
493
66
            } else {
494
66
                col_res_chars.insert(insert_str.begin(),
495
66
                                     insert_str.end()); // copy all of insert_str.
496
66
                col_res_chars.insert(
497
66
                        origin_str.data + utf8_origin_offsets[pos + len],
498
66
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
499
66
            }
500
89
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
501
89
            col_res_offsets.push_back(col_res_chars.size());
502
89
        }
503
67
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
459
1
                                                  size_t input_rows_count) {
460
1
        auto& col_res_chars = col_res->get_chars();
461
1
        auto& col_res_offsets = col_res->get_offsets();
462
1
        StringRef origin_str, insert_str;
463
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
464
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
465
1
        std::vector<size_t> utf8_origin_offsets;
466
31
        for (size_t i = 0; i < input_rows_count; i++) {
467
30
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
468
            // pos is 1-based index,so we need to minus 1
469
30
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
470
30
            const auto len = col_len[index_check_const<len_const>(i)];
471
30
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
472
30
            utf8_origin_offsets.clear();
473
474
209
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
475
179
                utf8_origin_offsets.push_back(ni);
476
179
                char_size = get_utf8_byte_length(origin_str.data[ni]);
477
179
            }
478
479
30
            const size_t utf8_origin_size = utf8_origin_offsets.size();
480
481
30
            if (pos >= utf8_origin_size || pos < 0) {
482
                // If pos is not within the length of the string, the original string is returned.
483
9
                col_res->insert_data(origin_str.data, origin_str.size);
484
9
                continue;
485
9
            }
486
21
            col_res_chars.insert(
487
21
                    origin_str.data,
488
21
                    origin_str.data +
489
21
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
490
21
            if (pos + len >= utf8_origin_size || len < 0) {
491
3
                col_res_chars.insert(insert_str.begin(),
492
3
                                     insert_str.end()); // copy all of insert_str.
493
18
            } else {
494
18
                col_res_chars.insert(insert_str.begin(),
495
18
                                     insert_str.end()); // copy all of insert_str.
496
18
                col_res_chars.insert(
497
18
                        origin_str.data + utf8_origin_offsets[pos + len],
498
18
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
499
18
            }
500
21
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
501
21
            col_res_offsets.push_back(col_res_chars.size());
502
21
        }
503
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
459
1
                                                  size_t input_rows_count) {
460
1
        auto& col_res_chars = col_res->get_chars();
461
1
        auto& col_res_offsets = col_res->get_offsets();
462
1
        StringRef origin_str, insert_str;
463
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
464
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
465
1
        std::vector<size_t> utf8_origin_offsets;
466
31
        for (size_t i = 0; i < input_rows_count; i++) {
467
30
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
468
            // pos is a 1-based character index, so subtract 1 to make it 0-based
469
30
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
470
30
            const auto len = col_len[index_check_const<len_const>(i)];
471
30
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
472
30
            utf8_origin_offsets.clear();
473
474
209
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
475
179
                utf8_origin_offsets.push_back(ni);
476
179
                char_size = get_utf8_byte_length(origin_str.data[ni]);
477
179
            }
478
479
30
            const size_t utf8_origin_size = utf8_origin_offsets.size();
480
481
30
            if (pos >= utf8_origin_size || pos < 0) {
482
                // If pos is not within the length of the string, the original string is returned.
483
4
                col_res->insert_data(origin_str.data, origin_str.size);
484
4
                continue;
485
4
            }
486
26
            col_res_chars.insert(
487
26
                    origin_str.data,
488
26
                    origin_str.data +
489
26
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
490
26
            if (pos + len >= utf8_origin_size || len < 0) {
491
8
                col_res_chars.insert(insert_str.begin(),
492
8
                                     insert_str.end()); // copy all of insert_str.
493
18
            } else {
494
18
                col_res_chars.insert(insert_str.begin(),
495
18
                                     insert_str.end()); // copy all of insert_str.
496
18
                col_res_chars.insert(
497
18
                        origin_str.data + utf8_origin_offsets[pos + len],
498
18
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
499
18
            }
500
26
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
501
26
            col_res_offsets.push_back(col_res_chars.size());
502
26
        }
503
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
459
6
                                                  size_t input_rows_count) {
460
6
        auto& col_res_chars = col_res->get_chars();
461
6
        auto& col_res_offsets = col_res->get_offsets();
462
6
        StringRef origin_str, insert_str;
463
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
464
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
465
6
        std::vector<size_t> utf8_origin_offsets;
466
12
        for (size_t i = 0; i < input_rows_count; i++) {
467
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
468
            // pos is a 1-based character index, so subtract 1 to make it 0-based
469
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
470
6
            const auto len = col_len[index_check_const<len_const>(i)];
471
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
472
6
            utf8_origin_offsets.clear();
473
474
44
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
475
38
                utf8_origin_offsets.push_back(ni);
476
38
                char_size = get_utf8_byte_length(origin_str.data[ni]);
477
38
            }
478
479
6
            const size_t utf8_origin_size = utf8_origin_offsets.size();
480
481
6
            if (pos >= utf8_origin_size || pos < 0) {
482
                // If pos is not within the length of the string, the original string is returned.
483
3
                col_res->insert_data(origin_str.data, origin_str.size);
484
3
                continue;
485
3
            }
486
3
            col_res_chars.insert(
487
3
                    origin_str.data,
488
3
                    origin_str.data +
489
3
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
490
3
            if (pos + len >= utf8_origin_size || len < 0) {
491
1
                col_res_chars.insert(insert_str.begin(),
492
1
                                     insert_str.end()); // copy all of insert_str.
493
2
            } else {
494
2
                col_res_chars.insert(insert_str.begin(),
495
2
                                     insert_str.end()); // copy all of insert_str.
496
2
                col_res_chars.insert(
497
2
                        origin_str.data + utf8_origin_offsets[pos + len],
498
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
499
2
            }
500
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
501
3
            col_res_offsets.push_back(col_res_chars.size());
502
3
        }
503
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
504
};
505
506
#include "common/compile_check_avoid_end.h"
507
} // namespace doris