Coverage Report

Created: 2026-03-19 07:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/stringop_substring.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <sys/types.h>
21
22
#include <algorithm>
23
#include <array>
24
#include <boost/iterator/iterator_facade.hpp>
25
#include <boost/locale.hpp>
26
#include <climits>
27
#include <cmath>
28
#include <cstddef>
29
#include <cstdlib>
30
#include <cstring>
31
#include <utility>
32
#include <vector>
33
34
#include "common/compiler_util.h" // IWYU pragma: keep
35
#include "core/block/block.h"
36
#include "core/block/column_numbers.h"
37
#include "core/block/column_with_type_and_name.h"
38
#include "core/column/column.h"
39
#include "core/column/column_const.h"
40
#include "core/column/column_vector.h"
41
#include "core/data_type/data_type.h"
42
#include "core/pod_array_fwd.h"
43
#include "core/types.h"
44
45
#ifndef USE_LIBCPP
46
#include <memory_resource>
47
1.13k
#define PMR std::pmr
48
#else
49
#include <boost/container/pmr/monotonic_buffer_resource.hpp>
50
#include <boost/container/pmr/vector.hpp>
51
#define PMR boost::container::pmr
52
#endif
53
54
#include <fmt/format.h>
55
56
#include <cstdint>
57
#include <string_view>
58
59
#include "core/assert_cast.h"
60
#include "core/column/column_decimal.h"
61
#include "core/column/column_nullable.h"
62
#include "core/column/column_string.h"
63
#include "core/string_ref.h"
64
#include "util/simd/vstring_function.h"
65
66
namespace doris {
67
#include "common/compile_check_begin.h"
68
struct StringOP {
69
    static void push_empty_string(size_t index, ColumnString::Chars& chars,
70
546k
                                  ColumnString::Offsets& offsets) {
71
546k
        offsets[index] = (ColumnString::Offset)chars.size();
72
546k
    }
73
74
    static void push_null_string(size_t index, ColumnString::Chars& chars,
75
23.9k
                                 ColumnString::Offsets& offsets, NullMap& null_map) {
76
23.9k
        null_map[index] = 1;
77
23.9k
        push_empty_string(index, chars, offsets);
78
23.9k
    }
79
80
    static void push_value_string(const std::string_view& string_value, size_t index,
81
132k
                                  ColumnString::Chars& chars, ColumnString::Offsets& offsets) {
82
132k
        ColumnString::check_chars_length(chars.size() + string_value.size(), offsets.size());
83
84
132k
        chars.insert(string_value.data(), string_value.data() + string_value.size());
85
132k
        offsets[index] = (ColumnString::Offset)chars.size();
86
132k
    }
87
88
    static void push_value_string_reserved_and_allow_overflow(const std::string_view& string_value,
89
                                                              size_t index,
90
                                                              ColumnString::Chars& chars,
91
3.58M
                                                              ColumnString::Offsets& offsets) {
92
3.58M
        chars.insert_assume_reserved_and_allow_overflow(string_value.data(),
93
3.58M
                                                        string_value.data() + string_value.size());
94
3.58M
        offsets[index] = (ColumnString::Offset)chars.size();
95
3.58M
    }
96
97
    static void fast_repeat(uint8_t* dst, const uint8_t* src, size_t src_size,
98
2.50k
                            int32_t repeat_times) {
99
2.50k
        if (UNLIKELY(repeat_times <= 0)) {
100
73
            return;
101
73
        }
102
2.43k
        uint8_t* dst_begin = dst;
103
2.43k
        uint8_t* dst_curr = dst;
104
2.43k
        int32_t k = 0;
105
2.43k
        int32_t is_odd = repeat_times & 1;
106
2.43k
        repeat_times >>= 1;
107
108
2.43k
        memcpy(dst_curr, src, src_size);
109
2.43k
        dst_curr += src_size;
110
10.2k
        for (; repeat_times > 0; k += 1, is_odd = repeat_times & 1, repeat_times >>= 1) {
111
7.83k
            int64_t len = src_size * (1 << k);
112
7.83k
            memcpy(dst_curr, dst_begin, len);
113
7.83k
            dst_curr += len;
114
7.83k
            if (is_odd) {
115
5.78k
                memcpy(dst_curr, dst_begin, len);
116
5.78k
                dst_curr += len;
117
5.78k
            }
118
7.83k
        }
119
2.43k
    }
120
};
121
122
struct SubstringUtil {
123
    static constexpr auto name = "substring";
124
125
    static void substring_execute(Block& block, const ColumnNumbers& arguments, uint32_t result,
126
34.7k
                                  size_t input_rows_count) {
127
34.7k
        DCHECK_EQ(arguments.size(), 3);
128
34.7k
        auto res = ColumnString::create();
129
130
34.7k
        bool col_const[3];
131
34.7k
        ColumnPtr argument_columns[3];
132
138k
        for (int i = 0; i < 3; ++i) {
133
104k
            std::tie(argument_columns[i], col_const[i]) =
134
104k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
135
104k
        }
136
137
34.7k
        const auto* specific_str_column =
138
34.7k
                assert_cast<const ColumnString*>(argument_columns[0].get());
139
34.7k
        const auto* specific_start_column =
140
34.7k
                assert_cast<const ColumnInt32*>(argument_columns[1].get());
141
34.7k
        const auto* specific_len_column =
142
34.7k
                assert_cast<const ColumnInt32*>(argument_columns[2].get());
143
144
34.7k
        bool is_ascii = specific_str_column->is_ascii();
145
146
34.7k
        std::visit(
147
34.7k
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
34.7k
                    vectors<is_ascii, str_const, start_const, len_const>(
149
34.7k
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
34.7k
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
34.7k
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
34.7k
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
147
86
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
86
                    vectors<is_ascii, str_const, start_const, len_const>(
149
86
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
86
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
86
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
86
                },
Unexecuted instantiation: _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_
Line
Count
Source
147
128
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
128
                    vectors<is_ascii, str_const, start_const, len_const>(
149
128
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
128
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
128
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
128
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_
Line
Count
Source
147
128
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
128
                    vectors<is_ascii, str_const, start_const, len_const>(
149
128
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
128
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
128
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
128
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_
Line
Count
Source
147
161
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
161
                    vectors<is_ascii, str_const, start_const, len_const>(
149
161
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
161
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
161
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
161
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
147
32.8k
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
32.8k
                    vectors<is_ascii, str_const, start_const, len_const>(
149
32.8k
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
32.8k
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
32.8k
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
32.8k
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_
Line
Count
Source
147
146
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
146
                    vectors<is_ascii, str_const, start_const, len_const>(
149
146
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
146
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
146
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
146
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
147
160
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
160
                    vectors<is_ascii, str_const, start_const, len_const>(
149
160
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
160
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
160
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
160
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_
Line
Count
Source
147
582
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
582
                    vectors<is_ascii, str_const, start_const, len_const>(
149
582
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
582
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
582
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
582
                },
Unexecuted instantiation: _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
147
16
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
16
                    vectors<is_ascii, str_const, start_const, len_const>(
149
16
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
16
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
16
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
16
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_
Line
Count
Source
147
16
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
16
                    vectors<is_ascii, str_const, start_const, len_const>(
149
16
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
16
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
16
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
16
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
147
21
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
21
                    vectors<is_ascii, str_const, start_const, len_const>(
149
21
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
21
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
21
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
21
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_
Line
Count
Source
147
402
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
402
                    vectors<is_ascii, str_const, start_const, len_const>(
149
402
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
402
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
402
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
402
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_
Line
Count
Source
147
16
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
16
                    vectors<is_ascii, str_const, start_const, len_const>(
149
16
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
16
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
16
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
16
                },
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_
Line
Count
Source
147
16
                [&](auto is_ascii, auto str_const, auto start_const, auto len_const) {
148
16
                    vectors<is_ascii, str_const, start_const, len_const>(
149
16
                            specific_str_column->get_chars(), specific_str_column->get_offsets(),
150
16
                            specific_start_column->get_data(), specific_len_column->get_data(),
151
16
                            res->get_chars(), res->get_offsets(), input_rows_count);
152
16
                },
153
34.7k
                make_bool_variant(is_ascii), make_bool_variant(col_const[0]),
154
34.7k
                make_bool_variant(col_const[1]), make_bool_variant(col_const[2]));
155
34.7k
        block.get_by_position(result).column = std::move(res);
156
34.7k
    }
157
158
private:
159
    template <bool is_ascii, bool str_const, bool start_const, bool len_const>
160
    static void vectors(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets,
161
                        const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len,
162
                        ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets,
163
34.6k
                        size_t size) {
164
34.6k
        res_offsets.resize(size);
165
166
34.6k
        if constexpr (start_const && len_const) {
167
33.2k
            if (start[0] == 0 || len[0] <= 0) {
168
74
                for (size_t i = 0; i < size; ++i) {
169
37
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
37
                }
171
37
                return;
172
37
            }
173
33.2k
        }
174
175
33.1k
        if constexpr (str_const) {
176
470
            res_chars.reserve(size * chars.size());
177
34.2k
        } else {
178
34.2k
            res_chars.reserve(chars.size());
179
34.2k
        }
180
181
34.6k
        if constexpr (is_ascii) {
182
34.1k
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
34.1k
                                                             res_offsets, size);
184
34.1k
        } else {
185
573
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
573
                                                            res_offsets, size);
187
573
        }
188
34.6k
    }
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
86
                        size_t size) {
164
86
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
        if constexpr (str_const) {
176
            res_chars.reserve(size * chars.size());
177
86
        } else {
178
86
            res_chars.reserve(chars.size());
179
86
        }
180
181
        if constexpr (is_ascii) {
182
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
                                                             res_offsets, size);
184
86
        } else {
185
86
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
86
                                                            res_offsets, size);
187
86
        }
188
86
    }
Unexecuted instantiation: _ZN5doris13SubstringUtil7vectorsILb1ELb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
_ZN5doris13SubstringUtil7vectorsILb1ELb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
128
                        size_t size) {
164
128
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
128
        if constexpr (str_const) {
176
128
            res_chars.reserve(size * chars.size());
177
        } else {
178
            res_chars.reserve(chars.size());
179
        }
180
181
128
        if constexpr (is_ascii) {
182
128
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
128
                                                             res_offsets, size);
184
        } else {
185
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
                                                            res_offsets, size);
187
        }
188
128
    }
_ZN5doris13SubstringUtil7vectorsILb1ELb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
128
                        size_t size) {
164
128
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
128
        if constexpr (str_const) {
176
128
            res_chars.reserve(size * chars.size());
177
        } else {
178
            res_chars.reserve(chars.size());
179
        }
180
181
128
        if constexpr (is_ascii) {
182
128
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
128
                                                             res_offsets, size);
184
        } else {
185
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
                                                            res_offsets, size);
187
        }
188
128
    }
_ZN5doris13SubstringUtil7vectorsILb1ELb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
161
                        size_t size) {
164
161
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
161
        if constexpr (str_const) {
176
161
            res_chars.reserve(size * chars.size());
177
        } else {
178
            res_chars.reserve(chars.size());
179
        }
180
181
161
        if constexpr (is_ascii) {
182
161
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
161
                                                             res_offsets, size);
184
        } else {
185
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
                                                            res_offsets, size);
187
        }
188
161
    }
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
32.8k
                        size_t size) {
164
32.8k
        res_offsets.resize(size);
165
166
32.8k
        if constexpr (start_const && len_const) {
167
32.8k
            if (start[0] == 0 || len[0] <= 0) {
168
66
                for (size_t i = 0; i < size; ++i) {
169
33
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
33
                }
171
33
                return;
172
33
            }
173
32.8k
        }
174
175
        if constexpr (str_const) {
176
            res_chars.reserve(size * chars.size());
177
32.8k
        } else {
178
32.8k
            res_chars.reserve(chars.size());
179
32.8k
        }
180
181
32.8k
        if constexpr (is_ascii) {
182
32.8k
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
32.8k
                                                             res_offsets, size);
184
        } else {
185
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
                                                            res_offsets, size);
187
        }
188
32.8k
    }
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
146
                        size_t size) {
164
146
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
        if constexpr (str_const) {
176
            res_chars.reserve(size * chars.size());
177
146
        } else {
178
146
            res_chars.reserve(chars.size());
179
146
        }
180
181
146
        if constexpr (is_ascii) {
182
146
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
146
                                                             res_offsets, size);
184
        } else {
185
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
                                                            res_offsets, size);
187
        }
188
146
    }
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
160
                        size_t size) {
164
160
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
        if constexpr (str_const) {
176
            res_chars.reserve(size * chars.size());
177
160
        } else {
178
160
            res_chars.reserve(chars.size());
179
160
        }
180
181
160
        if constexpr (is_ascii) {
182
160
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
160
                                                             res_offsets, size);
184
        } else {
185
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
                                                            res_offsets, size);
187
        }
188
160
    }
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
582
                        size_t size) {
164
582
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
        if constexpr (str_const) {
176
            res_chars.reserve(size * chars.size());
177
582
        } else {
178
582
            res_chars.reserve(chars.size());
179
582
        }
180
181
582
        if constexpr (is_ascii) {
182
582
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
582
                                                             res_offsets, size);
184
        } else {
185
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
                                                            res_offsets, size);
187
        }
188
582
    }
Unexecuted instantiation: _ZN5doris13SubstringUtil7vectorsILb0ELb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
_ZN5doris13SubstringUtil7vectorsILb0ELb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
16
                        size_t size) {
164
16
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
16
        if constexpr (str_const) {
176
16
            res_chars.reserve(size * chars.size());
177
        } else {
178
            res_chars.reserve(chars.size());
179
        }
180
181
        if constexpr (is_ascii) {
182
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
                                                             res_offsets, size);
184
16
        } else {
185
16
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
16
                                                            res_offsets, size);
187
16
        }
188
16
    }
_ZN5doris13SubstringUtil7vectorsILb0ELb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
16
                        size_t size) {
164
16
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
16
        if constexpr (str_const) {
176
16
            res_chars.reserve(size * chars.size());
177
        } else {
178
            res_chars.reserve(chars.size());
179
        }
180
181
        if constexpr (is_ascii) {
182
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
                                                             res_offsets, size);
184
16
        } else {
185
16
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
16
                                                            res_offsets, size);
187
16
        }
188
16
    }
_ZN5doris13SubstringUtil7vectorsILb0ELb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
21
                        size_t size) {
164
21
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
21
        if constexpr (str_const) {
176
21
            res_chars.reserve(size * chars.size());
177
        } else {
178
            res_chars.reserve(chars.size());
179
        }
180
181
        if constexpr (is_ascii) {
182
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
                                                             res_offsets, size);
184
21
        } else {
185
21
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
21
                                                            res_offsets, size);
187
21
        }
188
21
    }
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
402
                        size_t size) {
164
402
        res_offsets.resize(size);
165
166
402
        if constexpr (start_const && len_const) {
167
402
            if (start[0] == 0 || len[0] <= 0) {
168
8
                for (size_t i = 0; i < size; ++i) {
169
4
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
4
                }
171
4
                return;
172
4
            }
173
402
        }
174
175
        if constexpr (str_const) {
176
            res_chars.reserve(size * chars.size());
177
402
        } else {
178
402
            res_chars.reserve(chars.size());
179
402
        }
180
181
        if constexpr (is_ascii) {
182
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
                                                             res_offsets, size);
184
402
        } else {
185
402
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
402
                                                            res_offsets, size);
187
402
        }
188
402
    }
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
16
                        size_t size) {
164
16
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
        if constexpr (str_const) {
176
            res_chars.reserve(size * chars.size());
177
16
        } else {
178
16
            res_chars.reserve(chars.size());
179
16
        }
180
181
        if constexpr (is_ascii) {
182
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
                                                             res_offsets, size);
184
16
        } else {
185
16
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
16
                                                            res_offsets, size);
187
16
        }
188
16
    }
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
163
16
                        size_t size) {
164
16
        res_offsets.resize(size);
165
166
        if constexpr (start_const && len_const) {
167
            if (start[0] == 0 || len[0] <= 0) {
168
                for (size_t i = 0; i < size; ++i) {
169
                    StringOP::push_empty_string(i, res_chars, res_offsets);
170
                }
171
                return;
172
            }
173
        }
174
175
        if constexpr (str_const) {
176
            res_chars.reserve(size * chars.size());
177
16
        } else {
178
16
            res_chars.reserve(chars.size());
179
16
        }
180
181
        if constexpr (is_ascii) {
182
            vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
183
                                                             res_offsets, size);
184
16
        } else {
185
16
            vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars,
186
16
                                                            res_offsets, size);
187
16
        }
188
16
    }
189
190
    template <bool str_const, bool start_const, bool len_const>
191
    NO_SANITIZE_UNDEFINED static void vectors_utf8(
192
            const ColumnString::Chars& chars, const ColumnString::Offsets& offsets,
193
            const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len,
194
569
            ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) {
195
569
        std::array<std::byte, 128 * 1024> buf;
196
569
        PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
197
569
        PMR::vector<size_t> index {&pool};
198
199
10.1k
        for (size_t i = 0; i < size; ++i) {
200
9.59k
            int str_size = offsets[index_check_const<str_const>(i)] -
201
9.59k
                           offsets[index_check_const<str_const>(i) - 1];
202
9.59k
            const char* str_data =
203
9.59k
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
204
9.59k
            int start_value = start[index_check_const<start_const>(i)];
205
9.59k
            int len_value = len[index_check_const<len_const>(i)];
206
            // Unsigned numbers cannot be used here because start_value can be negative.
207
9.59k
            int char_len = simd::VStringFunctions::get_char_len(str_data, str_size);
208
            // return empty string if start > src.length
209
            // Here, start_value is compared against the length of the character.
210
9.59k
            if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) {
211
1.12k
                StringOP::push_empty_string(i, res_chars, res_offsets);
212
1.12k
                continue;
213
1.12k
            }
214
215
8.47k
            size_t byte_pos = 0;
216
8.47k
            index.clear();
217
932k
            for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
218
924k
                char_size = get_utf8_byte_length(str_data[j]);
219
924k
                index.push_back(j);
220
                // index_size represents the number of characters from the beginning of the character to the current position.
221
                // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters.
222
924k
                if (start_value > 0 && index.size() > start_value + len_value) {
223
245
                    break;
224
245
                }
225
924k
            }
226
227
8.47k
            int64_t fixed_pos = start_value;
228
8.47k
            if (fixed_pos < -(int)index.size()) {
229
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
230
0
                continue;
231
0
            }
232
8.47k
            if (fixed_pos < 0) {
233
98
                fixed_pos = index.size() + fixed_pos + 1;
234
98
            }
235
236
8.47k
            byte_pos = index[fixed_pos - 1];
237
8.47k
            size_t fixed_len = str_size - byte_pos;
238
8.47k
            if (fixed_pos + len_value <= index.size()) {
239
255
                fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
240
255
            }
241
242
8.47k
            if (byte_pos <= str_size && fixed_len > 0) {
243
8.47k
                StringOP::push_value_string_reserved_and_allow_overflow(
244
8.47k
                        {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
245
8.47k
            } else {
246
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
247
0
            }
248
8.47k
        }
249
569
    }
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
194
86
            ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) {
195
86
        std::array<std::byte, 128 * 1024> buf;
196
86
        PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
197
86
        PMR::vector<size_t> index {&pool};
198
199
446
        for (size_t i = 0; i < size; ++i) {
200
360
            int str_size = offsets[index_check_const<str_const>(i)] -
201
360
                           offsets[index_check_const<str_const>(i) - 1];
202
360
            const char* str_data =
203
360
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
204
360
            int start_value = start[index_check_const<start_const>(i)];
205
360
            int len_value = len[index_check_const<len_const>(i)];
206
            // Unsigned numbers cannot be used here because start_value can be negative.
207
360
            int char_len = simd::VStringFunctions::get_char_len(str_data, str_size);
208
            // return empty string if start > src.length
209
            // Here, start_value is compared against the length of the character.
210
360
            if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) {
211
207
                StringOP::push_empty_string(i, res_chars, res_offsets);
212
207
                continue;
213
207
            }
214
215
153
            size_t byte_pos = 0;
216
153
            index.clear();
217
1.34k
            for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
218
1.22k
                char_size = get_utf8_byte_length(str_data[j]);
219
1.22k
                index.push_back(j);
220
                // index_size represents the number of characters from the beginning of the character to the current position.
221
                // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters.
222
1.22k
                if (start_value > 0 && index.size() > start_value + len_value) {
223
31
                    break;
224
31
                }
225
1.22k
            }
226
227
153
            int64_t fixed_pos = start_value;
228
153
            if (fixed_pos < -(int)index.size()) {
229
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
230
0
                continue;
231
0
            }
232
153
            if (fixed_pos < 0) {
233
78
                fixed_pos = index.size() + fixed_pos + 1;
234
78
            }
235
236
153
            byte_pos = index[fixed_pos - 1];
237
153
            size_t fixed_len = str_size - byte_pos;
238
153
            if (fixed_pos + len_value <= index.size()) {
239
33
                fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
240
33
            }
241
242
153
            if (byte_pos <= str_size && fixed_len > 0) {
243
153
                StringOP::push_value_string_reserved_and_allow_overflow(
244
153
                        {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
245
153
            } else {
246
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
247
0
            }
248
153
        }
249
86
    }
Unexecuted instantiation: _ZN5doris13SubstringUtil12vectors_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
_ZN5doris13SubstringUtil12vectors_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
194
16
            ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) {
195
16
        std::array<std::byte, 128 * 1024> buf;
196
16
        PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
197
16
        PMR::vector<size_t> index {&pool};
198
199
32
        for (size_t i = 0; i < size; ++i) {
200
16
            int str_size = offsets[index_check_const<str_const>(i)] -
201
16
                           offsets[index_check_const<str_const>(i) - 1];
202
16
            const char* str_data =
203
16
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
204
16
            int start_value = start[index_check_const<start_const>(i)];
205
16
            int len_value = len[index_check_const<len_const>(i)];
206
            // Unsigned numbers cannot be used here because start_value can be negative.
207
16
            int char_len = simd::VStringFunctions::get_char_len(str_data, str_size);
208
            // return empty string if start > src.length
209
            // Here, start_value is compared against the length of the character.
210
16
            if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) {
211
7
                StringOP::push_empty_string(i, res_chars, res_offsets);
212
7
                continue;
213
7
            }
214
215
9
            size_t byte_pos = 0;
216
9
            index.clear();
217
65
            for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
218
59
                char_size = get_utf8_byte_length(str_data[j]);
219
59
                index.push_back(j);
220
                // index_size represents the number of characters from the beginning of the character to the current position.
221
                // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters.
222
59
                if (start_value > 0 && index.size() > start_value + len_value) {
223
3
                    break;
224
3
                }
225
59
            }
226
227
9
            int64_t fixed_pos = start_value;
228
9
            if (fixed_pos < -(int)index.size()) {
229
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
230
0
                continue;
231
0
            }
232
9
            if (fixed_pos < 0) {
233
3
                fixed_pos = index.size() + fixed_pos + 1;
234
3
            }
235
236
9
            byte_pos = index[fixed_pos - 1];
237
9
            size_t fixed_len = str_size - byte_pos;
238
9
            if (fixed_pos + len_value <= index.size()) {
239
3
                fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
240
3
            }
241
242
9
            if (byte_pos <= str_size && fixed_len > 0) {
243
9
                StringOP::push_value_string_reserved_and_allow_overflow(
244
9
                        {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
245
9
            } else {
246
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
247
0
            }
248
9
        }
249
16
    }
_ZN5doris13SubstringUtil12vectors_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
194
16
            ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) {
195
16
        std::array<std::byte, 128 * 1024> buf;
196
16
        PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
197
16
        PMR::vector<size_t> index {&pool};
198
199
32
        for (size_t i = 0; i < size; ++i) {
200
16
            int str_size = offsets[index_check_const<str_const>(i)] -
201
16
                           offsets[index_check_const<str_const>(i) - 1];
202
16
            const char* str_data =
203
16
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
204
16
            int start_value = start[index_check_const<start_const>(i)];
205
16
            int len_value = len[index_check_const<len_const>(i)];
206
            // Unsigned numbers cannot be used here because start_value can be negative.
207
16
            int char_len = simd::VStringFunctions::get_char_len(str_data, str_size);
208
            // return empty string if start > src.length
209
            // Here, start_value is compared against the length of the character.
210
16
            if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) {
211
7
                StringOP::push_empty_string(i, res_chars, res_offsets);
212
7
                continue;
213
7
            }
214
215
9
            size_t byte_pos = 0;
216
9
            index.clear();
217
65
            for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
218
59
                char_size = get_utf8_byte_length(str_data[j]);
219
59
                index.push_back(j);
220
                // index_size represents the number of characters from the beginning of the character to the current position.
221
                // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters.
222
59
                if (start_value > 0 && index.size() > start_value + len_value) {
223
3
                    break;
224
3
                }
225
59
            }
226
227
9
            int64_t fixed_pos = start_value;
228
9
            if (fixed_pos < -(int)index.size()) {
229
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
230
0
                continue;
231
0
            }
232
9
            if (fixed_pos < 0) {
233
3
                fixed_pos = index.size() + fixed_pos + 1;
234
3
            }
235
236
9
            byte_pos = index[fixed_pos - 1];
237
9
            size_t fixed_len = str_size - byte_pos;
238
9
            if (fixed_pos + len_value <= index.size()) {
239
3
                fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
240
3
            }
241
242
9
            if (byte_pos <= str_size && fixed_len > 0) {
243
9
                StringOP::push_value_string_reserved_and_allow_overflow(
244
9
                        {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
245
9
            } else {
246
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
247
0
            }
248
9
        }
249
16
    }
_ZN5doris13SubstringUtil12vectors_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
194
21
            ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) {
195
21
        std::array<std::byte, 128 * 1024> buf;
196
21
        PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
197
21
        PMR::vector<size_t> index {&pool};
198
199
42
        for (size_t i = 0; i < size; ++i) {
200
21
            int str_size = offsets[index_check_const<str_const>(i)] -
201
21
                           offsets[index_check_const<str_const>(i) - 1];
202
21
            const char* str_data =
203
21
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
204
21
            int start_value = start[index_check_const<start_const>(i)];
205
21
            int len_value = len[index_check_const<len_const>(i)];
206
            // Unsigned numbers cannot be used here because start_value can be negative.
207
21
            int char_len = simd::VStringFunctions::get_char_len(str_data, str_size);
208
            // return empty string if start > src.length
209
            // Here, start_value is compared against the length of the character.
210
21
            if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) {
211
8
                StringOP::push_empty_string(i, res_chars, res_offsets);
212
8
                continue;
213
8
            }
214
215
13
            size_t byte_pos = 0;
216
13
            index.clear();
217
82
            for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
218
72
                char_size = get_utf8_byte_length(str_data[j]);
219
72
                index.push_back(j);
220
                // index_size represents the number of characters from the beginning of the character to the current position.
221
                // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters.
222
72
                if (start_value > 0 && index.size() > start_value + len_value) {
223
3
                    break;
224
3
                }
225
72
            }
226
227
13
            int64_t fixed_pos = start_value;
228
13
            if (fixed_pos < -(int)index.size()) {
229
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
230
0
                continue;
231
0
            }
232
13
            if (fixed_pos < 0) {
233
5
                fixed_pos = index.size() + fixed_pos + 1;
234
5
            }
235
236
13
            byte_pos = index[fixed_pos - 1];
237
13
            size_t fixed_len = str_size - byte_pos;
238
13
            if (fixed_pos + len_value <= index.size()) {
239
3
                fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
240
3
            }
241
242
13
            if (byte_pos <= str_size && fixed_len > 0) {
243
13
                StringOP::push_value_string_reserved_and_allow_overflow(
244
13
                        {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
245
13
            } else {
246
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
247
0
            }
248
13
        }
249
21
    }
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
194
398
            ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) {
195
398
        std::array<std::byte, 128 * 1024> buf;
196
398
        PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
197
398
        PMR::vector<size_t> index {&pool};
198
199
9.54k
        for (size_t i = 0; i < size; ++i) {
200
9.14k
            int str_size = offsets[index_check_const<str_const>(i)] -
201
9.14k
                           offsets[index_check_const<str_const>(i) - 1];
202
9.14k
            const char* str_data =
203
9.14k
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
204
9.14k
            int start_value = start[index_check_const<start_const>(i)];
205
9.14k
            int len_value = len[index_check_const<len_const>(i)];
206
            // Unsigned numbers cannot be used here because start_value can be negative.
207
9.14k
            int char_len = simd::VStringFunctions::get_char_len(str_data, str_size);
208
            // return empty string if start > src.length
209
            // Here, start_value is compared against the length of the character.
210
9.14k
            if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) {
211
877
                StringOP::push_empty_string(i, res_chars, res_offsets);
212
877
                continue;
213
877
            }
214
215
8.27k
            size_t byte_pos = 0;
216
8.27k
            index.clear();
217
930k
            for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
218
922k
                char_size = get_utf8_byte_length(str_data[j]);
219
922k
                index.push_back(j);
220
                // index_size represents the number of characters from the beginning of the character to the current position.
221
                // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters.
222
922k
                if (start_value > 0 && index.size() > start_value + len_value) {
223
199
                    break;
224
199
                }
225
922k
            }
226
227
8.27k
            int64_t fixed_pos = start_value;
228
8.27k
            if (fixed_pos < -(int)index.size()) {
229
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
230
0
                continue;
231
0
            }
232
8.27k
            if (fixed_pos < 0) {
233
3
                fixed_pos = index.size() + fixed_pos + 1;
234
3
            }
235
236
8.27k
            byte_pos = index[fixed_pos - 1];
237
8.27k
            size_t fixed_len = str_size - byte_pos;
238
8.27k
            if (fixed_pos + len_value <= index.size()) {
239
207
                fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
240
207
            }
241
242
8.27k
            if (byte_pos <= str_size && fixed_len > 0) {
243
8.27k
                StringOP::push_value_string_reserved_and_allow_overflow(
244
8.27k
                        {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
245
8.27k
            } else {
246
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
247
0
            }
248
8.27k
        }
249
398
    }
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
194
16
            ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) {
195
16
        std::array<std::byte, 128 * 1024> buf;
196
16
        PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
197
16
        PMR::vector<size_t> index {&pool};
198
199
32
        for (size_t i = 0; i < size; ++i) {
200
16
            int str_size = offsets[index_check_const<str_const>(i)] -
201
16
                           offsets[index_check_const<str_const>(i) - 1];
202
16
            const char* str_data =
203
16
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
204
16
            int start_value = start[index_check_const<start_const>(i)];
205
16
            int len_value = len[index_check_const<len_const>(i)];
206
            // Unsigned numbers cannot be used here because start_value can be negative.
207
16
            int char_len = simd::VStringFunctions::get_char_len(str_data, str_size);
208
            // return empty string if start > src.length
209
            // Here, start_value is compared against the length of the character.
210
16
            if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) {
211
7
                StringOP::push_empty_string(i, res_chars, res_offsets);
212
7
                continue;
213
7
            }
214
215
9
            size_t byte_pos = 0;
216
9
            index.clear();
217
65
            for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
218
59
                char_size = get_utf8_byte_length(str_data[j]);
219
59
                index.push_back(j);
220
                // index_size represents the number of characters from the beginning of the character to the current position.
221
                // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters.
222
59
                if (start_value > 0 && index.size() > start_value + len_value) {
223
3
                    break;
224
3
                }
225
59
            }
226
227
9
            int64_t fixed_pos = start_value;
228
9
            if (fixed_pos < -(int)index.size()) {
229
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
230
0
                continue;
231
0
            }
232
9
            if (fixed_pos < 0) {
233
3
                fixed_pos = index.size() + fixed_pos + 1;
234
3
            }
235
236
9
            byte_pos = index[fixed_pos - 1];
237
9
            size_t fixed_len = str_size - byte_pos;
238
9
            if (fixed_pos + len_value <= index.size()) {
239
3
                fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
240
3
            }
241
242
9
            if (byte_pos <= str_size && fixed_len > 0) {
243
9
                StringOP::push_value_string_reserved_and_allow_overflow(
244
9
                        {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
245
9
            } else {
246
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
247
0
            }
248
9
        }
249
16
    }
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
194
16
            ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) {
195
16
        std::array<std::byte, 128 * 1024> buf;
196
16
        PMR::monotonic_buffer_resource pool {buf.data(), buf.size()};
197
16
        PMR::vector<size_t> index {&pool};
198
199
32
        for (size_t i = 0; i < size; ++i) {
200
16
            int str_size = offsets[index_check_const<str_const>(i)] -
201
16
                           offsets[index_check_const<str_const>(i) - 1];
202
16
            const char* str_data =
203
16
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
204
16
            int start_value = start[index_check_const<start_const>(i)];
205
16
            int len_value = len[index_check_const<len_const>(i)];
206
            // Unsigned numbers cannot be used here because start_value can be negative.
207
16
            int char_len = simd::VStringFunctions::get_char_len(str_data, str_size);
208
            // return empty string if start > src.length
209
            // Here, start_value is compared against the length of the character.
210
16
            if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) {
211
7
                StringOP::push_empty_string(i, res_chars, res_offsets);
212
7
                continue;
213
7
            }
214
215
9
            size_t byte_pos = 0;
216
9
            index.clear();
217
65
            for (size_t j = 0, char_size = 0; j < str_size; j += char_size) {
218
59
                char_size = get_utf8_byte_length(str_data[j]);
219
59
                index.push_back(j);
220
                // index_size represents the number of characters from the beginning of the character to the current position.
221
                // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters.
222
59
                if (start_value > 0 && index.size() > start_value + len_value) {
223
3
                    break;
224
3
                }
225
59
            }
226
227
9
            int64_t fixed_pos = start_value;
228
9
            if (fixed_pos < -(int)index.size()) {
229
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
230
0
                continue;
231
0
            }
232
9
            if (fixed_pos < 0) {
233
3
                fixed_pos = index.size() + fixed_pos + 1;
234
3
            }
235
236
9
            byte_pos = index[fixed_pos - 1];
237
9
            size_t fixed_len = str_size - byte_pos;
238
9
            if (fixed_pos + len_value <= index.size()) {
239
3
                fixed_len = index[fixed_pos + len_value - 1] - byte_pos;
240
3
            }
241
242
9
            if (byte_pos <= str_size && fixed_len > 0) {
243
9
                StringOP::push_value_string_reserved_and_allow_overflow(
244
9
                        {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets);
245
9
            } else {
246
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
247
0
            }
248
9
        }
249
16
    }
250
251
    template <bool str_const, bool start_const, bool len_const>
252
    static void vectors_ascii(const ColumnString::Chars& chars,
253
                              const ColumnString::Offsets& offsets,
254
                              const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len,
255
                              ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets,
256
34.1k
                              size_t size) {
257
4.11M
        for (size_t i = 0; i < size; ++i) {
258
4.08M
            int str_size = offsets[index_check_const<str_const>(i)] -
259
4.08M
                           offsets[index_check_const<str_const>(i) - 1];
260
4.08M
            const char* str_data =
261
4.08M
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
262
4.08M
            int start_value = start[index_check_const<start_const>(i)];
263
4.08M
            int len_value = len[index_check_const<len_const>(i)];
264
265
4.08M
            if (start_value > str_size || start_value < -str_size || str_size == 0 ||
266
4.08M
                len_value <= 0) {
267
520k
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
520k
                continue;
269
520k
            }
270
3.56M
            int fixed_pos = start_value - 1;
271
3.56M
            if (fixed_pos < 0) {
272
505
                fixed_pos = str_size + fixed_pos + 1;
273
505
            }
274
3.56M
            size_t fixed_len = std::min(str_size - fixed_pos, len_value);
275
3.56M
            StringOP::push_value_string_reserved_and_allow_overflow(
276
3.56M
                    {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
277
3.56M
        }
278
34.1k
    }
Unexecuted instantiation: _ZN5doris13SubstringUtil13vectors_asciiILb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
_ZN5doris13SubstringUtil13vectors_asciiILb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
256
128
                              size_t size) {
257
256
        for (size_t i = 0; i < size; ++i) {
258
128
            int str_size = offsets[index_check_const<str_const>(i)] -
259
128
                           offsets[index_check_const<str_const>(i) - 1];
260
128
            const char* str_data =
261
128
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
262
128
            int start_value = start[index_check_const<start_const>(i)];
263
128
            int len_value = len[index_check_const<len_const>(i)];
264
265
128
            if (start_value > str_size || start_value < -str_size || str_size == 0 ||
266
128
                len_value <= 0) {
267
65
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
65
                continue;
269
65
            }
270
63
            int fixed_pos = start_value - 1;
271
63
            if (fixed_pos < 0) {
272
21
                fixed_pos = str_size + fixed_pos + 1;
273
21
            }
274
63
            size_t fixed_len = std::min(str_size - fixed_pos, len_value);
275
63
            StringOP::push_value_string_reserved_and_allow_overflow(
276
63
                    {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
277
63
        }
278
128
    }
_ZN5doris13SubstringUtil13vectors_asciiILb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
256
128
                              size_t size) {
257
256
        for (size_t i = 0; i < size; ++i) {
258
128
            int str_size = offsets[index_check_const<str_const>(i)] -
259
128
                           offsets[index_check_const<str_const>(i) - 1];
260
128
            const char* str_data =
261
128
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
262
128
            int start_value = start[index_check_const<start_const>(i)];
263
128
            int len_value = len[index_check_const<len_const>(i)];
264
265
128
            if (start_value > str_size || start_value < -str_size || str_size == 0 ||
266
128
                len_value <= 0) {
267
65
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
65
                continue;
269
65
            }
270
63
            int fixed_pos = start_value - 1;
271
63
            if (fixed_pos < 0) {
272
21
                fixed_pos = str_size + fixed_pos + 1;
273
21
            }
274
63
            size_t fixed_len = std::min(str_size - fixed_pos, len_value);
275
63
            StringOP::push_value_string_reserved_and_allow_overflow(
276
63
                    {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
277
63
        }
278
128
    }
_ZN5doris13SubstringUtil13vectors_asciiILb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
256
161
                              size_t size) {
257
322
        for (size_t i = 0; i < size; ++i) {
258
161
            int str_size = offsets[index_check_const<str_const>(i)] -
259
161
                           offsets[index_check_const<str_const>(i) - 1];
260
161
            const char* str_data =
261
161
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
262
161
            int start_value = start[index_check_const<start_const>(i)];
263
161
            int len_value = len[index_check_const<len_const>(i)];
264
265
161
            if (start_value > str_size || start_value < -str_size || str_size == 0 ||
266
161
                len_value <= 0) {
267
81
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
81
                continue;
269
81
            }
270
80
            int fixed_pos = start_value - 1;
271
80
            if (fixed_pos < 0) {
272
31
                fixed_pos = str_size + fixed_pos + 1;
273
31
            }
274
80
            size_t fixed_len = std::min(str_size - fixed_pos, len_value);
275
80
            StringOP::push_value_string_reserved_and_allow_overflow(
276
80
                    {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
277
80
        }
278
161
    }
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
256
32.8k
                              size_t size) {
257
4.11M
        for (size_t i = 0; i < size; ++i) {
258
4.07M
            int str_size = offsets[index_check_const<str_const>(i)] -
259
4.07M
                           offsets[index_check_const<str_const>(i) - 1];
260
4.07M
            const char* str_data =
261
4.07M
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
262
4.07M
            int start_value = start[index_check_const<start_const>(i)];
263
4.07M
            int len_value = len[index_check_const<len_const>(i)];
264
265
4.07M
            if (start_value > str_size || start_value < -str_size || str_size == 0 ||
266
4.07M
                len_value <= 0) {
267
520k
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
520k
                continue;
269
520k
            }
270
3.55M
            int fixed_pos = start_value - 1;
271
3.55M
            if (fixed_pos < 0) {
272
65
                fixed_pos = str_size + fixed_pos + 1;
273
65
            }
274
3.55M
            size_t fixed_len = std::min(str_size - fixed_pos, len_value);
275
3.55M
            StringOP::push_value_string_reserved_and_allow_overflow(
276
3.55M
                    {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
277
3.55M
        }
278
32.8k
    }
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
256
146
                              size_t size) {
257
292
        for (size_t i = 0; i < size; ++i) {
258
146
            int str_size = offsets[index_check_const<str_const>(i)] -
259
146
                           offsets[index_check_const<str_const>(i) - 1];
260
146
            const char* str_data =
261
146
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
262
146
            int start_value = start[index_check_const<start_const>(i)];
263
146
            int len_value = len[index_check_const<len_const>(i)];
264
265
146
            if (start_value > str_size || start_value < -str_size || str_size == 0 ||
266
146
                len_value <= 0) {
267
77
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
77
                continue;
269
77
            }
270
69
            int fixed_pos = start_value - 1;
271
69
            if (fixed_pos < 0) {
272
24
                fixed_pos = str_size + fixed_pos + 1;
273
24
            }
274
69
            size_t fixed_len = std::min(str_size - fixed_pos, len_value);
275
69
            StringOP::push_value_string_reserved_and_allow_overflow(
276
69
                    {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
277
69
        }
278
146
    }
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
256
160
                              size_t size) {
257
388
        for (size_t i = 0; i < size; ++i) {
258
228
            int str_size = offsets[index_check_const<str_const>(i)] -
259
228
                           offsets[index_check_const<str_const>(i) - 1];
260
228
            const char* str_data =
261
228
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
262
228
            int start_value = start[index_check_const<start_const>(i)];
263
228
            int len_value = len[index_check_const<len_const>(i)];
264
265
228
            if (start_value > str_size || start_value < -str_size || str_size == 0 ||
266
228
                len_value <= 0) {
267
97
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
97
                continue;
269
97
            }
270
131
            int fixed_pos = start_value - 1;
271
131
            if (fixed_pos < 0) {
272
25
                fixed_pos = str_size + fixed_pos + 1;
273
25
            }
274
131
            size_t fixed_len = std::min(str_size - fixed_pos, len_value);
275
131
            StringOP::push_value_string_reserved_and_allow_overflow(
276
131
                    {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
277
131
        }
278
160
    }
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m
Line
Count
Source
256
582
                              size_t size) {
257
1.39k
        for (size_t i = 0; i < size; ++i) {
258
814
            int str_size = offsets[index_check_const<str_const>(i)] -
259
814
                           offsets[index_check_const<str_const>(i) - 1];
260
814
            const char* str_data =
261
814
                    (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1];
262
814
            int start_value = start[index_check_const<start_const>(i)];
263
814
            int len_value = len[index_check_const<len_const>(i)];
264
265
814
            if (start_value > str_size || start_value < -str_size || str_size == 0 ||
266
814
                len_value <= 0) {
267
207
                StringOP::push_empty_string(i, res_chars, res_offsets);
268
207
                continue;
269
207
            }
270
607
            int fixed_pos = start_value - 1;
271
607
            if (fixed_pos < 0) {
272
318
                fixed_pos = str_size + fixed_pos + 1;
273
318
            }
274
607
            size_t fixed_len = std::min(str_size - fixed_pos, len_value);
275
607
            StringOP::push_value_string_reserved_and_allow_overflow(
276
607
                    {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets);
277
607
        }
278
582
    }
279
};
280
281
#include "common/compile_check_end.h"
282
283
} // namespace doris