be/src/exec/common/stringop_substring.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <sys/types.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <array> |
24 | | #include <boost/iterator/iterator_facade.hpp> |
25 | | #include <boost/locale.hpp> |
26 | | #include <climits> |
27 | | #include <cmath> |
28 | | #include <cstddef> |
29 | | #include <cstdlib> |
30 | | #include <cstring> |
31 | | #include <utility> |
32 | | #include <vector> |
33 | | |
34 | | #include "common/compiler_util.h" // IWYU pragma: keep |
35 | | #include "core/block/block.h" |
36 | | #include "core/block/column_numbers.h" |
37 | | #include "core/block/column_with_type_and_name.h" |
38 | | #include "core/column/column.h" |
39 | | #include "core/column/column_const.h" |
40 | | #include "core/column/column_vector.h" |
41 | | #include "core/data_type/data_type.h" |
42 | | #include "core/pod_array_fwd.h" |
43 | | #include "core/types.h" |
44 | | |
45 | | #ifndef USE_LIBCPP |
46 | | #include <memory_resource> |
47 | 256 | #define PMR std::pmr |
48 | | #else |
49 | | #include <boost/container/pmr/monotonic_buffer_resource.hpp> |
50 | | #include <boost/container/pmr/vector.hpp> |
51 | | #define PMR boost::container::pmr |
52 | | #endif |
53 | | |
54 | | #include <fmt/format.h> |
55 | | |
56 | | #include <cstdint> |
57 | | #include <string_view> |
58 | | |
59 | | #include "core/assert_cast.h" |
60 | | #include "core/column/column_decimal.h" |
61 | | #include "core/column/column_nullable.h" |
62 | | #include "core/column/column_string.h" |
63 | | #include "core/string_ref.h" |
64 | | #include "util/simd/vstring_function.h" |
65 | | |
66 | | namespace doris { |
67 | | struct StringOP { |
68 | | static void push_empty_string(size_t index, ColumnString::Chars& chars, |
69 | 989 | ColumnString::Offsets& offsets) { |
70 | 989 | offsets[index] = (ColumnString::Offset)chars.size(); |
71 | 989 | } |
72 | | |
73 | | static void push_null_string(size_t index, ColumnString::Chars& chars, |
74 | 103 | ColumnString::Offsets& offsets, NullMap& null_map) { |
75 | 103 | null_map[index] = 1; |
76 | 103 | push_empty_string(index, chars, offsets); |
77 | 103 | } |
78 | | |
79 | | static void push_value_string(const std::string_view& string_value, size_t index, |
80 | 1.92k | ColumnString::Chars& chars, ColumnString::Offsets& offsets) { |
81 | 1.92k | ColumnString::check_chars_length(chars.size() + string_value.size(), offsets.size()); |
82 | | |
83 | 1.92k | chars.insert(string_value.data(), string_value.data() + string_value.size()); |
84 | 1.92k | offsets[index] = (ColumnString::Offset)chars.size(); |
85 | 1.92k | } |
86 | | |
87 | | static void push_value_string_reserved_and_allow_overflow(const std::string_view& string_value, |
88 | | size_t index, |
89 | | ColumnString::Chars& chars, |
90 | 812 | ColumnString::Offsets& offsets) { |
91 | 812 | chars.insert_assume_reserved_and_allow_overflow(string_value.data(), |
92 | 812 | string_value.data() + string_value.size()); |
93 | 812 | offsets[index] = (ColumnString::Offset)chars.size(); |
94 | 812 | } |
95 | | |
// Writes `repeat_times` back-to-back copies of the `src_size` bytes at `src`
// into `dst`.
//
// @param dst          destination buffer; must have room for
//                     src_size * repeat_times bytes and must not overlap `src`.
// @param src          bytes to repeat; may be null only when `src_size` is 0.
// @param src_size     number of bytes in one copy.
// @param repeat_times number of copies; values <= 0 write nothing.
//
// Nothing is written when `repeat_times <= 0` or `src_size == 0`. The explicit
// zero-length guard keeps memcpy from ever being called with a null pointer,
// which is undefined behaviour even for a zero byte count.
//
// The first copy comes from `src`; afterwards the already-written prefix of
// `dst` is copied onto its own tail, doubling the filled length each round, so
// only O(log repeat_times) memcpy calls are issued.
static void fast_repeat(uint8_t* dst, const uint8_t* src, size_t src_size,
                        int32_t repeat_times) {
    if (repeat_times <= 0 || src_size == 0) {
        return;
    }

    const size_t total_bytes = src_size * static_cast<size_t>(repeat_times);

    memcpy(dst, src, src_size);
    size_t filled = src_size;
    while (filled < total_bytes) {
        // Copy at most the whole filled prefix; the last round is clamped so we
        // stop exactly at total_bytes. Source [0, chunk) and destination
        // [filled, filled + chunk) never overlap because chunk <= filled.
        const size_t chunk = std::min(filled, total_bytes - filled);
        memcpy(dst + filled, dst, chunk);
        filled += chunk;
    }
}
119 | | }; |
120 | | |
121 | | struct SubstringUtil { |
122 | | static constexpr auto name = "substring"; |
123 | | |
124 | | static void substring_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, |
125 | 1.13k | size_t input_rows_count) { |
126 | 1.13k | DCHECK_EQ(arguments.size(), 3); |
127 | 1.13k | auto res = ColumnString::create(); |
128 | | |
129 | 1.13k | bool col_const[3]; |
130 | 1.13k | ColumnPtr argument_columns[3]; |
131 | 4.55k | for (int i = 0; i < 3; ++i) { |
132 | 3.41k | std::tie(argument_columns[i], col_const[i]) = |
133 | 3.41k | unpack_if_const(block.get_by_position(arguments[i]).column); |
134 | 3.41k | } |
135 | | |
136 | 1.13k | const auto* specific_str_column = |
137 | 1.13k | assert_cast<const ColumnString*>(argument_columns[0].get()); |
138 | 1.13k | const auto* specific_start_column = |
139 | 1.13k | assert_cast<const ColumnInt32*>(argument_columns[1].get()); |
140 | 1.13k | const auto* specific_len_column = |
141 | 1.13k | assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
142 | | |
143 | 1.13k | bool is_ascii = specific_str_column->is_ascii(); |
144 | | |
145 | 1.13k | std::visit( |
146 | 1.13k | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { |
147 | 1.13k | vectors<is_ascii, str_const, start_const, len_const>( |
148 | 1.13k | specific_str_column->get_chars(), specific_str_column->get_offsets(), |
149 | 1.13k | specific_start_column->get_data(), specific_len_column->get_data(), |
150 | 1.13k | res->get_chars(), res->get_offsets(), input_rows_count); |
151 | 1.13k | }, _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 31 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 31 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 31 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 31 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 31 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 31 | }, |
Unexecuted instantiation: _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_ _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 128 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 128 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 128 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 128 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 128 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 128 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 128 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 128 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 128 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 128 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 128 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 128 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 161 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 161 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 161 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 161 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 161 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 161 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 129 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 129 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 129 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 129 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 129 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 129 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 146 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 146 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 146 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 146 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 146 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 146 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 128 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 128 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 128 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 128 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 128 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 128 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 187 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 187 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 187 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 187 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 187 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 187 | }, |
Unexecuted instantiation: _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_ _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 21 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 21 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 21 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 21 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 21 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 21 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_ Line | Count | Source | 146 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 147 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 148 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 149 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 150 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 151 | 16 | }, |
|
152 | 1.13k | make_bool_variant(is_ascii), make_bool_variant(col_const[0]), |
153 | 1.13k | make_bool_variant(col_const[1]), make_bool_variant(col_const[2])); |
154 | 1.13k | block.get_by_position(result).column = std::move(res); |
155 | 1.13k | } |
156 | | |
157 | | private: |
158 | | template <bool is_ascii, bool str_const, bool start_const, bool len_const> |
159 | | static void vectors(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets, |
160 | | const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, |
161 | | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, |
162 | 1.13k | size_t size) { |
163 | 1.13k | res_offsets.resize(size); |
164 | | |
165 | 1.13k | if constexpr (start_const && len_const) { |
166 | 145 | if (start[0] == 0 || len[0] <= 0) { |
167 | 72 | for (size_t i = 0; i < size; ++i) { |
168 | 36 | StringOP::push_empty_string(i, res_chars, res_offsets); |
169 | 36 | } |
170 | 36 | return; |
171 | 36 | } |
172 | 145 | } |
173 | | |
174 | 470 | if constexpr (str_const) { |
175 | 470 | res_chars.reserve(size * chars.size()); |
176 | 669 | } else { |
177 | 669 | res_chars.reserve(chars.size()); |
178 | 669 | } |
179 | | |
180 | 1.13k | if constexpr (is_ascii) { |
181 | 1.00k | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, |
182 | 1.00k | res_offsets, size); |
183 | 1.00k | } else { |
184 | 132 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, |
185 | 132 | res_offsets, size); |
186 | 132 | } |
187 | 1.13k | } _ZN5doris13SubstringUtil7vectorsILb0ELb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 31 | size_t size) { | 163 | 31 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | | if constexpr (str_const) { | 175 | | res_chars.reserve(size * chars.size()); | 176 | 31 | } else { | 177 | 31 | res_chars.reserve(chars.size()); | 178 | 31 | } | 179 | | | 180 | | if constexpr (is_ascii) { | 181 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | | res_offsets, size); | 183 | 31 | } else { | 184 | 31 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | 31 | res_offsets, size); | 186 | 31 | } | 187 | 31 | } |
Unexecuted instantiation: _ZN5doris13SubstringUtil7vectorsILb1ELb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil7vectorsILb1ELb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 128 | size_t size) { | 163 | 128 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | 128 | if constexpr (str_const) { | 175 | 128 | res_chars.reserve(size * chars.size()); | 176 | | } else { | 177 | | res_chars.reserve(chars.size()); | 178 | | } | 179 | | | 180 | 128 | if constexpr (is_ascii) { | 181 | 128 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | 128 | res_offsets, size); | 183 | | } else { | 184 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | | res_offsets, size); | 186 | | } | 187 | 128 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 128 | size_t size) { | 163 | 128 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | 128 | if constexpr (str_const) { | 175 | 128 | res_chars.reserve(size * chars.size()); | 176 | | } else { | 177 | | res_chars.reserve(chars.size()); | 178 | | } | 179 | | | 180 | 128 | if constexpr (is_ascii) { | 181 | 128 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | 128 | res_offsets, size); | 183 | | } else { | 184 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | | res_offsets, size); | 186 | | } | 187 | 128 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 161 | size_t size) { | 163 | 161 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | 161 | if constexpr (str_const) { | 175 | 161 | res_chars.reserve(size * chars.size()); | 176 | | } else { | 177 | | res_chars.reserve(chars.size()); | 178 | | } | 179 | | | 180 | 161 | if constexpr (is_ascii) { | 181 | 161 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | 161 | res_offsets, size); | 183 | | } else { | 184 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | | res_offsets, size); | 186 | | } | 187 | 161 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 129 | size_t size) { | 163 | 129 | res_offsets.resize(size); | 164 | | | 165 | 129 | if constexpr (start_const && len_const) { | 166 | 129 | if (start[0] == 0 || len[0] <= 0) { | 167 | 64 | for (size_t i = 0; i < size; ++i) { | 168 | 32 | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | 32 | } | 170 | 32 | return; | 171 | 32 | } | 172 | 129 | } | 173 | | | 174 | | if constexpr (str_const) { | 175 | | res_chars.reserve(size * chars.size()); | 176 | 129 | } else { | 177 | 129 | res_chars.reserve(chars.size()); | 178 | 129 | } | 179 | | | 180 | 129 | if constexpr (is_ascii) { | 181 | 129 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | 129 | res_offsets, size); | 183 | | } else { | 184 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | | res_offsets, size); | 186 | | } | 187 | 129 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 146 | size_t size) { | 163 | 146 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | | if constexpr (str_const) { | 175 | | res_chars.reserve(size * chars.size()); | 176 | 146 | } else { | 177 | 146 | res_chars.reserve(chars.size()); | 178 | 146 | } | 179 | | | 180 | 146 | if constexpr (is_ascii) { | 181 | 146 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | 146 | res_offsets, size); | 183 | | } else { | 184 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | | res_offsets, size); | 186 | | } | 187 | 146 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 128 | size_t size) { | 163 | 128 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | | if constexpr (str_const) { | 175 | | res_chars.reserve(size * chars.size()); | 176 | 128 | } else { | 177 | 128 | res_chars.reserve(chars.size()); | 178 | 128 | } | 179 | | | 180 | 128 | if constexpr (is_ascii) { | 181 | 128 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | 128 | res_offsets, size); | 183 | | } else { | 184 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | | res_offsets, size); | 186 | | } | 187 | 128 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 187 | size_t size) { | 163 | 187 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | | if constexpr (str_const) { | 175 | | res_chars.reserve(size * chars.size()); | 176 | 187 | } else { | 177 | 187 | res_chars.reserve(chars.size()); | 178 | 187 | } | 179 | | | 180 | 187 | if constexpr (is_ascii) { | 181 | 187 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | 187 | res_offsets, size); | 183 | | } else { | 184 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | | res_offsets, size); | 186 | | } | 187 | 187 | } |
Unexecuted instantiation: _ZN5doris13SubstringUtil7vectorsILb0ELb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil7vectorsILb0ELb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 16 | size_t size) { | 163 | 16 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | 16 | if constexpr (str_const) { | 175 | 16 | res_chars.reserve(size * chars.size()); | 176 | | } else { | 177 | | res_chars.reserve(chars.size()); | 178 | | } | 179 | | | 180 | | if constexpr (is_ascii) { | 181 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | | res_offsets, size); | 183 | 16 | } else { | 184 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | 16 | res_offsets, size); | 186 | 16 | } | 187 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 16 | size_t size) { | 163 | 16 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | 16 | if constexpr (str_const) { | 175 | 16 | res_chars.reserve(size * chars.size()); | 176 | | } else { | 177 | | res_chars.reserve(chars.size()); | 178 | | } | 179 | | | 180 | | if constexpr (is_ascii) { | 181 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | | res_offsets, size); | 183 | 16 | } else { | 184 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | 16 | res_offsets, size); | 186 | 16 | } | 187 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 21 | size_t size) { | 163 | 21 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | 21 | if constexpr (str_const) { | 175 | 21 | res_chars.reserve(size * chars.size()); | 176 | | } else { | 177 | | res_chars.reserve(chars.size()); | 178 | | } | 179 | | | 180 | | if constexpr (is_ascii) { | 181 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | | res_offsets, size); | 183 | 21 | } else { | 184 | 21 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | 21 | res_offsets, size); | 186 | 21 | } | 187 | 21 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 16 | size_t size) { | 163 | 16 | res_offsets.resize(size); | 164 | | | 165 | 16 | if constexpr (start_const && len_const) { | 166 | 16 | if (start[0] == 0 || len[0] <= 0) { | 167 | 8 | for (size_t i = 0; i < size; ++i) { | 168 | 4 | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | 4 | } | 170 | 4 | return; | 171 | 4 | } | 172 | 16 | } | 173 | | | 174 | | if constexpr (str_const) { | 175 | | res_chars.reserve(size * chars.size()); | 176 | 16 | } else { | 177 | 16 | res_chars.reserve(chars.size()); | 178 | 16 | } | 179 | | | 180 | | if constexpr (is_ascii) { | 181 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | | res_offsets, size); | 183 | 16 | } else { | 184 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | 16 | res_offsets, size); | 186 | 16 | } | 187 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 16 | size_t size) { | 163 | 16 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | | if constexpr (str_const) { | 175 | | res_chars.reserve(size * chars.size()); | 176 | 16 | } else { | 177 | 16 | res_chars.reserve(chars.size()); | 178 | 16 | } | 179 | | | 180 | | if constexpr (is_ascii) { | 181 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | | res_offsets, size); | 183 | 16 | } else { | 184 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | 16 | res_offsets, size); | 186 | 16 | } | 187 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 162 | 16 | size_t size) { | 163 | 16 | res_offsets.resize(size); | 164 | | | 165 | | if constexpr (start_const && len_const) { | 166 | | if (start[0] == 0 || len[0] <= 0) { | 167 | | for (size_t i = 0; i < size; ++i) { | 168 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 169 | | } | 170 | | return; | 171 | | } | 172 | | } | 173 | | | 174 | | if constexpr (str_const) { | 175 | | res_chars.reserve(size * chars.size()); | 176 | 16 | } else { | 177 | 16 | res_chars.reserve(chars.size()); | 178 | 16 | } | 179 | | | 180 | | if constexpr (is_ascii) { | 181 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 182 | | res_offsets, size); | 183 | 16 | } else { | 184 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 185 | 16 | res_offsets, size); | 186 | 16 | } | 187 | 16 | } |
|
188 | | |
189 | | template <bool str_const, bool start_const, bool len_const> |
190 | | NO_SANITIZE_UNDEFINED static void vectors_utf8( |
191 | | const ColumnString::Chars& chars, const ColumnString::Offsets& offsets, |
192 | | const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, |
193 | 128 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { |
194 | 128 | std::array<std::byte, 128 * 1024> buf; |
195 | 128 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; |
196 | 128 | PMR::vector<size_t> index {&pool}; |
197 | | |
198 | 530 | for (size_t i = 0; i < size; ++i) { |
199 | 402 | int str_size = offsets[index_check_const<str_const>(i)] - |
200 | 402 | offsets[index_check_const<str_const>(i) - 1]; |
201 | 402 | const char* str_data = |
202 | 402 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; |
203 | 402 | int start_value = start[index_check_const<start_const>(i)]; |
204 | 402 | int len_value = len[index_check_const<len_const>(i)]; |
205 | | // Unsigned numbers cannot be used here because start_value can be negative. |
206 | 402 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); |
207 | | // return empty string if start > src.length |
208 | | // Here, start_value is compared against the length of the character. |
209 | 402 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { |
210 | 237 | StringOP::push_empty_string(i, res_chars, res_offsets); |
211 | 237 | continue; |
212 | 237 | } |
213 | | |
214 | 165 | size_t byte_pos = 0; |
215 | 165 | index.clear(); |
216 | 1.28k | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { |
217 | 1.16k | char_size = get_utf8_byte_length(str_data[j]); |
218 | 1.16k | index.push_back(j); |
219 | | // index_size represents the number of characters from the beginning of the character to the current position. |
220 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. |
221 | 1.16k | if (start_value > 0 && index.size() > start_value + len_value) { |
222 | 48 | break; |
223 | 48 | } |
224 | 1.16k | } |
225 | | |
226 | 165 | int64_t fixed_pos = start_value; |
227 | 165 | if (fixed_pos < -(int)index.size()) { |
228 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
229 | 0 | continue; |
230 | 0 | } |
231 | 165 | if (fixed_pos < 0) { |
232 | 60 | fixed_pos = index.size() + fixed_pos + 1; |
233 | 60 | } |
234 | | |
235 | 165 | byte_pos = index[fixed_pos - 1]; |
236 | 165 | size_t fixed_len = str_size - byte_pos; |
237 | 165 | if (fixed_pos + len_value <= index.size()) { |
238 | 49 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; |
239 | 49 | } |
240 | | |
241 | 165 | if (byte_pos <= str_size && fixed_len > 0) { |
242 | 165 | StringOP::push_value_string_reserved_and_allow_overflow( |
243 | 165 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); |
244 | 165 | } else { |
245 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
246 | 0 | } |
247 | 165 | } |
248 | 128 | } _ZN5doris13SubstringUtil12vectors_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 193 | 31 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 194 | 31 | std::array<std::byte, 128 * 1024> buf; | 195 | 31 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 196 | 31 | PMR::vector<size_t> index {&pool}; | 197 | | | 198 | 336 | for (size_t i = 0; i < size; ++i) { | 199 | 305 | int str_size = offsets[index_check_const<str_const>(i)] - | 200 | 305 | offsets[index_check_const<str_const>(i) - 1]; | 201 | 305 | const char* str_data = | 202 | 305 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 203 | 305 | int start_value = start[index_check_const<start_const>(i)]; | 204 | 305 | int len_value = len[index_check_const<len_const>(i)]; | 205 | | // Unsigned numbers cannot be used here because start_value can be negative. | 206 | 305 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 207 | | // return empty string if start > src.length | 208 | | // Here, start_value is compared against the length of the character. | 209 | 305 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 210 | 198 | StringOP::push_empty_string(i, res_chars, res_offsets); | 211 | 198 | continue; | 212 | 198 | } | 213 | | | 214 | 107 | size_t byte_pos = 0; | 215 | 107 | index.clear(); | 216 | 879 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 217 | 802 | char_size = get_utf8_byte_length(str_data[j]); | 218 | 802 | index.push_back(j); | 219 | | // index_size represents the number of characters from the beginning of the character to the current position. 
| 220 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 221 | 802 | if (start_value > 0 && index.size() > start_value + len_value) { | 222 | 30 | break; | 223 | 30 | } | 224 | 802 | } | 225 | | | 226 | 107 | int64_t fixed_pos = start_value; | 227 | 107 | if (fixed_pos < -(int)index.size()) { | 228 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 229 | 0 | continue; | 230 | 0 | } | 231 | 107 | if (fixed_pos < 0) { | 232 | 40 | fixed_pos = index.size() + fixed_pos + 1; | 233 | 40 | } | 234 | | | 235 | 107 | byte_pos = index[fixed_pos - 1]; | 236 | 107 | size_t fixed_len = str_size - byte_pos; | 237 | 107 | if (fixed_pos + len_value <= index.size()) { | 238 | 31 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 239 | 31 | } | 240 | | | 241 | 107 | if (byte_pos <= str_size && fixed_len > 0) { | 242 | 107 | StringOP::push_value_string_reserved_and_allow_overflow( | 243 | 107 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 244 | 107 | } else { | 245 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 246 | 0 | } | 247 | 107 | } | 248 | 31 | } |
Unexecuted instantiation: _ZN5doris13SubstringUtil12vectors_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil12vectors_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 193 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 194 | 16 | std::array<std::byte, 128 * 1024> buf; | 195 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 196 | 16 | PMR::vector<size_t> index {&pool}; | 197 | | | 198 | 32 | for (size_t i = 0; i < size; ++i) { | 199 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 200 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 201 | 16 | const char* str_data = | 202 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 203 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 204 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 205 | | // Unsigned numbers cannot be used here because start_value can be negative. | 206 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 207 | | // return empty string if start > src.length | 208 | | // Here, start_value is compared against the length of the character. 
| 209 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 210 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 211 | 7 | continue; | 212 | 7 | } | 213 | | | 214 | 9 | size_t byte_pos = 0; | 215 | 9 | index.clear(); | 216 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 217 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 218 | 59 | index.push_back(j); | 219 | | // index_size represents the number of characters from the beginning of the character to the current position. | 220 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 221 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 222 | 3 | break; | 223 | 3 | } | 224 | 59 | } | 225 | | | 226 | 9 | int64_t fixed_pos = start_value; | 227 | 9 | if (fixed_pos < -(int)index.size()) { | 228 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 229 | 0 | continue; | 230 | 0 | } | 231 | 9 | if (fixed_pos < 0) { | 232 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 233 | 3 | } | 234 | | | 235 | 9 | byte_pos = index[fixed_pos - 1]; | 236 | 9 | size_t fixed_len = str_size - byte_pos; | 237 | 9 | if (fixed_pos + len_value <= index.size()) { | 238 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 239 | 3 | } | 240 | | | 241 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 242 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 243 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 244 | 9 | } else { | 245 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 246 | 0 | } | 247 | 9 | } | 248 | 16 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 193 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 194 | 16 | std::array<std::byte, 128 * 1024> buf; | 195 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 196 | 16 | PMR::vector<size_t> index {&pool}; | 197 | | | 198 | 32 | for (size_t i = 0; i < size; ++i) { | 199 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 200 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 201 | 16 | const char* str_data = | 202 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 203 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 204 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 205 | | // Unsigned numbers cannot be used here because start_value can be negative. | 206 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 207 | | // return empty string if start > src.length | 208 | | // Here, start_value is compared against the length of the character. | 209 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 210 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 211 | 7 | continue; | 212 | 7 | } | 213 | | | 214 | 9 | size_t byte_pos = 0; | 215 | 9 | index.clear(); | 216 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 217 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 218 | 59 | index.push_back(j); | 219 | | // index_size represents the number of characters from the beginning of the character to the current position. | 220 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. 
| 221 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 222 | 3 | break; | 223 | 3 | } | 224 | 59 | } | 225 | | | 226 | 9 | int64_t fixed_pos = start_value; | 227 | 9 | if (fixed_pos < -(int)index.size()) { | 228 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 229 | 0 | continue; | 230 | 0 | } | 231 | 9 | if (fixed_pos < 0) { | 232 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 233 | 3 | } | 234 | | | 235 | 9 | byte_pos = index[fixed_pos - 1]; | 236 | 9 | size_t fixed_len = str_size - byte_pos; | 237 | 9 | if (fixed_pos + len_value <= index.size()) { | 238 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 239 | 3 | } | 240 | | | 241 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 242 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 243 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 244 | 9 | } else { | 245 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 246 | 0 | } | 247 | 9 | } | 248 | 16 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 193 | 21 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 194 | 21 | std::array<std::byte, 128 * 1024> buf; | 195 | 21 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 196 | 21 | PMR::vector<size_t> index {&pool}; | 197 | | | 198 | 42 | for (size_t i = 0; i < size; ++i) { | 199 | 21 | int str_size = offsets[index_check_const<str_const>(i)] - | 200 | 21 | offsets[index_check_const<str_const>(i) - 1]; | 201 | 21 | const char* str_data = | 202 | 21 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 203 | 21 | int start_value = start[index_check_const<start_const>(i)]; | 204 | 21 | int len_value = len[index_check_const<len_const>(i)]; | 205 | | // Unsigned numbers cannot be used here because start_value can be negative. | 206 | 21 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 207 | | // return empty string if start > src.length | 208 | | // Here, start_value is compared against the length of the character. | 209 | 21 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 210 | 8 | StringOP::push_empty_string(i, res_chars, res_offsets); | 211 | 8 | continue; | 212 | 8 | } | 213 | | | 214 | 13 | size_t byte_pos = 0; | 215 | 13 | index.clear(); | 216 | 82 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 217 | 72 | char_size = get_utf8_byte_length(str_data[j]); | 218 | 72 | index.push_back(j); | 219 | | // index_size represents the number of characters from the beginning of the character to the current position. | 220 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. 
| 221 | 72 | if (start_value > 0 && index.size() > start_value + len_value) { | 222 | 3 | break; | 223 | 3 | } | 224 | 72 | } | 225 | | | 226 | 13 | int64_t fixed_pos = start_value; | 227 | 13 | if (fixed_pos < -(int)index.size()) { | 228 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 229 | 0 | continue; | 230 | 0 | } | 231 | 13 | if (fixed_pos < 0) { | 232 | 5 | fixed_pos = index.size() + fixed_pos + 1; | 233 | 5 | } | 234 | | | 235 | 13 | byte_pos = index[fixed_pos - 1]; | 236 | 13 | size_t fixed_len = str_size - byte_pos; | 237 | 13 | if (fixed_pos + len_value <= index.size()) { | 238 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 239 | 3 | } | 240 | | | 241 | 13 | if (byte_pos <= str_size && fixed_len > 0) { | 242 | 13 | StringOP::push_value_string_reserved_and_allow_overflow( | 243 | 13 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 244 | 13 | } else { | 245 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 246 | 0 | } | 247 | 13 | } | 248 | 21 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 193 | 12 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 194 | 12 | std::array<std::byte, 128 * 1024> buf; | 195 | 12 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 196 | 12 | PMR::vector<size_t> index {&pool}; | 197 | | | 198 | 24 | for (size_t i = 0; i < size; ++i) { | 199 | 12 | int str_size = offsets[index_check_const<str_const>(i)] - | 200 | 12 | offsets[index_check_const<str_const>(i) - 1]; | 201 | 12 | const char* str_data = | 202 | 12 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 203 | 12 | int start_value = start[index_check_const<start_const>(i)]; | 204 | 12 | int len_value = len[index_check_const<len_const>(i)]; | 205 | | // Unsigned numbers cannot be used here because start_value can be negative. | 206 | 12 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 207 | | // return empty string if start > src.length | 208 | | // Here, start_value is compared against the length of the character. | 209 | 12 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 210 | 3 | StringOP::push_empty_string(i, res_chars, res_offsets); | 211 | 3 | continue; | 212 | 3 | } | 213 | | | 214 | 9 | size_t byte_pos = 0; | 215 | 9 | index.clear(); | 216 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 217 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 218 | 59 | index.push_back(j); | 219 | | // index_size represents the number of characters from the beginning of the character to the current position. | 220 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. 
| 221 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 222 | 3 | break; | 223 | 3 | } | 224 | 59 | } | 225 | | | 226 | 9 | int64_t fixed_pos = start_value; | 227 | 9 | if (fixed_pos < -(int)index.size()) { | 228 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 229 | 0 | continue; | 230 | 0 | } | 231 | 9 | if (fixed_pos < 0) { | 232 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 233 | 3 | } | 234 | | | 235 | 9 | byte_pos = index[fixed_pos - 1]; | 236 | 9 | size_t fixed_len = str_size - byte_pos; | 237 | 9 | if (fixed_pos + len_value <= index.size()) { | 238 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 239 | 3 | } | 240 | | | 241 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 242 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 243 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 244 | 9 | } else { | 245 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 246 | 0 | } | 247 | 9 | } | 248 | 12 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 193 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 194 | 16 | std::array<std::byte, 128 * 1024> buf; | 195 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 196 | 16 | PMR::vector<size_t> index {&pool}; | 197 | | | 198 | 32 | for (size_t i = 0; i < size; ++i) { | 199 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 200 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 201 | 16 | const char* str_data = | 202 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 203 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 204 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 205 | | // Unsigned numbers cannot be used here because start_value can be negative. | 206 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 207 | | // return empty string if start > src.length | 208 | | // Here, start_value is compared against the length of the character. | 209 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 210 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 211 | 7 | continue; | 212 | 7 | } | 213 | | | 214 | 9 | size_t byte_pos = 0; | 215 | 9 | index.clear(); | 216 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 217 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 218 | 59 | index.push_back(j); | 219 | | // index_size represents the number of characters from the beginning of the character to the current position. | 220 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. 
| 221 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 222 | 3 | break; | 223 | 3 | } | 224 | 59 | } | 225 | | | 226 | 9 | int64_t fixed_pos = start_value; | 227 | 9 | if (fixed_pos < -(int)index.size()) { | 228 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 229 | 0 | continue; | 230 | 0 | } | 231 | 9 | if (fixed_pos < 0) { | 232 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 233 | 3 | } | 234 | | | 235 | 9 | byte_pos = index[fixed_pos - 1]; | 236 | 9 | size_t fixed_len = str_size - byte_pos; | 237 | 9 | if (fixed_pos + len_value <= index.size()) { | 238 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 239 | 3 | } | 240 | | | 241 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 242 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 243 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 244 | 9 | } else { | 245 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 246 | 0 | } | 247 | 9 | } | 248 | 16 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 193 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 194 | 16 | std::array<std::byte, 128 * 1024> buf; | 195 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 196 | 16 | PMR::vector<size_t> index {&pool}; | 197 | | | 198 | 32 | for (size_t i = 0; i < size; ++i) { | 199 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 200 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 201 | 16 | const char* str_data = | 202 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 203 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 204 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 205 | | // Unsigned numbers cannot be used here because start_value can be negative. | 206 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 207 | | // return empty string if start > src.length | 208 | | // Here, start_value is compared against the length of the character. | 209 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 210 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 211 | 7 | continue; | 212 | 7 | } | 213 | | | 214 | 9 | size_t byte_pos = 0; | 215 | 9 | index.clear(); | 216 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 217 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 218 | 59 | index.push_back(j); | 219 | | // index_size represents the number of characters from the beginning of the character to the current position. | 220 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. 
| 221 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 222 | 3 | break; | 223 | 3 | } | 224 | 59 | } | 225 | | | 226 | 9 | int64_t fixed_pos = start_value; | 227 | 9 | if (fixed_pos < -(int)index.size()) { | 228 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 229 | 0 | continue; | 230 | 0 | } | 231 | 9 | if (fixed_pos < 0) { | 232 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 233 | 3 | } | 234 | | | 235 | 9 | byte_pos = index[fixed_pos - 1]; | 236 | 9 | size_t fixed_len = str_size - byte_pos; | 237 | 9 | if (fixed_pos + len_value <= index.size()) { | 238 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 239 | 3 | } | 240 | | | 241 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 242 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 243 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 244 | 9 | } else { | 245 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 246 | 0 | } | 247 | 9 | } | 248 | 16 | } |
|
249 | | |
250 | | template <bool str_const, bool start_const, bool len_const> |
251 | | static void vectors_ascii(const ColumnString::Chars& chars, |
252 | | const ColumnString::Offsets& offsets, |
253 | | const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, |
254 | | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, |
255 | 975 | size_t size) { |
256 | 1.97k | for (size_t i = 0; i < size; ++i) { |
257 | 1.00k | int str_size = offsets[index_check_const<str_const>(i)] - |
258 | 1.00k | offsets[index_check_const<str_const>(i) - 1]; |
259 | 1.00k | const char* str_data = |
260 | 1.00k | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; |
261 | 1.00k | int start_value = start[index_check_const<start_const>(i)]; |
262 | 1.00k | int len_value = len[index_check_const<len_const>(i)]; |
263 | | |
264 | 1.00k | if (start_value > str_size || start_value < -str_size || str_size == 0 || |
265 | 1.00k | len_value <= 0) { |
266 | 496 | StringOP::push_empty_string(i, res_chars, res_offsets); |
267 | 496 | continue; |
268 | 496 | } |
269 | 506 | int fixed_pos = start_value - 1; |
270 | 506 | if (fixed_pos < 0) { |
271 | 184 | fixed_pos = str_size + fixed_pos + 1; |
272 | 184 | } |
273 | 506 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); |
274 | 506 | StringOP::push_value_string_reserved_and_allow_overflow( |
275 | 506 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); |
276 | 506 | } |
277 | 975 | } Unexecuted instantiation: _ZN5doris13SubstringUtil13vectors_asciiILb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil13vectors_asciiILb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 255 | 128 | size_t size) { | 256 | 256 | for (size_t i = 0; i < size; ++i) { | 257 | 128 | int str_size = offsets[index_check_const<str_const>(i)] - | 258 | 128 | offsets[index_check_const<str_const>(i) - 1]; | 259 | 128 | const char* str_data = | 260 | 128 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 261 | 128 | int start_value = start[index_check_const<start_const>(i)]; | 262 | 128 | int len_value = len[index_check_const<len_const>(i)]; | 263 | | | 264 | 128 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 265 | 128 | len_value <= 0) { | 266 | 65 | StringOP::push_empty_string(i, res_chars, res_offsets); | 267 | 65 | continue; | 268 | 65 | } | 269 | 63 | int fixed_pos = start_value - 1; | 270 | 63 | if (fixed_pos < 0) { | 271 | 21 | fixed_pos = str_size + fixed_pos + 1; | 272 | 21 | } | 273 | 63 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 274 | 63 | StringOP::push_value_string_reserved_and_allow_overflow( | 275 | 63 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 276 | 63 | } | 277 | 128 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 255 | 128 | size_t size) { | 256 | 256 | for (size_t i = 0; i < size; ++i) { | 257 | 128 | int str_size = offsets[index_check_const<str_const>(i)] - | 258 | 128 | offsets[index_check_const<str_const>(i) - 1]; | 259 | 128 | const char* str_data = | 260 | 128 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 261 | 128 | int start_value = start[index_check_const<start_const>(i)]; | 262 | 128 | int len_value = len[index_check_const<len_const>(i)]; | 263 | | | 264 | 128 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 265 | 128 | len_value <= 0) { | 266 | 65 | StringOP::push_empty_string(i, res_chars, res_offsets); | 267 | 65 | continue; | 268 | 65 | } | 269 | 63 | int fixed_pos = start_value - 1; | 270 | 63 | if (fixed_pos < 0) { | 271 | 21 | fixed_pos = str_size + fixed_pos + 1; | 272 | 21 | } | 273 | 63 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 274 | 63 | StringOP::push_value_string_reserved_and_allow_overflow( | 275 | 63 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 276 | 63 | } | 277 | 128 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 255 | 161 | size_t size) { | 256 | 322 | for (size_t i = 0; i < size; ++i) { | 257 | 161 | int str_size = offsets[index_check_const<str_const>(i)] - | 258 | 161 | offsets[index_check_const<str_const>(i) - 1]; | 259 | 161 | const char* str_data = | 260 | 161 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 261 | 161 | int start_value = start[index_check_const<start_const>(i)]; | 262 | 161 | int len_value = len[index_check_const<len_const>(i)]; | 263 | | | 264 | 161 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 265 | 161 | len_value <= 0) { | 266 | 81 | StringOP::push_empty_string(i, res_chars, res_offsets); | 267 | 81 | continue; | 268 | 81 | } | 269 | 80 | int fixed_pos = start_value - 1; | 270 | 80 | if (fixed_pos < 0) { | 271 | 31 | fixed_pos = str_size + fixed_pos + 1; | 272 | 31 | } | 273 | 80 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 274 | 80 | StringOP::push_value_string_reserved_and_allow_overflow( | 275 | 80 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 276 | 80 | } | 277 | 161 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 255 | 97 | size_t size) { | 256 | 194 | for (size_t i = 0; i < size; ++i) { | 257 | 97 | int str_size = offsets[index_check_const<str_const>(i)] - | 258 | 97 | offsets[index_check_const<str_const>(i) - 1]; | 259 | 97 | const char* str_data = | 260 | 97 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 261 | 97 | int start_value = start[index_check_const<start_const>(i)]; | 262 | 97 | int len_value = len[index_check_const<len_const>(i)]; | 263 | | | 264 | 97 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 265 | 97 | len_value <= 0) { | 266 | 33 | StringOP::push_empty_string(i, res_chars, res_offsets); | 267 | 33 | continue; | 268 | 33 | } | 269 | 64 | int fixed_pos = start_value - 1; | 270 | 64 | if (fixed_pos < 0) { | 271 | 21 | fixed_pos = str_size + fixed_pos + 1; | 272 | 21 | } | 273 | 64 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 274 | 64 | StringOP::push_value_string_reserved_and_allow_overflow( | 275 | 64 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 276 | 64 | } | 277 | 97 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 255 | 146 | size_t size) { | 256 | 292 | for (size_t i = 0; i < size; ++i) { | 257 | 146 | int str_size = offsets[index_check_const<str_const>(i)] - | 258 | 146 | offsets[index_check_const<str_const>(i) - 1]; | 259 | 146 | const char* str_data = | 260 | 146 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 261 | 146 | int start_value = start[index_check_const<start_const>(i)]; | 262 | 146 | int len_value = len[index_check_const<len_const>(i)]; | 263 | | | 264 | 146 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 265 | 146 | len_value <= 0) { | 266 | 77 | StringOP::push_empty_string(i, res_chars, res_offsets); | 267 | 77 | continue; | 268 | 77 | } | 269 | 69 | int fixed_pos = start_value - 1; | 270 | 69 | if (fixed_pos < 0) { | 271 | 24 | fixed_pos = str_size + fixed_pos + 1; | 272 | 24 | } | 273 | 69 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 274 | 69 | StringOP::push_value_string_reserved_and_allow_overflow( | 275 | 69 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 276 | 69 | } | 277 | 146 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 255 | 128 | size_t size) { | 256 | 256 | for (size_t i = 0; i < size; ++i) { | 257 | 128 | int str_size = offsets[index_check_const<str_const>(i)] - | 258 | 128 | offsets[index_check_const<str_const>(i) - 1]; | 259 | 128 | const char* str_data = | 260 | 128 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 261 | 128 | int start_value = start[index_check_const<start_const>(i)]; | 262 | 128 | int len_value = len[index_check_const<len_const>(i)]; | 263 | | | 264 | 128 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 265 | 128 | len_value <= 0) { | 266 | 65 | StringOP::push_empty_string(i, res_chars, res_offsets); | 267 | 65 | continue; | 268 | 65 | } | 269 | 63 | int fixed_pos = start_value - 1; | 270 | 63 | if (fixed_pos < 0) { | 271 | 21 | fixed_pos = str_size + fixed_pos + 1; | 272 | 21 | } | 273 | 63 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 274 | 63 | StringOP::push_value_string_reserved_and_allow_overflow( | 275 | 63 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 276 | 63 | } | 277 | 128 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 255 | 187 | size_t size) { | 256 | 401 | for (size_t i = 0; i < size; ++i) { | 257 | 214 | int str_size = offsets[index_check_const<str_const>(i)] - | 258 | 214 | offsets[index_check_const<str_const>(i) - 1]; | 259 | 214 | const char* str_data = | 260 | 214 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 261 | 214 | int start_value = start[index_check_const<start_const>(i)]; | 262 | 214 | int len_value = len[index_check_const<len_const>(i)]; | 263 | | | 264 | 214 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 265 | 214 | len_value <= 0) { | 266 | 110 | StringOP::push_empty_string(i, res_chars, res_offsets); | 267 | 110 | continue; | 268 | 110 | } | 269 | 104 | int fixed_pos = start_value - 1; | 270 | 104 | if (fixed_pos < 0) { | 271 | 45 | fixed_pos = str_size + fixed_pos + 1; | 272 | 45 | } | 273 | 104 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 274 | 104 | StringOP::push_value_string_reserved_and_allow_overflow( | 275 | 104 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 276 | 104 | } | 277 | 187 | } |
|
278 | | }; |
279 | | |
280 | | } // namespace doris |