be/src/exec/common/stringop_substring.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <sys/types.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <array> |
24 | | #include <boost/iterator/iterator_facade.hpp> |
25 | | #include <boost/locale.hpp> |
26 | | #include <climits> |
27 | | #include <cmath> |
28 | | #include <cstddef> |
29 | | #include <cstdlib> |
30 | | #include <cstring> |
31 | | #include <utility> |
32 | | #include <vector> |
33 | | |
34 | | #include "common/compiler_util.h" // IWYU pragma: keep |
35 | | #include "core/block/block.h" |
36 | | #include "core/block/column_numbers.h" |
37 | | #include "core/block/column_with_type_and_name.h" |
38 | | #include "core/column/column.h" |
39 | | #include "core/column/column_const.h" |
40 | | #include "core/column/column_vector.h" |
41 | | #include "core/data_type/data_type.h" |
42 | | #include "core/pod_array_fwd.h" |
43 | | #include "core/types.h" |
44 | | |
45 | | #ifndef USE_LIBCPP |
46 | | #include <memory_resource> |
47 | 864 | #define PMR std::pmr |
48 | | #else |
49 | | #include <boost/container/pmr/monotonic_buffer_resource.hpp> |
50 | | #include <boost/container/pmr/vector.hpp> |
51 | | #define PMR boost::container::pmr |
52 | | #endif |
53 | | |
54 | | #include <fmt/format.h> |
55 | | |
56 | | #include <cstdint> |
57 | | #include <string_view> |
58 | | |
59 | | #include "core/assert_cast.h" |
60 | | #include "core/column/column_decimal.h" |
61 | | #include "core/column/column_nullable.h" |
62 | | #include "core/column/column_string.h" |
63 | | #include "core/string_ref.h" |
64 | | #include "util/simd/vstring_function.h" |
65 | | |
66 | | namespace doris { |
67 | | #include "common/compile_check_begin.h" |
68 | | struct StringOP { |
69 | | static void push_empty_string(size_t index, ColumnString::Chars& chars, |
70 | 543k | ColumnString::Offsets& offsets) { |
71 | 543k | offsets[index] = (ColumnString::Offset)chars.size(); |
72 | 543k | } |
73 | | |
74 | | static void push_null_string(size_t index, ColumnString::Chars& chars, |
75 | 23.8k | ColumnString::Offsets& offsets, NullMap& null_map) { |
76 | 23.8k | null_map[index] = 1; |
77 | 23.8k | push_empty_string(index, chars, offsets); |
78 | 23.8k | } |
79 | | |
80 | | static void push_value_string(const std::string_view& string_value, size_t index, |
81 | 125k | ColumnString::Chars& chars, ColumnString::Offsets& offsets) { |
82 | 125k | ColumnString::check_chars_length(chars.size() + string_value.size(), offsets.size()); |
83 | | |
84 | 125k | chars.insert(string_value.data(), string_value.data() + string_value.size()); |
85 | 125k | offsets[index] = (ColumnString::Offset)chars.size(); |
86 | 125k | } |
87 | | |
88 | | static void push_value_string_reserved_and_allow_overflow(const std::string_view& string_value, |
89 | | size_t index, |
90 | | ColumnString::Chars& chars, |
91 | 2.88M | ColumnString::Offsets& offsets) { |
92 | 2.88M | chars.insert_assume_reserved_and_allow_overflow(string_value.data(), |
93 | 2.88M | string_value.data() + string_value.size()); |
94 | 2.88M | offsets[index] = (ColumnString::Offset)chars.size(); |
95 | 2.88M | } |
96 | | |
97 | | static void fast_repeat(uint8_t* dst, const uint8_t* src, size_t src_size, |
98 | 2.21k | int32_t repeat_times) { |
99 | 2.21k | if (UNLIKELY(repeat_times <= 0)) { |
100 | 73 | return; |
101 | 73 | } |
102 | 2.14k | uint8_t* dst_begin = dst; |
103 | 2.14k | uint8_t* dst_curr = dst; |
104 | 2.14k | int32_t k = 0; |
105 | 2.14k | int32_t is_odd = repeat_times & 1; |
106 | 2.14k | repeat_times >>= 1; |
107 | | |
108 | 2.14k | memcpy(dst_curr, src, src_size); |
109 | 2.14k | dst_curr += src_size; |
110 | 7.35k | for (; repeat_times > 0; k += 1, is_odd = repeat_times & 1, repeat_times >>= 1) { |
111 | 5.21k | int64_t len = src_size * (1 << k); |
112 | 5.21k | memcpy(dst_curr, dst_begin, len); |
113 | 5.21k | dst_curr += len; |
114 | 5.21k | if (is_odd) { |
115 | 3.10k | memcpy(dst_curr, dst_begin, len); |
116 | 3.10k | dst_curr += len; |
117 | 3.10k | } |
118 | 5.21k | } |
119 | 2.14k | } |
120 | | }; |
121 | | |
122 | | struct SubstringUtil { |
123 | | static constexpr auto name = "substring"; |
124 | | |
125 | | static void substring_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, |
126 | 28.4k | size_t input_rows_count) { |
127 | 28.4k | DCHECK_EQ(arguments.size(), 3); |
128 | 28.4k | auto res = ColumnString::create(); |
129 | | |
130 | 28.4k | bool col_const[3]; |
131 | 28.4k | ColumnPtr argument_columns[3]; |
132 | 113k | for (int i = 0; i < 3; ++i) { |
133 | 85.3k | std::tie(argument_columns[i], col_const[i]) = |
134 | 85.3k | unpack_if_const(block.get_by_position(arguments[i]).column); |
135 | 85.3k | } |
136 | | |
137 | 28.4k | const auto* specific_str_column = |
138 | 28.4k | assert_cast<const ColumnString*>(argument_columns[0].get()); |
139 | 28.4k | const auto* specific_start_column = |
140 | 28.4k | assert_cast<const ColumnInt32*>(argument_columns[1].get()); |
141 | 28.4k | const auto* specific_len_column = |
142 | 28.4k | assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
143 | | |
144 | 28.4k | bool is_ascii = specific_str_column->is_ascii(); |
145 | | |
146 | 28.4k | std::visit( |
147 | 28.4k | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { |
148 | 28.4k | vectors<is_ascii, str_const, start_const, len_const>( |
149 | 28.4k | specific_str_column->get_chars(), specific_str_column->get_offsets(), |
150 | 28.4k | specific_start_column->get_data(), specific_len_column->get_data(), |
151 | 28.4k | res->get_chars(), res->get_offsets(), input_rows_count); |
152 | 28.4k | }, _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 43 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 43 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 43 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 43 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 43 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 43 | }, |
Unexecuted instantiation: _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_ _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 128 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 128 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 128 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 128 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 128 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 128 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 128 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 128 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 128 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 128 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 128 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 128 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 161 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 161 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 161 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 161 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 161 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 161 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 26.8k | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 26.8k | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 26.8k | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 26.8k | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 26.8k | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 26.8k | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 146 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 146 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 146 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 146 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 146 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 146 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 160 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 160 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 160 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 160 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 160 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 160 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 415 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 415 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 415 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 415 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 415 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 415 | }, |
Unexecuted instantiation: _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_ _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 21 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 21 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 21 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 21 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 21 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 21 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 308 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 308 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 308 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 308 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 308 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 308 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 16 | }, |
|
153 | 28.4k | make_bool_variant(is_ascii), make_bool_variant(col_const[0]), |
154 | 28.4k | make_bool_variant(col_const[1]), make_bool_variant(col_const[2])); |
155 | 28.4k | block.get_by_position(result).column = std::move(res); |
156 | 28.4k | } |
157 | | |
158 | | private: |
159 | | template <bool is_ascii, bool str_const, bool start_const, bool len_const> |
160 | | static void vectors(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets, |
161 | | const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, |
162 | | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, |
163 | 28.4k | size_t size) { |
164 | 28.4k | res_offsets.resize(size); |
165 | | |
166 | 28.4k | if constexpr (start_const && len_const) { |
167 | 27.1k | if (start[0] == 0 || len[0] <= 0) { |
168 | 74 | for (size_t i = 0; i < size; ++i) { |
169 | 37 | StringOP::push_empty_string(i, res_chars, res_offsets); |
170 | 37 | } |
171 | 37 | return; |
172 | 37 | } |
173 | 27.1k | } |
174 | | |
175 | 27.1k | if constexpr (str_const) { |
176 | 470 | res_chars.reserve(size * chars.size()); |
177 | 27.9k | } else { |
178 | 27.9k | res_chars.reserve(chars.size()); |
179 | 27.9k | } |
180 | | |
181 | 28.4k | if constexpr (is_ascii) { |
182 | 28.0k | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, |
183 | 28.0k | res_offsets, size); |
184 | 28.0k | } else { |
185 | 436 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, |
186 | 436 | res_offsets, size); |
187 | 436 | } |
188 | 28.4k | } _ZN5doris13SubstringUtil7vectorsILb0ELb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 43 | size_t size) { | 164 | 43 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 43 | } else { | 178 | 43 | res_chars.reserve(chars.size()); | 179 | 43 | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 43 | } else { | 185 | 43 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 43 | res_offsets, size); | 187 | 43 | } | 188 | 43 | } |
Unexecuted instantiation: _ZN5doris13SubstringUtil7vectorsILb1ELb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil7vectorsILb1ELb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 128 | size_t size) { | 164 | 128 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 128 | if constexpr (str_const) { | 176 | 128 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | 128 | if constexpr (is_ascii) { | 182 | 128 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 128 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 128 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 128 | size_t size) { | 164 | 128 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 128 | if constexpr (str_const) { | 176 | 128 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | 128 | if constexpr (is_ascii) { | 182 | 128 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 128 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 128 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 161 | size_t size) { | 164 | 161 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 161 | if constexpr (str_const) { | 176 | 161 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | 161 | if constexpr (is_ascii) { | 182 | 161 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 161 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 161 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 26.8k | size_t size) { | 164 | 26.8k | res_offsets.resize(size); | 165 | | | 166 | 26.8k | if constexpr (start_const && len_const) { | 167 | 26.8k | if (start[0] == 0 || len[0] <= 0) { | 168 | 66 | for (size_t i = 0; i < size; ++i) { | 169 | 33 | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | 33 | } | 171 | 33 | return; | 172 | 33 | } | 173 | 26.8k | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 26.8k | } else { | 178 | 26.8k | res_chars.reserve(chars.size()); | 179 | 26.8k | } | 180 | | | 181 | 26.8k | if constexpr (is_ascii) { | 182 | 26.8k | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 26.8k | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 26.8k | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 146 | size_t size) { | 164 | 146 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 146 | } else { | 178 | 146 | res_chars.reserve(chars.size()); | 179 | 146 | } | 180 | | | 181 | 146 | if constexpr (is_ascii) { | 182 | 146 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 146 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 146 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 160 | size_t size) { | 164 | 160 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 160 | } else { | 178 | 160 | res_chars.reserve(chars.size()); | 179 | 160 | } | 180 | | | 181 | 160 | if constexpr (is_ascii) { | 182 | 160 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 160 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 160 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 415 | size_t size) { | 164 | 415 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 415 | } else { | 178 | 415 | res_chars.reserve(chars.size()); | 179 | 415 | } | 180 | | | 181 | 415 | if constexpr (is_ascii) { | 182 | 415 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 415 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 415 | } |
Unexecuted instantiation: _ZN5doris13SubstringUtil7vectorsILb0ELb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil7vectorsILb0ELb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 16 | size_t size) { | 164 | 16 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 16 | if constexpr (str_const) { | 176 | 16 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 16 | } else { | 185 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 16 | res_offsets, size); | 187 | 16 | } | 188 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 16 | size_t size) { | 164 | 16 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 16 | if constexpr (str_const) { | 176 | 16 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 16 | } else { | 185 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 16 | res_offsets, size); | 187 | 16 | } | 188 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 21 | size_t size) { | 164 | 21 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 21 | if constexpr (str_const) { | 176 | 21 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 21 | } else { | 185 | 21 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 21 | res_offsets, size); | 187 | 21 | } | 188 | 21 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 308 | size_t size) { | 164 | 308 | res_offsets.resize(size); | 165 | | | 166 | 308 | if constexpr (start_const && len_const) { | 167 | 308 | if (start[0] == 0 || len[0] <= 0) { | 168 | 8 | for (size_t i = 0; i < size; ++i) { | 169 | 4 | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | 4 | } | 171 | 4 | return; | 172 | 4 | } | 173 | 308 | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 308 | } else { | 178 | 308 | res_chars.reserve(chars.size()); | 179 | 308 | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 308 | } else { | 185 | 308 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 308 | res_offsets, size); | 187 | 308 | } | 188 | 308 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 16 | size_t size) { | 164 | 16 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 16 | } else { | 178 | 16 | res_chars.reserve(chars.size()); | 179 | 16 | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 16 | } else { | 185 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 16 | res_offsets, size); | 187 | 16 | } | 188 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 16 | size_t size) { | 164 | 16 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 16 | } else { | 178 | 16 | res_chars.reserve(chars.size()); | 179 | 16 | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 16 | } else { | 185 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 16 | res_offsets, size); | 187 | 16 | } | 188 | 16 | } |
|
189 | | |
190 | | template <bool str_const, bool start_const, bool len_const> |
191 | | NO_SANITIZE_UNDEFINED static void vectors_utf8( |
192 | | const ColumnString::Chars& chars, const ColumnString::Offsets& offsets, |
193 | | const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, |
194 | 432 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { |
195 | 432 | std::array<std::byte, 128 * 1024> buf; |
196 | 432 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; |
197 | 432 | PMR::vector<size_t> index {&pool}; |
198 | | |
199 | 1.17k | for (size_t i = 0; i < size; ++i) { |
200 | 739 | int str_size = offsets[index_check_const<str_const>(i)] - |
201 | 739 | offsets[index_check_const<str_const>(i) - 1]; |
202 | 739 | const char* str_data = |
203 | 739 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; |
204 | 739 | int start_value = start[index_check_const<start_const>(i)]; |
205 | 739 | int len_value = len[index_check_const<len_const>(i)]; |
206 | | // Unsigned numbers cannot be used here because start_value can be negative. |
207 | 739 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); |
208 | | // return empty string if start > src.length |
209 | | // Here, start_value is compared against the length of the character. |
210 | 739 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { |
211 | 242 | StringOP::push_empty_string(i, res_chars, res_offsets); |
212 | 242 | continue; |
213 | 242 | } |
214 | | |
215 | 497 | size_t byte_pos = 0; |
216 | 497 | index.clear(); |
217 | 3.90k | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { |
218 | 3.46k | char_size = get_utf8_byte_length(str_data[j]); |
219 | 3.46k | index.push_back(j); |
220 | | // index_size represents the number of characters from the beginning of the character to the current position. |
221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. |
222 | 3.46k | if (start_value > 0 && index.size() > start_value + len_value) { |
223 | 52 | break; |
224 | 52 | } |
225 | 3.46k | } |
226 | | |
227 | 497 | int64_t fixed_pos = start_value; |
228 | 497 | if (fixed_pos < -(int)index.size()) { |
229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
230 | 0 | continue; |
231 | 0 | } |
232 | 497 | if (fixed_pos < 0) { |
233 | 69 | fixed_pos = index.size() + fixed_pos + 1; |
234 | 69 | } |
235 | | |
236 | 497 | byte_pos = index[fixed_pos - 1]; |
237 | 497 | size_t fixed_len = str_size - byte_pos; |
238 | 497 | if (fixed_pos + len_value <= index.size()) { |
239 | 62 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; |
240 | 62 | } |
241 | | |
242 | 497 | if (byte_pos <= str_size && fixed_len > 0) { |
243 | 497 | StringOP::push_value_string_reserved_and_allow_overflow( |
244 | 497 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); |
245 | 497 | } else { |
246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
247 | 0 | } |
248 | 497 | } |
249 | 432 | } _ZN5doris13SubstringUtil12vectors_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 43 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 43 | std::array<std::byte, 128 * 1024> buf; | 196 | 43 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 43 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 360 | for (size_t i = 0; i < size; ++i) { | 200 | 317 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 317 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 317 | const char* str_data = | 203 | 317 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 317 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 317 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 317 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 317 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 199 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 199 | continue; | 213 | 199 | } | 214 | | | 215 | 118 | size_t byte_pos = 0; | 216 | 118 | index.clear(); | 217 | 948 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 860 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 860 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 860 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 30 | break; | 224 | 30 | } | 225 | 860 | } | 226 | | | 227 | 118 | int64_t fixed_pos = start_value; | 228 | 118 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 118 | if (fixed_pos < 0) { | 233 | 49 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 49 | } | 235 | | | 236 | 118 | byte_pos = index[fixed_pos - 1]; | 237 | 118 | size_t fixed_len = str_size - byte_pos; | 238 | 118 | if (fixed_pos + len_value <= index.size()) { | 239 | 32 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 32 | } | 241 | | | 242 | 118 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 118 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 118 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 118 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 118 | } | 249 | 43 | } |
Unexecuted instantiation: _ZN5doris13SubstringUtil12vectors_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil12vectors_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 16 | std::array<std::byte, 128 * 1024> buf; | 196 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 16 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 32 | for (size_t i = 0; i < size; ++i) { | 200 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 16 | const char* str_data = | 203 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 7 | continue; | 213 | 7 | } | 214 | | | 215 | 9 | size_t byte_pos = 0; | 216 | 9 | index.clear(); | 217 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 59 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 59 | } | 226 | | | 227 | 9 | int64_t fixed_pos = start_value; | 228 | 9 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 9 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 9 | byte_pos = index[fixed_pos - 1]; | 237 | 9 | size_t fixed_len = str_size - byte_pos; | 238 | 9 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 9 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 9 | } | 249 | 16 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 16 | std::array<std::byte, 128 * 1024> buf; | 196 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 16 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 32 | for (size_t i = 0; i < size; ++i) { | 200 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 16 | const char* str_data = | 203 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 7 | continue; | 213 | 7 | } | 214 | | | 215 | 9 | size_t byte_pos = 0; | 216 | 9 | index.clear(); | 217 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 59 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 59 | } | 226 | | | 227 | 9 | int64_t fixed_pos = start_value; | 228 | 9 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 9 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 9 | byte_pos = index[fixed_pos - 1]; | 237 | 9 | size_t fixed_len = str_size - byte_pos; | 238 | 9 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 9 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 9 | } | 249 | 16 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 21 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 21 | std::array<std::byte, 128 * 1024> buf; | 196 | 21 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 21 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 42 | for (size_t i = 0; i < size; ++i) { | 200 | 21 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 21 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 21 | const char* str_data = | 203 | 21 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 21 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 21 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 21 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 21 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 8 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 8 | continue; | 213 | 8 | } | 214 | | | 215 | 13 | size_t byte_pos = 0; | 216 | 13 | index.clear(); | 217 | 82 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 72 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 72 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 72 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 72 | } | 226 | | | 227 | 13 | int64_t fixed_pos = start_value; | 228 | 13 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 13 | if (fixed_pos < 0) { | 233 | 5 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 5 | } | 235 | | | 236 | 13 | byte_pos = index[fixed_pos - 1]; | 237 | 13 | size_t fixed_len = str_size - byte_pos; | 238 | 13 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 13 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 13 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 13 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 13 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 13 | } | 249 | 21 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 304 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 304 | std::array<std::byte, 128 * 1024> buf; | 196 | 304 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 304 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 641 | for (size_t i = 0; i < size; ++i) { | 200 | 337 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 337 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 337 | const char* str_data = | 203 | 337 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 337 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 337 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 337 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 337 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 7 | continue; | 213 | 7 | } | 214 | | | 215 | 330 | size_t byte_pos = 0; | 216 | 330 | index.clear(); | 217 | 2.61k | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 2.29k | char_size = get_utf8_byte_length(str_data[j]); | 219 | 2.29k | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 2.29k | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 7 | break; | 224 | 7 | } | 225 | 2.29k | } | 226 | | | 227 | 330 | int64_t fixed_pos = start_value; | 228 | 330 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 330 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 330 | byte_pos = index[fixed_pos - 1]; | 237 | 330 | size_t fixed_len = str_size - byte_pos; | 238 | 330 | if (fixed_pos + len_value <= index.size()) { | 239 | 15 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 15 | } | 241 | | | 242 | 330 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 330 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 330 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 330 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 330 | } | 249 | 304 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 16 | std::array<std::byte, 128 * 1024> buf; | 196 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 16 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 32 | for (size_t i = 0; i < size; ++i) { | 200 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 16 | const char* str_data = | 203 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 7 | continue; | 213 | 7 | } | 214 | | | 215 | 9 | size_t byte_pos = 0; | 216 | 9 | index.clear(); | 217 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 59 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 59 | } | 226 | | | 227 | 9 | int64_t fixed_pos = start_value; | 228 | 9 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 9 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 9 | byte_pos = index[fixed_pos - 1]; | 237 | 9 | size_t fixed_len = str_size - byte_pos; | 238 | 9 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 9 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 9 | } | 249 | 16 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 16 | std::array<std::byte, 128 * 1024> buf; | 196 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 16 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 32 | for (size_t i = 0; i < size; ++i) { | 200 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 16 | const char* str_data = | 203 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 7 | continue; | 213 | 7 | } | 214 | | | 215 | 9 | size_t byte_pos = 0; | 216 | 9 | index.clear(); | 217 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 59 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 59 | } | 226 | | | 227 | 9 | int64_t fixed_pos = start_value; | 228 | 9 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 9 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 9 | byte_pos = index[fixed_pos - 1]; | 237 | 9 | size_t fixed_len = str_size - byte_pos; | 238 | 9 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 9 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 9 | } | 249 | 16 | } |
|
250 | | |
251 | | template <bool str_const, bool start_const, bool len_const> |
252 | | static void vectors_ascii(const ColumnString::Chars& chars, |
253 | | const ColumnString::Offsets& offsets, |
254 | | const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, |
255 | | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, |
256 | 27.9k | size_t size) { |
257 | 3.43M | for (size_t i = 0; i < size; ++i) { |
258 | 3.40M | int str_size = offsets[index_check_const<str_const>(i)] - |
259 | 3.40M | offsets[index_check_const<str_const>(i) - 1]; |
260 | 3.40M | const char* str_data = |
261 | 3.40M | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; |
262 | 3.40M | int start_value = start[index_check_const<start_const>(i)]; |
263 | 3.40M | int len_value = len[index_check_const<len_const>(i)]; |
264 | | |
265 | 3.40M | if (start_value > str_size || start_value < -str_size || str_size == 0 || |
266 | 3.40M | len_value <= 0) { |
267 | 518k | StringOP::push_empty_string(i, res_chars, res_offsets); |
268 | 518k | continue; |
269 | 518k | } |
270 | 2.88M | int fixed_pos = start_value - 1; |
271 | 2.88M | if (fixed_pos < 0) { |
272 | 372 | fixed_pos = str_size + fixed_pos + 1; |
273 | 372 | } |
274 | 2.88M | size_t fixed_len = std::min(str_size - fixed_pos, len_value); |
275 | 2.88M | StringOP::push_value_string_reserved_and_allow_overflow( |
276 | 2.88M | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); |
277 | 2.88M | } |
278 | 27.9k | } Unexecuted instantiation: _ZN5doris13SubstringUtil13vectors_asciiILb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil13vectors_asciiILb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 128 | size_t size) { | 257 | 256 | for (size_t i = 0; i < size; ++i) { | 258 | 128 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 128 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 128 | const char* str_data = | 261 | 128 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 128 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 128 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 128 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 128 | len_value <= 0) { | 267 | 65 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 65 | continue; | 269 | 65 | } | 270 | 63 | int fixed_pos = start_value - 1; | 271 | 63 | if (fixed_pos < 0) { | 272 | 21 | fixed_pos = str_size + fixed_pos + 1; | 273 | 21 | } | 274 | 63 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 63 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 63 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 63 | } | 278 | 128 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 128 | size_t size) { | 257 | 256 | for (size_t i = 0; i < size; ++i) { | 258 | 128 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 128 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 128 | const char* str_data = | 261 | 128 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 128 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 128 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 128 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 128 | len_value <= 0) { | 267 | 65 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 65 | continue; | 269 | 65 | } | 270 | 63 | int fixed_pos = start_value - 1; | 271 | 63 | if (fixed_pos < 0) { | 272 | 21 | fixed_pos = str_size + fixed_pos + 1; | 273 | 21 | } | 274 | 63 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 63 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 63 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 63 | } | 278 | 128 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 161 | size_t size) { | 257 | 322 | for (size_t i = 0; i < size; ++i) { | 258 | 161 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 161 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 161 | const char* str_data = | 261 | 161 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 161 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 161 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 161 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 161 | len_value <= 0) { | 267 | 81 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 81 | continue; | 269 | 81 | } | 270 | 80 | int fixed_pos = start_value - 1; | 271 | 80 | if (fixed_pos < 0) { | 272 | 31 | fixed_pos = str_size + fixed_pos + 1; | 273 | 31 | } | 274 | 80 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 80 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 80 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 80 | } | 278 | 161 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 26.8k | size_t size) { | 257 | 3.42M | for (size_t i = 0; i < size; ++i) { | 258 | 3.40M | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 3.40M | offsets[index_check_const<str_const>(i) - 1]; | 260 | 3.40M | const char* str_data = | 261 | 3.40M | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 3.40M | int start_value = start[index_check_const<start_const>(i)]; | 263 | 3.40M | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 3.40M | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 3.40M | len_value <= 0) { | 267 | 518k | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 518k | continue; | 269 | 518k | } | 270 | 2.88M | int fixed_pos = start_value - 1; | 271 | 2.88M | if (fixed_pos < 0) { | 272 | 21 | fixed_pos = str_size + fixed_pos + 1; | 273 | 21 | } | 274 | 2.88M | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 2.88M | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 2.88M | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 2.88M | } | 278 | 26.8k | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 146 | size_t size) { | 257 | 292 | for (size_t i = 0; i < size; ++i) { | 258 | 146 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 146 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 146 | const char* str_data = | 261 | 146 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 146 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 146 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 146 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 146 | len_value <= 0) { | 267 | 77 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 77 | continue; | 269 | 77 | } | 270 | 69 | int fixed_pos = start_value - 1; | 271 | 69 | if (fixed_pos < 0) { | 272 | 24 | fixed_pos = str_size + fixed_pos + 1; | 273 | 24 | } | 274 | 69 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 69 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 69 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 69 | } | 278 | 146 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 160 | size_t size) { | 257 | 388 | for (size_t i = 0; i < size; ++i) { | 258 | 228 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 228 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 228 | const char* str_data = | 261 | 228 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 228 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 228 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 228 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 228 | len_value <= 0) { | 267 | 97 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 97 | continue; | 269 | 97 | } | 270 | 131 | int fixed_pos = start_value - 1; | 271 | 131 | if (fixed_pos < 0) { | 272 | 25 | fixed_pos = str_size + fixed_pos + 1; | 273 | 25 | } | 274 | 131 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 131 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 131 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 131 | } | 278 | 160 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 415 | size_t size) { | 257 | 1.02k | for (size_t i = 0; i < size; ++i) { | 258 | 611 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 611 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 611 | const char* str_data = | 261 | 611 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 611 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 611 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 611 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 611 | len_value <= 0) { | 267 | 150 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 150 | continue; | 269 | 150 | } | 270 | 461 | int fixed_pos = start_value - 1; | 271 | 461 | if (fixed_pos < 0) { | 272 | 229 | fixed_pos = str_size + fixed_pos + 1; | 273 | 229 | } | 274 | 461 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 461 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 461 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 461 | } | 278 | 415 | } |
|
279 | | }; |
280 | | |
281 | | #include "common/compile_check_end.h" |
282 | | |
283 | | } // namespace doris |