be/src/exec/common/stringop_substring.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <sys/types.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <array> |
24 | | #include <boost/iterator/iterator_facade.hpp> |
25 | | #include <boost/locale.hpp> |
26 | | #include <climits> |
27 | | #include <cmath> |
28 | | #include <cstddef> |
29 | | #include <cstdlib> |
30 | | #include <cstring> |
31 | | #include <utility> |
32 | | #include <vector> |
33 | | |
34 | | #include "common/compiler_util.h" // IWYU pragma: keep |
35 | | #include "core/block/block.h" |
36 | | #include "core/block/column_numbers.h" |
37 | | #include "core/block/column_with_type_and_name.h" |
38 | | #include "core/column/column.h" |
39 | | #include "core/column/column_const.h" |
40 | | #include "core/column/column_vector.h" |
41 | | #include "core/data_type/data_type.h" |
42 | | #include "core/pod_array_fwd.h" |
43 | | #include "core/types.h" |
44 | | |
45 | | #ifndef USE_LIBCPP |
46 | | #include <memory_resource> |
47 | 256 | #define PMR std::pmr |
48 | | #else |
49 | | #include <boost/container/pmr/monotonic_buffer_resource.hpp> |
50 | | #include <boost/container/pmr/vector.hpp> |
51 | | #define PMR boost::container::pmr |
52 | | #endif |
53 | | |
54 | | #include <fmt/format.h> |
55 | | |
56 | | #include <cstdint> |
57 | | #include <string_view> |
58 | | |
59 | | #include "core/assert_cast.h" |
60 | | #include "core/column/column_decimal.h" |
61 | | #include "core/column/column_nullable.h" |
62 | | #include "core/column/column_string.h" |
63 | | #include "core/string_ref.h" |
64 | | #include "util/simd/vstring_function.h" |
65 | | |
66 | | namespace doris { |
67 | | #include "common/compile_check_begin.h" |
68 | | struct StringOP { |
69 | | static void push_empty_string(size_t index, ColumnString::Chars& chars, |
70 | 989 | ColumnString::Offsets& offsets) { |
71 | 989 | offsets[index] = (ColumnString::Offset)chars.size(); |
72 | 989 | } |
73 | | |
74 | | static void push_null_string(size_t index, ColumnString::Chars& chars, |
75 | 103 | ColumnString::Offsets& offsets, NullMap& null_map) { |
76 | 103 | null_map[index] = 1; |
77 | 103 | push_empty_string(index, chars, offsets); |
78 | 103 | } |
79 | | |
80 | | static void push_value_string(const std::string_view& string_value, size_t index, |
81 | 1.92k | ColumnString::Chars& chars, ColumnString::Offsets& offsets) { |
82 | 1.92k | ColumnString::check_chars_length(chars.size() + string_value.size(), offsets.size()); |
83 | | |
84 | 1.92k | chars.insert(string_value.data(), string_value.data() + string_value.size()); |
85 | 1.92k | offsets[index] = (ColumnString::Offset)chars.size(); |
86 | 1.92k | } |
87 | | |
88 | | static void push_value_string_reserved_and_allow_overflow(const std::string_view& string_value, |
89 | | size_t index, |
90 | | ColumnString::Chars& chars, |
91 | 812 | ColumnString::Offsets& offsets) { |
92 | 812 | chars.insert_assume_reserved_and_allow_overflow(string_value.data(), |
93 | 812 | string_value.data() + string_value.size()); |
94 | 812 | offsets[index] = (ColumnString::Offset)chars.size(); |
95 | 812 | } |
96 | | |
97 | | static void fast_repeat(uint8_t* dst, const uint8_t* src, size_t src_size, |
98 | 49 | int32_t repeat_times) { |
99 | 49 | if (UNLIKELY(repeat_times <= 0)) { |
100 | 39 | return; |
101 | 39 | } |
102 | 10 | uint8_t* dst_begin = dst; |
103 | 10 | uint8_t* dst_curr = dst; |
104 | 10 | int32_t k = 0; |
105 | 10 | int32_t is_odd = repeat_times & 1; |
106 | 10 | repeat_times >>= 1; |
107 | | |
108 | 10 | memcpy(dst_curr, src, src_size); |
109 | 10 | dst_curr += src_size; |
110 | 29 | for (; repeat_times > 0; k += 1, is_odd = repeat_times & 1, repeat_times >>= 1) { |
111 | 19 | int64_t len = src_size * (1 << k); |
112 | 19 | memcpy(dst_curr, dst_begin, len); |
113 | 19 | dst_curr += len; |
114 | 19 | if (is_odd) { |
115 | 7 | memcpy(dst_curr, dst_begin, len); |
116 | 7 | dst_curr += len; |
117 | 7 | } |
118 | 19 | } |
119 | 10 | } |
120 | | }; |
121 | | |
122 | | struct SubstringUtil { |
123 | | static constexpr auto name = "substring"; |
124 | | |
125 | | static void substring_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, |
126 | 1.13k | size_t input_rows_count) { |
127 | 1.13k | DCHECK_EQ(arguments.size(), 3); |
128 | 1.13k | auto res = ColumnString::create(); |
129 | | |
130 | 1.13k | bool col_const[3]; |
131 | 1.13k | ColumnPtr argument_columns[3]; |
132 | 4.55k | for (int i = 0; i < 3; ++i) { |
133 | 3.41k | std::tie(argument_columns[i], col_const[i]) = |
134 | 3.41k | unpack_if_const(block.get_by_position(arguments[i]).column); |
135 | 3.41k | } |
136 | | |
137 | 1.13k | const auto* specific_str_column = |
138 | 1.13k | assert_cast<const ColumnString*>(argument_columns[0].get()); |
139 | 1.13k | const auto* specific_start_column = |
140 | 1.13k | assert_cast<const ColumnInt32*>(argument_columns[1].get()); |
141 | 1.13k | const auto* specific_len_column = |
142 | 1.13k | assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
143 | | |
144 | 1.13k | bool is_ascii = specific_str_column->is_ascii(); |
145 | | |
146 | 1.13k | std::visit( |
147 | 1.13k | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { |
148 | 1.13k | vectors<is_ascii, str_const, start_const, len_const>( |
149 | 1.13k | specific_str_column->get_chars(), specific_str_column->get_offsets(), |
150 | 1.13k | specific_start_column->get_data(), specific_len_column->get_data(), |
151 | 1.13k | res->get_chars(), res->get_offsets(), input_rows_count); |
152 | 1.13k | }, _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 31 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 31 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 31 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 31 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 31 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 31 | }, |
Unexecuted instantiation: _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_ _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 128 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 128 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 128 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 128 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 128 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 128 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 128 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 128 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 128 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 128 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 128 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 128 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 161 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 161 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 161 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 161 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 161 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 161 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 129 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 129 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 129 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 129 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 129 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 129 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 146 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 146 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 146 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 146 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 146 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 146 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 128 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 128 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 128 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 128 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 128 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 128 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 187 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 187 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 187 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 187 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 187 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 187 | }, |
Unexecuted instantiation: _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_ _ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 21 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 21 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 21 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 21 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 21 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 21 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 16 | }, |
_ZZN5doris13SubstringUtil17substring_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_ Line | Count | Source | 147 | 16 | [&](auto is_ascii, auto str_const, auto start_const, auto len_const) { | 148 | 16 | vectors<is_ascii, str_const, start_const, len_const>( | 149 | 16 | specific_str_column->get_chars(), specific_str_column->get_offsets(), | 150 | 16 | specific_start_column->get_data(), specific_len_column->get_data(), | 151 | 16 | res->get_chars(), res->get_offsets(), input_rows_count); | 152 | 16 | }, |
|
153 | 1.13k | make_bool_variant(is_ascii), make_bool_variant(col_const[0]), |
154 | 1.13k | make_bool_variant(col_const[1]), make_bool_variant(col_const[2])); |
155 | 1.13k | block.get_by_position(result).column = std::move(res); |
156 | 1.13k | } |
157 | | |
158 | | private: |
159 | | template <bool is_ascii, bool str_const, bool start_const, bool len_const> |
160 | | static void vectors(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets, |
161 | | const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, |
162 | | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, |
163 | 1.13k | size_t size) { |
164 | 1.13k | res_offsets.resize(size); |
165 | | |
166 | 1.13k | if constexpr (start_const && len_const) { |
167 | 145 | if (start[0] == 0 || len[0] <= 0) { |
168 | 72 | for (size_t i = 0; i < size; ++i) { |
169 | 36 | StringOP::push_empty_string(i, res_chars, res_offsets); |
170 | 36 | } |
171 | 36 | return; |
172 | 36 | } |
173 | 145 | } |
174 | | |
175 | 470 | if constexpr (str_const) { |
176 | 470 | res_chars.reserve(size * chars.size()); |
177 | 669 | } else { |
178 | 669 | res_chars.reserve(chars.size()); |
179 | 669 | } |
180 | | |
181 | 1.13k | if constexpr (is_ascii) { |
182 | 1.00k | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, |
183 | 1.00k | res_offsets, size); |
184 | 1.00k | } else { |
185 | 132 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, |
186 | 132 | res_offsets, size); |
187 | 132 | } |
188 | 1.13k | } _ZN5doris13SubstringUtil7vectorsILb0ELb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 31 | size_t size) { | 164 | 31 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 31 | } else { | 178 | 31 | res_chars.reserve(chars.size()); | 179 | 31 | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 31 | } else { | 185 | 31 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 31 | res_offsets, size); | 187 | 31 | } | 188 | 31 | } |
Unexecuted instantiation: _ZN5doris13SubstringUtil7vectorsILb1ELb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil7vectorsILb1ELb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 128 | size_t size) { | 164 | 128 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 128 | if constexpr (str_const) { | 176 | 128 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | 128 | if constexpr (is_ascii) { | 182 | 128 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 128 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 128 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 128 | size_t size) { | 164 | 128 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 128 | if constexpr (str_const) { | 176 | 128 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | 128 | if constexpr (is_ascii) { | 182 | 128 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 128 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 128 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 161 | size_t size) { | 164 | 161 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 161 | if constexpr (str_const) { | 176 | 161 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | 161 | if constexpr (is_ascii) { | 182 | 161 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 161 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 161 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 129 | size_t size) { | 164 | 129 | res_offsets.resize(size); | 165 | | | 166 | 129 | if constexpr (start_const && len_const) { | 167 | 129 | if (start[0] == 0 || len[0] <= 0) { | 168 | 64 | for (size_t i = 0; i < size; ++i) { | 169 | 32 | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | 32 | } | 171 | 32 | return; | 172 | 32 | } | 173 | 129 | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 129 | } else { | 178 | 129 | res_chars.reserve(chars.size()); | 179 | 129 | } | 180 | | | 181 | 129 | if constexpr (is_ascii) { | 182 | 129 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 129 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 129 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 146 | size_t size) { | 164 | 146 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 146 | } else { | 178 | 146 | res_chars.reserve(chars.size()); | 179 | 146 | } | 180 | | | 181 | 146 | if constexpr (is_ascii) { | 182 | 146 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 146 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 146 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 128 | size_t size) { | 164 | 128 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 128 | } else { | 178 | 128 | res_chars.reserve(chars.size()); | 179 | 128 | } | 180 | | | 181 | 128 | if constexpr (is_ascii) { | 182 | 128 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 128 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 128 | } |
_ZN5doris13SubstringUtil7vectorsILb1ELb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 187 | size_t size) { | 164 | 187 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 187 | } else { | 178 | 187 | res_chars.reserve(chars.size()); | 179 | 187 | } | 180 | | | 181 | 187 | if constexpr (is_ascii) { | 182 | 187 | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | 187 | res_offsets, size); | 184 | | } else { | 185 | | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | | res_offsets, size); | 187 | | } | 188 | 187 | } |
Unexecuted instantiation: _ZN5doris13SubstringUtil7vectorsILb0ELb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil7vectorsILb0ELb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 16 | size_t size) { | 164 | 16 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 16 | if constexpr (str_const) { | 176 | 16 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 16 | } else { | 185 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 16 | res_offsets, size); | 187 | 16 | } | 188 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 16 | size_t size) { | 164 | 16 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 16 | if constexpr (str_const) { | 176 | 16 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 16 | } else { | 185 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 16 | res_offsets, size); | 187 | 16 | } | 188 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 21 | size_t size) { | 164 | 21 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | 21 | if constexpr (str_const) { | 176 | 21 | res_chars.reserve(size * chars.size()); | 177 | | } else { | 178 | | res_chars.reserve(chars.size()); | 179 | | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 21 | } else { | 185 | 21 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 21 | res_offsets, size); | 187 | 21 | } | 188 | 21 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 16 | size_t size) { | 164 | 16 | res_offsets.resize(size); | 165 | | | 166 | 16 | if constexpr (start_const && len_const) { | 167 | 16 | if (start[0] == 0 || len[0] <= 0) { | 168 | 8 | for (size_t i = 0; i < size; ++i) { | 169 | 4 | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | 4 | } | 171 | 4 | return; | 172 | 4 | } | 173 | 16 | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 16 | } else { | 178 | 16 | res_chars.reserve(chars.size()); | 179 | 16 | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 16 | } else { | 185 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 16 | res_offsets, size); | 187 | 16 | } | 188 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 16 | size_t size) { | 164 | 16 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 16 | } else { | 178 | 16 | res_chars.reserve(chars.size()); | 179 | 16 | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 16 | } else { | 185 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 16 | res_offsets, size); | 187 | 16 | } | 188 | 16 | } |
_ZN5doris13SubstringUtil7vectorsILb0ELb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 163 | 16 | size_t size) { | 164 | 16 | res_offsets.resize(size); | 165 | | | 166 | | if constexpr (start_const && len_const) { | 167 | | if (start[0] == 0 || len[0] <= 0) { | 168 | | for (size_t i = 0; i < size; ++i) { | 169 | | StringOP::push_empty_string(i, res_chars, res_offsets); | 170 | | } | 171 | | return; | 172 | | } | 173 | | } | 174 | | | 175 | | if constexpr (str_const) { | 176 | | res_chars.reserve(size * chars.size()); | 177 | 16 | } else { | 178 | 16 | res_chars.reserve(chars.size()); | 179 | 16 | } | 180 | | | 181 | | if constexpr (is_ascii) { | 182 | | vectors_ascii<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 183 | | res_offsets, size); | 184 | 16 | } else { | 185 | 16 | vectors_utf8<str_const, start_const, len_const>(chars, offsets, start, len, res_chars, | 186 | 16 | res_offsets, size); | 187 | 16 | } | 188 | 16 | } |
|
189 | | |
190 | | template <bool str_const, bool start_const, bool len_const> |
191 | | NO_SANITIZE_UNDEFINED static void vectors_utf8( |
192 | | const ColumnString::Chars& chars, const ColumnString::Offsets& offsets, |
193 | | const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, |
194 | 128 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { |
195 | 128 | std::array<std::byte, 128 * 1024> buf; |
196 | 128 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; |
197 | 128 | PMR::vector<size_t> index {&pool}; |
198 | | |
199 | 530 | for (size_t i = 0; i < size; ++i) { |
200 | 402 | int str_size = offsets[index_check_const<str_const>(i)] - |
201 | 402 | offsets[index_check_const<str_const>(i) - 1]; |
202 | 402 | const char* str_data = |
203 | 402 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; |
204 | 402 | int start_value = start[index_check_const<start_const>(i)]; |
205 | 402 | int len_value = len[index_check_const<len_const>(i)]; |
206 | | // Unsigned numbers cannot be used here because start_value can be negative. |
207 | 402 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); |
208 | | // return empty string if start > src.length |
209 | | // Here, start_value is compared against the length of the character. |
210 | 402 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { |
211 | 237 | StringOP::push_empty_string(i, res_chars, res_offsets); |
212 | 237 | continue; |
213 | 237 | } |
214 | | |
215 | 165 | size_t byte_pos = 0; |
216 | 165 | index.clear(); |
217 | 1.28k | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { |
218 | 1.16k | char_size = get_utf8_byte_length(str_data[j]); |
219 | 1.16k | index.push_back(j); |
220 | | // index_size represents the number of characters from the beginning of the character to the current position. |
221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. |
222 | 1.16k | if (start_value > 0 && index.size() > start_value + len_value) { |
223 | 48 | break; |
224 | 48 | } |
225 | 1.16k | } |
226 | | |
227 | 165 | int64_t fixed_pos = start_value; |
228 | 165 | if (fixed_pos < -(int)index.size()) { |
229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
230 | 0 | continue; |
231 | 0 | } |
232 | 165 | if (fixed_pos < 0) { |
233 | 60 | fixed_pos = index.size() + fixed_pos + 1; |
234 | 60 | } |
235 | | |
236 | 165 | byte_pos = index[fixed_pos - 1]; |
237 | 165 | size_t fixed_len = str_size - byte_pos; |
238 | 165 | if (fixed_pos + len_value <= index.size()) { |
239 | 49 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; |
240 | 49 | } |
241 | | |
242 | 165 | if (byte_pos <= str_size && fixed_len > 0) { |
243 | 165 | StringOP::push_value_string_reserved_and_allow_overflow( |
244 | 165 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); |
245 | 165 | } else { |
246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
247 | 0 | } |
248 | 165 | } |
249 | 128 | } _ZN5doris13SubstringUtil12vectors_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 31 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 31 | std::array<std::byte, 128 * 1024> buf; | 196 | 31 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 31 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 336 | for (size_t i = 0; i < size; ++i) { | 200 | 305 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 305 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 305 | const char* str_data = | 203 | 305 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 305 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 305 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 305 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 305 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 198 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 198 | continue; | 213 | 198 | } | 214 | | | 215 | 107 | size_t byte_pos = 0; | 216 | 107 | index.clear(); | 217 | 879 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 802 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 802 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 802 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 30 | break; | 224 | 30 | } | 225 | 802 | } | 226 | | | 227 | 107 | int64_t fixed_pos = start_value; | 228 | 107 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 107 | if (fixed_pos < 0) { | 233 | 40 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 40 | } | 235 | | | 236 | 107 | byte_pos = index[fixed_pos - 1]; | 237 | 107 | size_t fixed_len = str_size - byte_pos; | 238 | 107 | if (fixed_pos + len_value <= index.size()) { | 239 | 31 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 31 | } | 241 | | | 242 | 107 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 107 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 107 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 107 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 107 | } | 249 | 31 | } |
Unexecuted instantiation: _ZN5doris13SubstringUtil12vectors_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil12vectors_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 16 | std::array<std::byte, 128 * 1024> buf; | 196 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 16 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 32 | for (size_t i = 0; i < size; ++i) { | 200 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 16 | const char* str_data = | 203 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 7 | continue; | 213 | 7 | } | 214 | | | 215 | 9 | size_t byte_pos = 0; | 216 | 9 | index.clear(); | 217 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 59 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 59 | } | 226 | | | 227 | 9 | int64_t fixed_pos = start_value; | 228 | 9 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 9 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 9 | byte_pos = index[fixed_pos - 1]; | 237 | 9 | size_t fixed_len = str_size - byte_pos; | 238 | 9 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 9 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 9 | } | 249 | 16 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 16 | std::array<std::byte, 128 * 1024> buf; | 196 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 16 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 32 | for (size_t i = 0; i < size; ++i) { | 200 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 16 | const char* str_data = | 203 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 7 | continue; | 213 | 7 | } | 214 | | | 215 | 9 | size_t byte_pos = 0; | 216 | 9 | index.clear(); | 217 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 59 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 59 | } | 226 | | | 227 | 9 | int64_t fixed_pos = start_value; | 228 | 9 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 9 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 9 | byte_pos = index[fixed_pos - 1]; | 237 | 9 | size_t fixed_len = str_size - byte_pos; | 238 | 9 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 9 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 9 | } | 249 | 16 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 21 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 21 | std::array<std::byte, 128 * 1024> buf; | 196 | 21 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 21 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 42 | for (size_t i = 0; i < size; ++i) { | 200 | 21 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 21 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 21 | const char* str_data = | 203 | 21 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 21 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 21 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 21 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 21 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 8 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 8 | continue; | 213 | 8 | } | 214 | | | 215 | 13 | size_t byte_pos = 0; | 216 | 13 | index.clear(); | 217 | 82 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 72 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 72 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 72 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 72 | } | 226 | | | 227 | 13 | int64_t fixed_pos = start_value; | 228 | 13 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 13 | if (fixed_pos < 0) { | 233 | 5 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 5 | } | 235 | | | 236 | 13 | byte_pos = index[fixed_pos - 1]; | 237 | 13 | size_t fixed_len = str_size - byte_pos; | 238 | 13 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 13 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 13 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 13 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 13 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 13 | } | 249 | 21 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 12 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 12 | std::array<std::byte, 128 * 1024> buf; | 196 | 12 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 12 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 24 | for (size_t i = 0; i < size; ++i) { | 200 | 12 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 12 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 12 | const char* str_data = | 203 | 12 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 12 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 12 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 12 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 12 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 3 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 3 | continue; | 213 | 3 | } | 214 | | | 215 | 9 | size_t byte_pos = 0; | 216 | 9 | index.clear(); | 217 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 59 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 59 | } | 226 | | | 227 | 9 | int64_t fixed_pos = start_value; | 228 | 9 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 9 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 9 | byte_pos = index[fixed_pos - 1]; | 237 | 9 | size_t fixed_len = str_size - byte_pos; | 238 | 9 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 9 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 9 | } | 249 | 12 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 16 | std::array<std::byte, 128 * 1024> buf; | 196 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 16 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 32 | for (size_t i = 0; i < size; ++i) { | 200 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 16 | const char* str_data = | 203 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 7 | continue; | 213 | 7 | } | 214 | | | 215 | 9 | size_t byte_pos = 0; | 216 | 9 | index.clear(); | 217 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 59 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 59 | } | 226 | | | 227 | 9 | int64_t fixed_pos = start_value; | 228 | 9 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 9 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 9 | byte_pos = index[fixed_pos - 1]; | 237 | 9 | size_t fixed_len = str_size - byte_pos; | 238 | 9 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 9 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 9 | } | 249 | 16 | } |
_ZN5doris13SubstringUtil12vectors_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 194 | 16 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, size_t size) { | 195 | 16 | std::array<std::byte, 128 * 1024> buf; | 196 | 16 | PMR::monotonic_buffer_resource pool {buf.data(), buf.size()}; | 197 | 16 | PMR::vector<size_t> index {&pool}; | 198 | | | 199 | 32 | for (size_t i = 0; i < size; ++i) { | 200 | 16 | int str_size = offsets[index_check_const<str_const>(i)] - | 201 | 16 | offsets[index_check_const<str_const>(i) - 1]; | 202 | 16 | const char* str_data = | 203 | 16 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 204 | 16 | int start_value = start[index_check_const<start_const>(i)]; | 205 | 16 | int len_value = len[index_check_const<len_const>(i)]; | 206 | | // Unsigned numbers cannot be used here because start_value can be negative. | 207 | 16 | int char_len = simd::VStringFunctions::get_char_len(str_data, str_size); | 208 | | // return empty string if start > src.length | 209 | | // Here, start_value is compared against the length of the character. | 210 | 16 | if (start_value > char_len || str_size == 0 || start_value == 0 || len_value <= 0) { | 211 | 7 | StringOP::push_empty_string(i, res_chars, res_offsets); | 212 | 7 | continue; | 213 | 7 | } | 214 | | | 215 | 9 | size_t byte_pos = 0; | 216 | 9 | index.clear(); | 217 | 65 | for (size_t j = 0, char_size = 0; j < str_size; j += char_size) { | 218 | 59 | char_size = get_utf8_byte_length(str_data[j]); | 219 | 59 | index.push_back(j); | 220 | | // index_size represents the number of characters from the beginning of the character to the current position. | 221 | | // So index.size() > start_value + len_value breaks because you don't need to get the characters after start + len characters. | 222 | 59 | if (start_value > 0 && index.size() > start_value + len_value) { | 223 | 3 | break; | 224 | 3 | } | 225 | 59 | } | 226 | | | 227 | 9 | int64_t fixed_pos = start_value; | 228 | 9 | if (fixed_pos < -(int)index.size()) { | 229 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 230 | 0 | continue; | 231 | 0 | } | 232 | 9 | if (fixed_pos < 0) { | 233 | 3 | fixed_pos = index.size() + fixed_pos + 1; | 234 | 3 | } | 235 | | | 236 | 9 | byte_pos = index[fixed_pos - 1]; | 237 | 9 | size_t fixed_len = str_size - byte_pos; | 238 | 9 | if (fixed_pos + len_value <= index.size()) { | 239 | 3 | fixed_len = index[fixed_pos + len_value - 1] - byte_pos; | 240 | 3 | } | 241 | | | 242 | 9 | if (byte_pos <= str_size && fixed_len > 0) { | 243 | 9 | StringOP::push_value_string_reserved_and_allow_overflow( | 244 | 9 | {str_data + byte_pos, fixed_len}, i, res_chars, res_offsets); | 245 | 9 | } else { | 246 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); | 247 | 0 | } | 248 | 9 | } | 249 | 16 | } |
|
250 | | |
251 | | template <bool str_const, bool start_const, bool len_const> |
252 | | static void vectors_ascii(const ColumnString::Chars& chars, |
253 | | const ColumnString::Offsets& offsets, |
254 | | const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len, |
255 | | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets, |
256 | 975 | size_t size) { |
257 | 1.97k | for (size_t i = 0; i < size; ++i) { |
258 | 1.00k | int str_size = offsets[index_check_const<str_const>(i)] - |
259 | 1.00k | offsets[index_check_const<str_const>(i) - 1]; |
260 | 1.00k | const char* str_data = |
261 | 1.00k | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; |
262 | 1.00k | int start_value = start[index_check_const<start_const>(i)]; |
263 | 1.00k | int len_value = len[index_check_const<len_const>(i)]; |
264 | | |
265 | 1.00k | if (start_value > str_size || start_value < -str_size || str_size == 0 || |
266 | 1.00k | len_value <= 0) { |
267 | 496 | StringOP::push_empty_string(i, res_chars, res_offsets); |
268 | 496 | continue; |
269 | 496 | } |
270 | 506 | int fixed_pos = start_value - 1; |
271 | 506 | if (fixed_pos < 0) { |
272 | 184 | fixed_pos = str_size + fixed_pos + 1; |
273 | 184 | } |
274 | 506 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); |
275 | 506 | StringOP::push_value_string_reserved_and_allow_overflow( |
276 | 506 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); |
277 | 506 | } |
278 | 975 | } Unexecuted instantiation: _ZN5doris13SubstringUtil13vectors_asciiILb1ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m _ZN5doris13SubstringUtil13vectors_asciiILb1ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 128 | size_t size) { | 257 | 256 | for (size_t i = 0; i < size; ++i) { | 258 | 128 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 128 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 128 | const char* str_data = | 261 | 128 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 128 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 128 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 128 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 128 | len_value <= 0) { | 267 | 65 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 65 | continue; | 269 | 65 | } | 270 | 63 | int fixed_pos = start_value - 1; | 271 | 63 | if (fixed_pos < 0) { | 272 | 21 | fixed_pos = str_size + fixed_pos + 1; | 273 | 21 | } | 274 | 63 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 63 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 63 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 63 | } | 278 | 128 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb1ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 128 | size_t size) { | 257 | 256 | for (size_t i = 0; i < size; ++i) { | 258 | 128 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 128 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 128 | const char* str_data = | 261 | 128 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 128 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 128 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 128 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 128 | len_value <= 0) { | 267 | 65 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 65 | continue; | 269 | 65 | } | 270 | 63 | int fixed_pos = start_value - 1; | 271 | 63 | if (fixed_pos < 0) { | 272 | 21 | fixed_pos = str_size + fixed_pos + 1; | 273 | 21 | } | 274 | 63 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 63 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 63 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 63 | } | 278 | 128 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb1ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 161 | size_t size) { | 257 | 322 | for (size_t i = 0; i < size; ++i) { | 258 | 161 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 161 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 161 | const char* str_data = | 261 | 161 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 161 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 161 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 161 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 161 | len_value <= 0) { | 267 | 81 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 81 | continue; | 269 | 81 | } | 270 | 80 | int fixed_pos = start_value - 1; | 271 | 80 | if (fixed_pos < 0) { | 272 | 31 | fixed_pos = str_size + fixed_pos + 1; | 273 | 31 | } | 274 | 80 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 80 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 80 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 80 | } | 278 | 161 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb1ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 97 | size_t size) { | 257 | 194 | for (size_t i = 0; i < size; ++i) { | 258 | 97 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 97 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 97 | const char* str_data = | 261 | 97 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 97 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 97 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 97 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 97 | len_value <= 0) { | 267 | 33 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 33 | continue; | 269 | 33 | } | 270 | 64 | int fixed_pos = start_value - 1; | 271 | 64 | if (fixed_pos < 0) { | 272 | 21 | fixed_pos = str_size + fixed_pos + 1; | 273 | 21 | } | 274 | 64 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 64 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 64 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 64 | } | 278 | 97 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb1ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 146 | size_t size) { | 257 | 292 | for (size_t i = 0; i < size; ++i) { | 258 | 146 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 146 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 146 | const char* str_data = | 261 | 146 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 146 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 146 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 146 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 146 | len_value <= 0) { | 267 | 77 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 77 | continue; | 269 | 77 | } | 270 | 69 | int fixed_pos = start_value - 1; | 271 | 69 | if (fixed_pos < 0) { | 272 | 24 | fixed_pos = str_size + fixed_pos + 1; | 273 | 24 | } | 274 | 69 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 69 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 69 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 69 | } | 278 | 146 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb0ELb1EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 128 | size_t size) { | 257 | 256 | for (size_t i = 0; i < size; ++i) { | 258 | 128 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 128 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 128 | const char* str_data = | 261 | 128 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 128 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 128 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 128 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 128 | len_value <= 0) { | 267 | 65 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 65 | continue; | 269 | 65 | } | 270 | 63 | int fixed_pos = start_value - 1; | 271 | 63 | if (fixed_pos < 0) { | 272 | 21 | fixed_pos = str_size + fixed_pos + 1; | 273 | 21 | } | 274 | 63 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 63 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 63 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 63 | } | 278 | 128 | } |
_ZN5doris13SubstringUtil13vectors_asciiILb0ELb0ELb0EEEvRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS2_IiLm4096ES5_Lm16ELm15EEESE_RS6_RS9_m Line | Count | Source | 256 | 187 | size_t size) { | 257 | 401 | for (size_t i = 0; i < size; ++i) { | 258 | 214 | int str_size = offsets[index_check_const<str_const>(i)] - | 259 | 214 | offsets[index_check_const<str_const>(i) - 1]; | 260 | 214 | const char* str_data = | 261 | 214 | (char*)chars.data() + offsets[index_check_const<str_const>(i) - 1]; | 262 | 214 | int start_value = start[index_check_const<start_const>(i)]; | 263 | 214 | int len_value = len[index_check_const<len_const>(i)]; | 264 | | | 265 | 214 | if (start_value > str_size || start_value < -str_size || str_size == 0 || | 266 | 214 | len_value <= 0) { | 267 | 110 | StringOP::push_empty_string(i, res_chars, res_offsets); | 268 | 110 | continue; | 269 | 110 | } | 270 | 104 | int fixed_pos = start_value - 1; | 271 | 104 | if (fixed_pos < 0) { | 272 | 45 | fixed_pos = str_size + fixed_pos + 1; | 273 | 45 | } | 274 | 104 | size_t fixed_len = std::min(str_size - fixed_pos, len_value); | 275 | 104 | StringOP::push_value_string_reserved_and_allow_overflow( | 276 | 104 | {str_data + fixed_pos, fixed_len}, i, res_chars, res_offsets); | 277 | 104 | } | 278 | 187 | } |
|
279 | | }; |
280 | | |
281 | | #include "common/compile_check_end.h" |
282 | | |
283 | | } // namespace doris |