be/src/exprs/function/function_string_url.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <cstddef> |
19 | | #include <string> |
20 | | #include <string_view> |
21 | | #include <vector> |
22 | | |
23 | | #include "common/status.h" |
24 | | #include "core/assert_cast.h" |
25 | | #include "core/block/block.h" |
26 | | #include "core/block/column_numbers.h" |
27 | | #include "core/column/column_const.h" |
28 | | #include "core/column/column_nullable.h" |
29 | | #include "core/column/column_string.h" |
30 | | #include "core/column/column_vector.h" |
31 | | #include "core/data_type/data_type_nullable.h" |
32 | | #include "core/data_type/data_type_string.h" |
33 | | #include "core/string_ref.h" |
34 | | #include "exec/common/stringop_substring.h" |
35 | | #include "exec/common/template_helpers.hpp" |
36 | | #include "exprs/function/function.h" |
37 | | #include "exprs/function/function_helpers.h" |
38 | | #include "exprs/function/simple_function_factory.h" |
39 | | #include "exprs/function_context.h" |
40 | | #include "util/url_coding.h" |
41 | | #include "util/url_parser.h" |
42 | | |
43 | | namespace doris { |
44 | | #include "common/compile_check_avoid_begin.h" |
45 | | |
46 | | class FunctionExtractURLParameter : public IFunction { |
47 | | public: |
48 | | static constexpr auto name = "extract_url_parameter"; |
49 | 39 | static FunctionPtr create() { return std::make_shared<FunctionExtractURLParameter>(); } |
50 | 1 | String get_name() const override { return name; } |
51 | 37 | size_t get_number_of_arguments() const override { return 2; } |
52 | | |
53 | 37 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
54 | 37 | return std::make_shared<DataTypeString>(); |
55 | 37 | } |
56 | | |
57 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
58 | 37 | uint32_t result, size_t input_rows_count) const override { |
59 | 37 | auto col_url = |
60 | 37 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
61 | 37 | auto col_parameter = |
62 | 37 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
63 | 37 | auto url_col = assert_cast<const ColumnString*>(col_url.get()); |
64 | 37 | auto parameter_col = assert_cast<const ColumnString*>(col_parameter.get()); |
65 | | |
66 | 37 | ColumnString::MutablePtr col_res = ColumnString::create(); |
67 | | |
68 | 85 | for (int i = 0; i < input_rows_count; ++i) { |
69 | 48 | auto source = url_col->get_data_at(i); |
70 | 48 | auto param = parameter_col->get_data_at(i); |
71 | 48 | auto res = extract_url(source, param); |
72 | | |
73 | 48 | col_res->insert_data(res.data, res.size); |
74 | 48 | } |
75 | | |
76 | 37 | block.replace_by_position(result, std::move(col_res)); |
77 | 37 | return Status::OK(); |
78 | 37 | } |
79 | | |
80 | | private: |
81 | 48 | StringRef extract_url(StringRef url, StringRef parameter) const { |
82 | 48 | if (url.size == 0 || parameter.size == 0) { |
83 | 8 | return StringRef("", 0); |
84 | 8 | } |
85 | 40 | return UrlParser::extract_url(url, parameter); |
86 | 48 | } |
87 | | }; |
88 | | |
89 | | class FunctionStringParseUrl : public IFunction { |
90 | | public: |
91 | | static constexpr auto name = "parse_url"; |
92 | 95 | static FunctionPtr create() { return std::make_shared<FunctionStringParseUrl>(); } |
93 | 0 | String get_name() const override { return name; } |
94 | 0 | size_t get_number_of_arguments() const override { return 0; } |
95 | 94 | bool is_variadic() const override { return true; } |
96 | | |
97 | 93 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
98 | 93 | return make_nullable(std::make_shared<DataTypeString>()); |
99 | 93 | } |
100 | | |
101 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
102 | 93 | uint32_t result, size_t input_rows_count) const override { |
103 | 93 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
104 | 93 | auto& null_map_data = null_map->get_data(); |
105 | 93 | DCHECK_GE(3, arguments.size()); |
106 | 93 | auto res = ColumnString::create(); |
107 | 93 | auto& res_offsets = res->get_offsets(); |
108 | 93 | auto& res_chars = res->get_chars(); |
109 | 93 | res_offsets.resize(input_rows_count); |
110 | | |
111 | 93 | size_t argument_size = arguments.size(); |
112 | 93 | const bool has_key = argument_size == 3; |
113 | | |
114 | 93 | std::vector<ColumnPtr> argument_columns(argument_size); |
115 | 93 | std::vector<UInt8> col_const(argument_size); |
116 | 308 | for (size_t i = 0; i < argument_size; ++i) { |
117 | 215 | std::tie(argument_columns[i], col_const[i]) = |
118 | 215 | unpack_if_const(block.get_by_position(arguments[i]).column); |
119 | 215 | } |
120 | | |
121 | 93 | const auto* url_col = assert_cast<const ColumnString*>(argument_columns[0].get()); |
122 | 93 | const auto* part_col = assert_cast<const ColumnString*>(argument_columns[1].get()); |
123 | 93 | const bool part_const = col_const[1]; |
124 | 93 | std::vector<UrlParser::UrlPart> url_parts; |
125 | 93 | const int part_nums = part_const ? 1 : input_rows_count; |
126 | | |
127 | 93 | url_parts.resize(part_nums); |
128 | 209 | for (int i = 0; i < part_nums; i++) { |
129 | 116 | StringRef part = part_col->get_data_at(i); |
130 | 116 | UrlParser::UrlPart url_part = UrlParser::get_url_part(part); |
131 | 116 | if (url_part == UrlParser::INVALID) { |
132 | 0 | return Status::RuntimeError("Invalid URL part: {}\n{}", |
133 | 0 | std::string(part.data, part.size), |
134 | 0 | "(Valid URL parts are 'PROTOCOL', 'HOST', " |
135 | 0 | "'PATH', 'REF', 'AUTHORITY', " |
136 | 0 | "'FILE', 'USERINFO', 'PORT' and 'QUERY')"); |
137 | 0 | } |
138 | 116 | url_parts[i] = url_part; |
139 | 116 | } |
140 | | |
141 | 93 | if (has_key) { |
142 | 29 | const bool url_const = col_const[0]; |
143 | 29 | const bool key_const = col_const[2]; |
144 | 29 | const auto* key_col = assert_cast<const ColumnString*>(argument_columns[2].get()); |
145 | 29 | RETURN_IF_ERROR(std::visit( |
146 | 29 | [&](auto url_const, auto part_const, auto key_const) { |
147 | 29 | return vector_parse_key<url_const, part_const, key_const>( |
148 | 29 | url_col, url_parts, key_col, input_rows_count, null_map_data, |
149 | 29 | res_chars, res_offsets); |
150 | 29 | }, |
151 | 29 | make_bool_variant(url_const), make_bool_variant(part_const), |
152 | 29 | make_bool_variant(key_const))); |
153 | 64 | } else { |
154 | 64 | const bool url_const = col_const[0]; |
155 | 64 | RETURN_IF_ERROR(std::visit( |
156 | 64 | [&](auto url_const, auto part_const) { |
157 | 64 | return vector_parse<url_const, part_const>(url_col, url_parts, |
158 | 64 | input_rows_count, null_map_data, |
159 | 64 | res_chars, res_offsets); |
160 | 64 | }, |
161 | 64 | make_bool_variant(url_const), make_bool_variant(part_const))); |
162 | 64 | } |
163 | 93 | block.get_by_position(result).column = |
164 | 93 | ColumnNullable::create(std::move(res), std::move(null_map)); |
165 | 93 | return Status::OK(); |
166 | 93 | } |
167 | | template <bool url_const, bool part_const> |
168 | | static Status vector_parse(const ColumnString* url_col, |
169 | | std::vector<UrlParser::UrlPart>& url_parts, const int size, |
170 | | ColumnUInt8::Container& null_map_data, |
171 | 64 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { |
172 | 148 | for (size_t i = 0; i < size; ++i) { |
173 | 84 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; |
174 | 84 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); |
175 | 84 | StringRef parse_res; |
176 | 84 | if (UrlParser::parse_url(url_val, url_part, &parse_res)) { |
177 | 64 | if (parse_res.empty()) [[unlikely]] { |
178 | 4 | StringOP::push_empty_string(i, res_chars, res_offsets); |
179 | 4 | continue; |
180 | 4 | } |
181 | 60 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, |
182 | 60 | res_chars, res_offsets); |
183 | 60 | } else { |
184 | 20 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
185 | 20 | } |
186 | 84 | } |
187 | 64 | return Status::OK(); |
188 | 64 | } _ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 171 | 22 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { | 172 | 64 | for (size_t i = 0; i < size; ++i) { | 173 | 42 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 174 | 42 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 175 | 42 | StringRef parse_res; | 176 | 42 | if (UrlParser::parse_url(url_val, url_part, &parse_res)) { | 177 | 32 | if (parse_res.empty()) [[unlikely]] { | 178 | 2 | StringOP::push_empty_string(i, res_chars, res_offsets); | 179 | 2 | continue; | 180 | 2 | } | 181 | 30 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 182 | 30 | res_chars, res_offsets); | 183 | 30 | } else { | 184 | 10 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 185 | 10 | } | 186 | 42 | } | 187 | 22 | return Status::OK(); | 188 | 22 | } |
_ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 171 | 21 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { | 172 | 42 | for (size_t i = 0; i < size; ++i) { | 173 | 21 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 174 | 21 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 175 | 21 | StringRef parse_res; | 176 | 21 | if (UrlParser::parse_url(url_val, url_part, &parse_res)) { | 177 | 16 | if (parse_res.empty()) [[unlikely]] { | 178 | 1 | StringOP::push_empty_string(i, res_chars, res_offsets); | 179 | 1 | continue; | 180 | 1 | } | 181 | 15 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 182 | 15 | res_chars, res_offsets); | 183 | 15 | } else { | 184 | 5 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 185 | 5 | } | 186 | 21 | } | 187 | 21 | return Status::OK(); | 188 | 21 | } |
_ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 171 | 21 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { | 172 | 42 | for (size_t i = 0; i < size; ++i) { | 173 | 21 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 174 | 21 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 175 | 21 | StringRef parse_res; | 176 | 21 | if (UrlParser::parse_url(url_val, url_part, &parse_res)) { | 177 | 16 | if (parse_res.empty()) [[unlikely]] { | 178 | 1 | StringOP::push_empty_string(i, res_chars, res_offsets); | 179 | 1 | continue; | 180 | 1 | } | 181 | 15 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 182 | 15 | res_chars, res_offsets); | 183 | 15 | } else { | 184 | 5 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 185 | 5 | } | 186 | 21 | } | 187 | 21 | return Status::OK(); | 188 | 21 | } |
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE |
189 | | template <bool url_const, bool part_const, bool key_const> |
190 | | static Status vector_parse_key(const ColumnString* url_col, |
191 | | std::vector<UrlParser::UrlPart>& url_parts, |
192 | | const ColumnString* key_col, const int size, |
193 | | ColumnUInt8::Container& null_map_data, |
194 | | ColumnString::Chars& res_chars, |
195 | 29 | ColumnString::Offsets& res_offsets) { |
196 | 61 | for (size_t i = 0; i < size; ++i) { |
197 | 32 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; |
198 | 32 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); |
199 | 32 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); |
200 | 32 | StringRef parse_res; |
201 | 32 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { |
202 | 16 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, |
203 | 16 | res_chars, res_offsets); |
204 | 16 | } else { |
205 | 16 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
206 | 16 | continue; |
207 | 16 | } |
208 | 32 | } |
209 | 29 | return Status::OK(); |
210 | 29 | } _ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 195 | 5 | ColumnString::Offsets& res_offsets) { | 196 | 13 | for (size_t i = 0; i < size; ++i) { | 197 | 8 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 198 | 8 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 199 | 8 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 200 | 8 | StringRef parse_res; | 201 | 8 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 202 | 4 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 203 | 4 | res_chars, res_offsets); | 204 | 4 | } else { | 205 | 4 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 206 | 4 | continue; | 207 | 4 | } | 208 | 8 | } | 209 | 5 | return Status::OK(); | 210 | 5 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 195 | 4 | ColumnString::Offsets& res_offsets) { | 196 | 8 | for (size_t i = 0; i < size; ++i) { | 197 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 198 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 199 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 200 | 4 | StringRef parse_res; | 201 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 202 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 203 | 2 | res_chars, res_offsets); | 204 | 2 | } else { | 205 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 206 | 2 | continue; | 207 | 2 | } | 208 | 4 | } | 209 | 4 | return Status::OK(); | 210 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 195 | 4 | ColumnString::Offsets& res_offsets) { | 196 | 8 | for (size_t i = 0; i < size; ++i) { | 197 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 198 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 199 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 200 | 4 | StringRef parse_res; | 201 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 202 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 203 | 2 | res_chars, res_offsets); | 204 | 2 | } else { | 205 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 206 | 2 | continue; | 207 | 2 | } | 208 | 4 | } | 209 | 4 | return Status::OK(); | 210 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 195 | 4 | ColumnString::Offsets& res_offsets) { | 196 | 8 | for (size_t i = 0; i < size; ++i) { | 197 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 198 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 199 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 200 | 4 | StringRef parse_res; | 201 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 202 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 203 | 2 | res_chars, res_offsets); | 204 | 2 | } else { | 205 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 206 | 2 | continue; | 207 | 2 | } | 208 | 4 | } | 209 | 4 | return Status::OK(); | 210 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 195 | 4 | ColumnString::Offsets& res_offsets) { | 196 | 8 | for (size_t i = 0; i < size; ++i) { | 197 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 198 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 199 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 200 | 4 | StringRef parse_res; | 201 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 202 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 203 | 2 | res_chars, res_offsets); | 204 | 2 | } else { | 205 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 206 | 2 | continue; | 207 | 2 | } | 208 | 4 | } | 209 | 4 | return Status::OK(); | 210 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 195 | 4 | ColumnString::Offsets& res_offsets) { | 196 | 8 | for (size_t i = 0; i < size; ++i) { | 197 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 198 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 199 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 200 | 4 | StringRef parse_res; | 201 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 202 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 203 | 2 | res_chars, res_offsets); | 204 | 2 | } else { | 205 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 206 | 2 | continue; | 207 | 2 | } | 208 | 4 | } | 209 | 4 | return Status::OK(); | 210 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 195 | 4 | ColumnString::Offsets& res_offsets) { | 196 | 8 | for (size_t i = 0; i < size; ++i) { | 197 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 198 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 199 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 200 | 4 | StringRef parse_res; | 201 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 202 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 203 | 2 | res_chars, res_offsets); | 204 | 2 | } else { | 205 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 206 | 2 | continue; | 207 | 2 | } | 208 | 4 | } | 209 | 4 | return Status::OK(); | 210 | 4 | } |
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE |
211 | | }; |
212 | | |
213 | | class FunctionUrlDecode : public IFunction { |
214 | | public: |
215 | | static constexpr auto name = "url_decode"; |
216 | 2 | static FunctionPtr create() { return std::make_shared<FunctionUrlDecode>(); } |
217 | 1 | String get_name() const override { return name; } |
218 | 0 | size_t get_number_of_arguments() const override { return 1; } |
219 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
220 | 0 | return std::make_shared<DataTypeString>(); |
221 | 0 | } |
222 | | |
223 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
224 | 0 | uint32_t result, size_t input_rows_count) const override { |
225 | 0 | auto res = ColumnString::create(); |
226 | 0 | res->get_offsets().reserve(input_rows_count); |
227 | |
|
228 | 0 | const auto* url_col = |
229 | 0 | assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get()); |
230 | |
|
231 | 0 | std::string decoded_url; |
232 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
233 | 0 | auto url = url_col->get_data_at(i); |
234 | 0 | if (!url_decode(url.to_string(), &decoded_url)) { |
235 | 0 | return Status::InternalError("Decode url failed"); |
236 | 0 | } |
237 | 0 | res->insert_data(decoded_url.data(), decoded_url.size()); |
238 | 0 | decoded_url.clear(); |
239 | 0 | } |
240 | | |
241 | 0 | block.get_by_position(result).column = std::move(res); |
242 | 0 | return Status::OK(); |
243 | 0 | } |
244 | | }; |
245 | | |
246 | | class FunctionUrlEncode : public IFunction { |
247 | | public: |
248 | | static constexpr auto name = "url_encode"; |
249 | 6 | static FunctionPtr create() { return std::make_shared<FunctionUrlEncode>(); } |
250 | 1 | String get_name() const override { return name; } |
251 | 4 | size_t get_number_of_arguments() const override { return 1; } |
252 | 4 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
253 | 4 | return std::make_shared<DataTypeString>(); |
254 | 4 | } |
255 | | |
256 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
257 | 4 | uint32_t result, size_t input_rows_count) const override { |
258 | 4 | auto res = ColumnString::create(); |
259 | 4 | res->get_offsets().reserve(input_rows_count); |
260 | | |
261 | 4 | const auto* url_col = |
262 | 4 | assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get()); |
263 | | |
264 | 4 | std::string encoded_url; |
265 | 10 | for (size_t i = 0; i < input_rows_count; ++i) { |
266 | 6 | auto url = url_col->get_data_at(i); |
267 | 6 | url_encode(url.to_string_view(), &encoded_url); |
268 | 6 | res->insert_data(encoded_url.data(), encoded_url.size()); |
269 | 6 | encoded_url.clear(); |
270 | 6 | } |
271 | | |
272 | 4 | block.get_by_position(result).column = std::move(res); |
273 | 4 | return Status::OK(); |
274 | 4 | } |
275 | | }; |
276 | | |
277 | 1 | void register_function_string_url(SimpleFunctionFactory& factory) { |
278 | 1 | factory.register_function<FunctionExtractURLParameter>(); |
279 | 1 | factory.register_function<FunctionStringParseUrl>(); |
280 | 1 | factory.register_function<FunctionUrlDecode>(); |
281 | 1 | factory.register_function<FunctionUrlEncode>(); |
282 | 1 | } |
283 | | |
284 | | #include "common/compile_check_avoid_end.h" |
285 | | } // namespace doris |