be/src/exprs/function/array/function_array_join.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | #pragma once |
18 | | |
19 | | #include "core/block/block.h" |
20 | | #include "core/column/column_array.h" |
21 | | #include "core/column/column_const.h" |
22 | | #include "core/column/column_execute_util.h" |
23 | | #include "core/data_type/data_type_array.h" |
24 | | #include "core/data_type/data_type_string.h" |
25 | | #include "core/string_ref.h" |
26 | | #include "exprs/function/array/function_array_utils.h" |
27 | | |
28 | | namespace doris { |
29 | | |
30 | | #include "common/compile_check_begin.h" |
/// Compile-time tag type that carries the SQL-visible name of the function.
struct NameArrayJoin {
    static constexpr const char* name = "array_join";
};
34 | | |
35 | | struct ArrayJoinImpl { |
36 | | public: |
37 | | using column_type = ColumnArray; |
38 | | using NullMapType = PaddedPODArray<UInt8>; |
39 | | |
40 | 1 | static bool _is_variadic() { return true; } |
41 | | |
42 | 0 | static size_t _get_number_of_arguments() { return 0; } |
43 | | |
44 | 0 | static DataTypePtr get_return_type(const DataTypes& arguments) { |
45 | 0 | DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY) |
46 | 0 | << "first argument for function: array_join should be DataTypeArray" |
47 | 0 | << " and arguments[0] is " << arguments[0]->get_name(); |
48 | 0 | DCHECK(is_string_type(arguments[1]->get_primitive_type())) |
49 | 0 | << "second argument for function: array_join should be DataTypeString" |
50 | 0 | << ", and arguments[1] is " << arguments[1]->get_name(); |
51 | 0 | if (arguments.size() > 2) { |
52 | 0 | DCHECK(is_string_type(arguments[2]->get_primitive_type())) |
53 | 0 | << "third argument for function: array_join should be DataTypeString" |
54 | 0 | << ", and arguments[2] is " << arguments[2]->get_name(); |
55 | 0 | } |
56 | |
|
57 | 0 | return std::make_shared<DataTypeString>(); |
58 | 0 | } |
59 | | |
60 | | static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, |
61 | 0 | const DataTypeArray* data_type_array, const ColumnArray& array) { |
62 | 0 | ColumnPtr src_column = |
63 | 0 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
64 | 0 | ColumnArrayExecutionData src; |
65 | 0 | if (!extract_column_array_info(*src_column, src)) { |
66 | 0 | return Status::RuntimeError(fmt::format( |
67 | 0 | "execute failed, unsupported types for function {}({})", "array_join", |
68 | 0 | block.get_by_position(arguments[0]).type->get_name())); |
69 | 0 | } |
70 | | |
71 | 0 | auto nested_type = data_type_array->get_nested_type(); |
72 | 0 | auto dest_column_ptr = ColumnString::create(); |
73 | |
|
74 | 0 | auto& dest_chars = dest_column_ptr->get_chars(); |
75 | 0 | auto& dest_offsets = dest_column_ptr->get_offsets(); |
76 | |
|
77 | 0 | dest_offsets.resize_fill(src_column->size(), 0); |
78 | |
|
79 | 0 | auto sep_column = |
80 | 0 | ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[1]).column); |
81 | |
|
82 | 0 | if (arguments.size() > 2) { |
83 | 0 | auto null_replace_column = |
84 | 0 | ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[2]).column); |
85 | |
|
86 | 0 | _execute_string(*src.nested_col, *src.offsets_ptr, src.nested_nullmap_data, sep_column, |
87 | 0 | null_replace_column, dest_chars, dest_offsets); |
88 | |
|
89 | 0 | } else { |
90 | 0 | auto tmp_column_string = ColumnString::create(); |
91 | | // insert default value for null replacement, which is empty string |
92 | 0 | tmp_column_string->insert_default(); |
93 | 0 | ColumnPtr tmp_const_column = |
94 | 0 | ColumnConst::create(std::move(tmp_column_string), sep_column.size()); |
95 | |
|
96 | 0 | auto null_replace_column = ColumnView<TYPE_STRING>::create(tmp_const_column); |
97 | |
|
98 | 0 | _execute_string(*src.nested_col, *src.offsets_ptr, src.nested_nullmap_data, sep_column, |
99 | 0 | null_replace_column, dest_chars, dest_offsets); |
100 | 0 | } |
101 | |
|
102 | 0 | block.replace_by_position(result, std::move(dest_column_ptr)); |
103 | 0 | return Status::OK(); |
104 | 0 | } |
105 | | |
106 | | private: |
107 | | // same as ColumnString::insert_data |
108 | | static void insert_to_chars(int64_t i, ColumnString::Chars& chars, uint32_t& total_size, |
109 | 0 | const char* pos, size_t length) { |
110 | 0 | const size_t old_size = chars.size(); |
111 | 0 | const size_t new_size = old_size + length; |
112 | |
|
113 | 0 | if (length) { |
114 | 0 | ColumnString::check_chars_length(new_size, i); |
115 | 0 | chars.resize(new_size); |
116 | 0 | memcpy(chars.data() + old_size, pos, length); |
117 | 0 | total_size += length; |
118 | 0 | } |
119 | 0 | } |
120 | | |
121 | | static void _fill_result_string(int64_t i, const StringRef& input_str, const StringRef& sep_str, |
122 | | ColumnString::Chars& dest_chars, uint32_t& total_size, |
123 | 0 | bool& is_first_elem) { |
124 | 0 | if (is_first_elem) { |
125 | 0 | insert_to_chars(i, dest_chars, total_size, input_str.data, input_str.size); |
126 | 0 | is_first_elem = false; |
127 | 0 | } else { |
128 | 0 | insert_to_chars(i, dest_chars, total_size, sep_str.data, sep_str.size); |
129 | 0 | insert_to_chars(i, dest_chars, total_size, input_str.data, input_str.size); |
130 | 0 | } |
131 | 0 | } |
132 | | |
133 | | static void _execute_string(const IColumn& src_column, |
134 | | const ColumnArray::Offsets64& src_offsets, |
135 | | const UInt8* src_null_map, ColumnView<TYPE_STRING>& sep_column, |
136 | | ColumnView<TYPE_STRING>& null_replace_column, |
137 | | ColumnString::Chars& dest_chars, |
138 | 0 | ColumnString::Offsets& dest_offsets) { |
139 | 0 | const auto& src_data = assert_cast<const ColumnString&>(src_column); |
140 | |
|
141 | 0 | uint32_t total_size = 0; |
142 | |
|
143 | 0 | for (int64_t i = 0; i < src_offsets.size(); ++i) { |
144 | 0 | auto begin = src_offsets[i - 1]; |
145 | 0 | auto end = src_offsets[i]; |
146 | |
|
147 | 0 | auto sep_str = sep_column.value_at(i); |
148 | 0 | auto null_replace_str = null_replace_column.value_at(i); |
149 | |
|
150 | 0 | bool is_first_elem = true; |
151 | |
|
152 | 0 | for (size_t j = begin; j < end; ++j) { |
153 | 0 | if (src_null_map && src_null_map[j]) { |
154 | 0 | if (null_replace_str.size != 0) { |
155 | 0 | _fill_result_string(i, null_replace_str, sep_str, dest_chars, total_size, |
156 | 0 | is_first_elem); |
157 | 0 | } |
158 | 0 | continue; |
159 | 0 | } |
160 | | |
161 | 0 | StringRef src_str_ref = src_data.get_data_at(j); |
162 | 0 | _fill_result_string(i, src_str_ref, sep_str, dest_chars, total_size, is_first_elem); |
163 | 0 | } |
164 | |
|
165 | 0 | dest_offsets[i] = total_size; |
166 | 0 | } |
167 | 0 | } |
168 | | }; |
169 | | |
170 | | #include "common/compile_check_end.h" |
171 | | } // namespace doris |