be/src/exprs/function/array/function_array_join.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | #pragma once |
18 | | |
19 | | #include "core/block/block.h" |
20 | | #include "core/column/column_array.h" |
21 | | #include "core/column/column_array_view.h" |
22 | | #include "core/column/column_const.h" |
23 | | #include "core/column/column_execute_util.h" |
24 | | #include "core/data_type/data_type_array.h" |
25 | | #include "core/data_type/data_type_string.h" |
26 | | #include "core/string_ref.h" |
27 | | |
28 | | namespace doris { |
29 | | |
/// Name tag carrying the SQL-visible identifier of the function.
struct NameArrayJoin {
    static constexpr const char* name = "array_join";
};
33 | | |
34 | | struct ArrayJoinImpl { |
35 | | public: |
36 | | using column_type = ColumnArray; |
37 | | using NullMapType = PaddedPODArray<UInt8>; |
38 | | |
39 | 1 | static bool _is_variadic() { return true; } |
40 | | |
41 | 0 | static size_t _get_number_of_arguments() { return 0; } |
42 | | |
43 | 0 | static DataTypePtr get_return_type(const DataTypes& arguments) { |
44 | 0 | DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY) |
45 | 0 | << "first argument for function: array_join should be DataTypeArray" |
46 | 0 | << " and arguments[0] is " << arguments[0]->get_name(); |
47 | 0 | DCHECK(is_string_type(arguments[1]->get_primitive_type())) |
48 | 0 | << "second argument for function: array_join should be DataTypeString" |
49 | 0 | << ", and arguments[1] is " << arguments[1]->get_name(); |
50 | 0 | if (arguments.size() > 2) { |
51 | 0 | DCHECK(is_string_type(arguments[2]->get_primitive_type())) |
52 | 0 | << "third argument for function: array_join should be DataTypeString" |
53 | 0 | << ", and arguments[2] is " << arguments[2]->get_name(); |
54 | 0 | } |
55 | |
|
56 | 0 | return std::make_shared<DataTypeString>(); |
57 | 0 | } |
58 | | |
59 | | static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result, |
60 | 0 | const DataTypeArray* data_type_array, const ColumnArray& array) { |
61 | 0 | ColumnPtr src_column = block.get_by_position(arguments[0]).column; |
62 | 0 | auto array_view = ColumnArrayView<TYPE_STRING>::create(src_column); |
63 | |
|
64 | 0 | auto dest_column_ptr = ColumnString::create(); |
65 | |
|
66 | 0 | auto& dest_chars = dest_column_ptr->get_chars(); |
67 | 0 | auto& dest_offsets = dest_column_ptr->get_offsets(); |
68 | |
|
69 | 0 | dest_offsets.resize_fill(array_view.size(), 0); |
70 | |
|
71 | 0 | auto sep_column = |
72 | 0 | ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[1]).column); |
73 | |
|
74 | 0 | if (arguments.size() > 2) { |
75 | 0 | auto null_replace_column = |
76 | 0 | ColumnView<TYPE_STRING>::create(block.get_by_position(arguments[2]).column); |
77 | |
|
78 | 0 | _execute_string(array_view, sep_column, null_replace_column, dest_chars, dest_offsets); |
79 | |
|
80 | 0 | } else { |
81 | 0 | auto tmp_column_string = ColumnString::create(); |
82 | | // insert default value for null replacement, which is empty string |
83 | 0 | tmp_column_string->insert_default(); |
84 | 0 | ColumnPtr tmp_const_column = |
85 | 0 | ColumnConst::create(std::move(tmp_column_string), sep_column.size()); |
86 | |
|
87 | 0 | auto null_replace_column = ColumnView<TYPE_STRING>::create(tmp_const_column); |
88 | |
|
89 | 0 | _execute_string(array_view, sep_column, null_replace_column, dest_chars, dest_offsets); |
90 | 0 | } |
91 | |
|
92 | 0 | block.replace_by_position(result, std::move(dest_column_ptr)); |
93 | 0 | return Status::OK(); |
94 | 0 | } |
95 | | |
96 | | private: |
97 | | // same as ColumnString::insert_data |
98 | | static void insert_to_chars(int64_t i, ColumnString::Chars& chars, uint32_t& total_size, |
99 | 0 | const char* pos, size_t length) { |
100 | 0 | const size_t old_size = chars.size(); |
101 | 0 | const size_t new_size = old_size + length; |
102 | |
|
103 | 0 | if (length) { |
104 | 0 | ColumnString::check_chars_length(new_size, i); |
105 | 0 | chars.resize(new_size); |
106 | 0 | memcpy(chars.data() + old_size, pos, length); |
107 | 0 | total_size += length; |
108 | 0 | } |
109 | 0 | } |
110 | | |
111 | | static void _fill_result_string(int64_t i, const StringRef& input_str, const StringRef& sep_str, |
112 | | ColumnString::Chars& dest_chars, uint32_t& total_size, |
113 | 0 | bool& is_first_elem) { |
114 | 0 | if (is_first_elem) { |
115 | 0 | insert_to_chars(i, dest_chars, total_size, input_str.data, input_str.size); |
116 | 0 | is_first_elem = false; |
117 | 0 | } else { |
118 | 0 | insert_to_chars(i, dest_chars, total_size, sep_str.data, sep_str.size); |
119 | 0 | insert_to_chars(i, dest_chars, total_size, input_str.data, input_str.size); |
120 | 0 | } |
121 | 0 | } |
122 | | |
123 | | static void _execute_string(const ColumnArrayView<TYPE_STRING>& array_view, |
124 | | ColumnView<TYPE_STRING>& sep_column, |
125 | | ColumnView<TYPE_STRING>& null_replace_column, |
126 | | ColumnString::Chars& dest_chars, |
127 | 0 | ColumnString::Offsets& dest_offsets) { |
128 | 0 | uint32_t total_size = 0; |
129 | |
|
130 | 0 | for (int64_t i = 0; i < array_view.size(); ++i) { |
131 | 0 | auto arr = array_view[i]; |
132 | |
|
133 | 0 | auto sep_str = sep_column.value_at(i); |
134 | 0 | auto null_replace_str = null_replace_column.value_at(i); |
135 | |
|
136 | 0 | bool is_first_elem = true; |
137 | |
|
138 | 0 | for (size_t j = 0; j < arr.size(); ++j) { |
139 | 0 | if (arr.is_null_at(j)) { |
140 | 0 | if (null_replace_str.size != 0) { |
141 | 0 | _fill_result_string(i, null_replace_str, sep_str, dest_chars, total_size, |
142 | 0 | is_first_elem); |
143 | 0 | } |
144 | 0 | continue; |
145 | 0 | } |
146 | | |
147 | 0 | StringRef src_str_ref = arr.value_at(j); |
148 | 0 | _fill_result_string(i, src_str_ref, sep_str, dest_chars, total_size, is_first_elem); |
149 | 0 | } |
150 | |
|
151 | 0 | dest_offsets[i] = total_size; |
152 | 0 | } |
153 | 0 | } |
154 | | }; |
155 | | |
156 | | } // namespace doris |