be/src/exprs/function/array/function_array_compact.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <fmt/format.h> |
21 | | #include <glog/logging.h> |
22 | | #include <stddef.h> |
23 | | |
24 | | #include <memory> |
25 | | #include <ostream> |
26 | | #include <string> |
27 | | #include <utility> |
28 | | |
29 | | #include "common/status.h" |
30 | | #include "core/assert_cast.h" |
31 | | #include "core/block/block.h" |
32 | | #include "core/block/column_numbers.h" |
33 | | #include "core/block/column_with_type_and_name.h" |
34 | | #include "core/column/column.h" |
35 | | #include "core/column/column_array.h" |
36 | | #include "core/data_type/data_type.h" |
37 | | #include "core/data_type/data_type_array.h" |
38 | | #include "core/pod_array_fwd.h" |
39 | | #include "core/types.h" |
40 | | #include "exprs/function/function.h" |
41 | | |
42 | | namespace doris { |
43 | | class FunctionContext; |
44 | | } // namespace doris |
45 | | |
46 | | namespace doris { |
47 | | |
48 | | class FunctionArrayCompact : public IFunction { |
49 | | public: |
50 | | static constexpr auto name = "array_compact"; |
51 | 2 | static FunctionPtr create() { return std::make_shared<FunctionArrayCompact>(); } |
52 | | using NullMapType = PaddedPODArray<UInt8>; |
53 | | |
54 | | /// Get function name. |
55 | 1 | String get_name() const override { return name; } |
56 | | |
57 | 1 | bool is_variadic() const override { return false; } |
58 | | |
59 | 0 | size_t get_number_of_arguments() const override { return 1; } |
60 | | |
61 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
62 | 0 | DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY) |
63 | 0 | << "first argument for function: " << name << " should be DataTypeArray" |
64 | 0 | << " and arguments[0] is " << arguments[0]->get_name(); |
65 | 0 | return arguments[0]; |
66 | 0 | } |
67 | | |
68 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
69 | 0 | uint32_t result, size_t input_rows_count) const override { |
70 | 0 | ColumnPtr src_column = |
71 | 0 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
72 | 0 | const auto& src_column_array = check_and_get_column<ColumnArray>(*src_column); |
73 | 0 | if (!src_column_array) { |
74 | 0 | return Status::RuntimeError( |
75 | 0 | fmt::format("unsupported types for function {}({})", get_name(), |
76 | 0 | block.get_by_position(arguments[0]).type->get_name())); |
77 | 0 | } |
78 | 0 | const auto& src_offsets = src_column_array->get_offsets(); |
79 | 0 | const auto* src_nested_column = &src_column_array->get_data(); |
80 | 0 | DCHECK(src_nested_column != nullptr); |
81 | |
|
82 | 0 | DataTypePtr src_column_type = block.get_by_position(arguments[0]).type; |
83 | 0 | auto nested_type = assert_cast<const DataTypeArray&>(*src_column_type).get_nested_type(); |
84 | 0 | auto dest_column_ptr = ColumnArray::create(nested_type->create_column(), |
85 | 0 | ColumnArray::ColumnOffsets::create()); |
86 | 0 | IColumn* dest_nested_column = &dest_column_ptr->get_data(); |
87 | 0 | auto& dest_offsets = dest_column_ptr->get_offsets(); |
88 | 0 | DCHECK(dest_nested_column != nullptr); |
89 | |
|
90 | 0 | auto res_val = _execute(*src_nested_column, src_offsets, *dest_nested_column, dest_offsets); |
91 | 0 | if (!res_val) { |
92 | 0 | return Status::RuntimeError( |
93 | 0 | fmt::format("execute failed or unsupported types for function {}({})", |
94 | 0 | get_name(), block.get_by_position(arguments[0]).type->get_name())); |
95 | 0 | } |
96 | | |
97 | 0 | block.replace_by_position(result, std::move(dest_column_ptr)); |
98 | 0 | return Status::OK(); |
99 | 0 | } |
100 | | |
101 | | private: |
102 | | bool _execute(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, |
103 | 0 | IColumn& dest_column, ColumnArray::Offsets64& dest_offsets) const { |
104 | 0 | ColumnArray::Offset64 src_offsets_size = src_offsets.size(); |
105 | 0 | ColumnArray::Offset64 src_pos = 0; |
106 | 0 | ColumnArray::Offset64 dest_pos = 0; |
107 | |
|
108 | 0 | for (size_t i = 0; i < src_offsets_size; ++i) { |
109 | 0 | auto src_offset = src_offsets[i]; |
110 | 0 | if (src_pos < src_offset) { |
111 | | // Insert first element |
112 | 0 | dest_column.insert_from(src_column, src_pos); |
113 | |
|
114 | 0 | ++src_pos; |
115 | 0 | ++dest_pos; |
116 | | |
117 | | // For the rest of elements, insert if the element is different from the previous. |
118 | 0 | for (; src_pos < src_offset; ++src_pos) { |
119 | 0 | if (0 != (src_column.compare_at(src_pos - 1, src_pos, src_column, 1))) { |
120 | 0 | dest_column.insert_from(src_column, src_pos); |
121 | 0 | ++dest_pos; |
122 | 0 | } |
123 | 0 | } |
124 | 0 | } |
125 | 0 | dest_offsets.push_back(dest_pos); |
126 | 0 | } |
127 | 0 | return true; |
128 | 0 | } |
129 | | }; |
130 | | |
131 | | } // namespace doris |