be/src/exprs/function/array/function_array_shuffle.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | #include <fmt/format.h> |
18 | | #include <glog/logging.h> |
19 | | #include <stdint.h> |
20 | | #include <time.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <memory> |
24 | | #include <ostream> |
25 | | #include <random> |
26 | | #include <string> |
27 | | #include <utility> |
28 | | |
29 | | #include "common/status.h" |
30 | | #include "core/assert_cast.h" |
31 | | #include "core/block/block.h" |
32 | | #include "core/block/column_numbers.h" |
33 | | #include "core/block/column_with_type_and_name.h" |
34 | | #include "core/column/column.h" |
35 | | #include "core/column/column_array.h" |
36 | | #include "core/data_type/data_type.h" |
37 | | #include "core/types.h" |
38 | | #include "exprs/aggregate/aggregate_function.h" |
39 | | #include "exprs/function/function.h" |
40 | | #include "exprs/function/simple_function_factory.h" |
41 | | |
42 | | namespace doris { |
43 | | class FunctionContext; |
44 | | } // namespace doris |
45 | | |
46 | | namespace doris { |
47 | | |
48 | | class FunctionArrayShuffle : public IFunction { |
49 | | public: |
50 | | static constexpr auto name = "array_shuffle"; |
51 | 2 | static FunctionPtr create() { return std::make_shared<FunctionArrayShuffle>(); } |
52 | | |
53 | | /// Get function name. |
54 | 0 | String get_name() const override { return name; } |
55 | | |
56 | 1 | bool is_variadic() const override { return true; } |
57 | | |
58 | 0 | size_t get_number_of_arguments() const override { return 1; } |
59 | | |
60 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
61 | 0 | DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY) |
62 | 0 | << "first argument for function: " << name << " should be DataTypeArray" |
63 | 0 | << " and arguments[0] is " << arguments[0]->get_name(); |
64 | 0 | return arguments[0]; |
65 | 0 | } |
66 | | |
67 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
68 | 0 | uint32_t result, size_t input_rows_count) const override { |
69 | 0 | ColumnPtr src_column = |
70 | 0 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
71 | 0 | const auto& src_column_array = assert_cast<const ColumnArray&>(*src_column); |
72 | |
|
73 | 0 | size_t seed = time(nullptr); |
74 | 0 | if (arguments.size() == 2) { |
75 | 0 | ColumnPtr seed_column = |
76 | 0 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
77 | 0 | seed = assert_cast<const ColumnInt64*>(seed_column.get())->get_element(0); |
78 | 0 | } |
79 | | |
80 | | // time() and seed will not exceed the range of uint32. |
81 | 0 | std::mt19937 g(cast_set<uint32_t>(seed)); |
82 | 0 | auto dest_column_ptr = _execute(src_column_array, g); |
83 | 0 | if (!dest_column_ptr) { |
84 | 0 | return Status::RuntimeError( |
85 | 0 | fmt::format("execute failed or unsupported types for function {}({})", |
86 | 0 | get_name(), block.get_by_position(arguments[0]).type->get_name())); |
87 | 0 | } |
88 | | |
89 | 0 | block.replace_by_position(result, std::move(dest_column_ptr)); |
90 | 0 | return Status::OK(); |
91 | 0 | } |
92 | | |
93 | | private: |
94 | 0 | ColumnPtr _execute(const ColumnArray& src_column_array, std::mt19937& g) const { |
95 | 0 | const auto& src_offsets = src_column_array.get_offsets(); |
96 | 0 | const auto src_nested_column = src_column_array.get_data_ptr(); |
97 | |
|
98 | 0 | ColumnArray::Offset64 src_offsets_size = src_offsets.size(); |
99 | 0 | IColumn::Permutation permutation(src_nested_column->size()); |
100 | |
|
101 | 0 | for (size_t i = 0; i < src_nested_column->size(); ++i) { |
102 | 0 | permutation[i] = i; |
103 | 0 | } |
104 | |
|
105 | 0 | for (size_t i = 0; i < src_offsets_size; ++i) { |
106 | 0 | auto last_offset = src_offsets[i - 1]; |
107 | 0 | auto src_offset = src_offsets[i]; |
108 | |
|
109 | 0 | std::shuffle(&permutation[last_offset], &permutation[src_offset], g); |
110 | 0 | } |
111 | 0 | return ColumnArray::create(src_nested_column->permute(permutation, 0), |
112 | 0 | src_column_array.get_offsets_ptr()); |
113 | 0 | } |
114 | | }; |
115 | | |
116 | 1 | void register_function_array_shuffle(SimpleFunctionFactory& factory) { |
117 | 1 | factory.register_function<FunctionArrayShuffle>(); |
118 | 1 | factory.register_alias("array_shuffle", "shuffle"); |
119 | 1 | } |
120 | | |
121 | | } // namespace doris |