be/src/exprs/function/array/function_array_shuffle.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | #include <fmt/format.h> |
18 | | #include <glog/logging.h> |
19 | | #include <stdint.h> |
20 | | #include <time.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <memory> |
24 | | #include <ostream> |
25 | | #include <random> |
26 | | #include <string> |
27 | | #include <utility> |
28 | | |
29 | | #include "common/status.h" |
30 | | #include "core/assert_cast.h" |
31 | | #include "core/block/block.h" |
32 | | #include "core/block/column_numbers.h" |
33 | | #include "core/block/column_with_type_and_name.h" |
34 | | #include "core/column/column.h" |
35 | | #include "core/column/column_array.h" |
36 | | #include "core/data_type/data_type.h" |
37 | | #include "core/types.h" |
38 | | #include "exprs/aggregate/aggregate_function.h" |
39 | | #include "exprs/function/function.h" |
40 | | #include "exprs/function/simple_function_factory.h" |
41 | | |
42 | | namespace doris { |
43 | | class FunctionContext; |
44 | | } // namespace doris |
45 | | |
46 | | namespace doris { |
47 | | #include "common/compile_check_begin.h" |
48 | | |
49 | | class FunctionArrayShuffle : public IFunction { |
50 | | public: |
51 | | static constexpr auto name = "array_shuffle"; |
52 | 2 | static FunctionPtr create() { return std::make_shared<FunctionArrayShuffle>(); } |
53 | | |
54 | | /// Get function name. |
55 | 0 | String get_name() const override { return name; } |
56 | | |
57 | 1 | bool is_variadic() const override { return true; } |
58 | | |
59 | 0 | size_t get_number_of_arguments() const override { return 1; } |
60 | | |
61 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
62 | 0 | DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY) |
63 | 0 | << "first argument for function: " << name << " should be DataTypeArray" |
64 | 0 | << " and arguments[0] is " << arguments[0]->get_name(); |
65 | 0 | return arguments[0]; |
66 | 0 | } |
67 | | |
68 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
69 | 0 | uint32_t result, size_t input_rows_count) const override { |
70 | 0 | ColumnPtr src_column = |
71 | 0 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
72 | 0 | const auto& src_column_array = assert_cast<const ColumnArray&>(*src_column); |
73 | |
|
74 | 0 | size_t seed = time(nullptr); |
75 | 0 | if (arguments.size() == 2) { |
76 | 0 | ColumnPtr seed_column = |
77 | 0 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
78 | 0 | seed = assert_cast<const ColumnInt64*>(seed_column.get())->get_element(0); |
79 | 0 | } |
80 | | |
81 | | // time() and seed will not exceed the range of uint32. |
82 | 0 | std::mt19937 g(cast_set<uint32_t>(seed)); |
83 | 0 | auto dest_column_ptr = _execute(src_column_array, g); |
84 | 0 | if (!dest_column_ptr) { |
85 | 0 | return Status::RuntimeError( |
86 | 0 | fmt::format("execute failed or unsupported types for function {}({})", |
87 | 0 | get_name(), block.get_by_position(arguments[0]).type->get_name())); |
88 | 0 | } |
89 | | |
90 | 0 | block.replace_by_position(result, std::move(dest_column_ptr)); |
91 | 0 | return Status::OK(); |
92 | 0 | } |
93 | | |
94 | | private: |
95 | 0 | ColumnPtr _execute(const ColumnArray& src_column_array, std::mt19937& g) const { |
96 | 0 | const auto& src_offsets = src_column_array.get_offsets(); |
97 | 0 | const auto src_nested_column = src_column_array.get_data_ptr(); |
98 | |
|
99 | 0 | ColumnArray::Offset64 src_offsets_size = src_offsets.size(); |
100 | 0 | IColumn::Permutation permutation(src_nested_column->size()); |
101 | |
|
102 | 0 | for (size_t i = 0; i < src_nested_column->size(); ++i) { |
103 | 0 | permutation[i] = i; |
104 | 0 | } |
105 | |
|
106 | 0 | for (size_t i = 0; i < src_offsets_size; ++i) { |
107 | 0 | auto last_offset = src_offsets[i - 1]; |
108 | 0 | auto src_offset = src_offsets[i]; |
109 | |
|
110 | 0 | std::shuffle(&permutation[last_offset], &permutation[src_offset], g); |
111 | 0 | } |
112 | 0 | return ColumnArray::create(src_nested_column->permute(permutation, 0), |
113 | 0 | src_column_array.get_offsets_ptr()); |
114 | 0 | } |
115 | | }; |
116 | | |
117 | 1 | void register_function_array_shuffle(SimpleFunctionFactory& factory) { |
118 | 1 | factory.register_function<FunctionArrayShuffle>(); |
119 | 1 | factory.register_alias("array_shuffle", "shuffle"); |
120 | 1 | } |
121 | | |
122 | | } // namespace doris |