be/src/exprs/function/random.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <fmt/format.h> |
19 | | #include <glog/logging.h> |
20 | | |
21 | | #include <algorithm> |
22 | | #include <boost/iterator/iterator_facade.hpp> |
23 | | #include <cstdint> |
24 | | #include <cstdlib> |
25 | | #include <memory> |
26 | | #include <random> |
27 | | #include <utility> |
28 | | |
29 | | #include "common/status.h" |
30 | | #include "core/assert_cast.h" |
31 | | #include "core/block/block.h" |
32 | | #include "core/block/column_numbers.h" |
33 | | #include "core/column/column.h" |
34 | | #include "core/column/column_vector.h" |
35 | | #include "core/data_type/data_type_number.h" |
36 | | #include "core/types.h" |
37 | | #include "exprs/aggregate/aggregate_function.h" |
38 | | #include "exprs/function/function.h" |
39 | | #include "exprs/function/simple_function_factory.h" |
40 | | #include "exprs/function_context.h" |
41 | | |
42 | | namespace doris { |
43 | | |
44 | | #include "common/compile_check_begin.h" |
45 | | class Random : public IFunction { |
46 | | public: |
47 | | static constexpr auto name = "random"; |
48 | | |
49 | 260 | static FunctionPtr create() { return std::make_shared<Random>(); } |
50 | | |
51 | 0 | String get_name() const override { return name; } |
52 | | |
53 | 1.23k | bool use_default_implementation_for_constants() const override { return false; } |
54 | | |
55 | 0 | size_t get_number_of_arguments() const override { return 0; } |
56 | | |
57 | 252 | bool is_variadic() const override { return true; } |
58 | | |
59 | 251 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
60 | 251 | if (arguments.size() == 2) { |
61 | 57 | return std::make_shared<DataTypeInt64>(); |
62 | 57 | } |
63 | 194 | return std::make_shared<DataTypeFloat64>(); |
64 | 251 | } |
65 | | |
66 | 2.77k | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
67 | 2.77k | std::shared_ptr<std::mt19937_64> generator(new std::mt19937_64()); |
68 | 2.77k | context->set_function_state(scope, generator); |
69 | 2.77k | if (scope == FunctionContext::THREAD_LOCAL) { |
70 | 2.52k | if (context->get_num_args() == 1) { |
71 | | // This is a call to RandSeed, initialize the seed |
72 | 48 | if (!context->is_col_constant(0)) { |
73 | 0 | return Status::InvalidArgument("The param of rand function must be literal"); |
74 | 0 | } |
75 | 48 | uint32_t seed = 0; |
76 | 48 | if (!context->get_constant_col(0)->column_ptr->is_null_at(0)) { |
77 | 47 | seed = (uint32_t)(*context->get_constant_col(0)->column_ptr)[0] |
78 | 47 | .get<TYPE_BIGINT>(); |
79 | 47 | } |
80 | 48 | generator->seed(seed); |
81 | 2.47k | } else if (context->get_num_args() == 2) { |
82 | 1.23k | if (!context->is_col_constant(0) || !context->is_col_constant(1)) { |
83 | 0 | return Status::InvalidArgument("The param of rand function must be literal"); |
84 | 0 | } |
85 | 1.22k | generator->seed(std::random_device()()); |
86 | 1.24k | } else { // zero args |
87 | 1.24k | generator->seed(std::random_device()()); |
88 | 1.24k | } |
89 | 2.52k | } |
90 | | |
91 | 2.77k | return Status::OK(); |
92 | 2.77k | } |
93 | | |
94 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
95 | 368 | uint32_t result, size_t input_rows_count) const override { |
96 | 368 | if (arguments.size() == 2) { |
97 | 132 | return _execute_int_range(context, block, arguments, result, input_rows_count); |
98 | 132 | } |
99 | 236 | return _execute_float(context, block, arguments, result, input_rows_count); |
100 | 368 | } |
101 | | |
102 | | private: |
103 | | static Status _execute_int_range(FunctionContext* context, Block& block, |
104 | | const ColumnNumbers& arguments, uint32_t result, |
105 | 132 | size_t input_rows_count) { |
106 | 132 | auto res_column = ColumnInt64::create(input_rows_count); |
107 | 132 | auto& res_data = static_cast<ColumnInt64&>(*res_column).get_data(); |
108 | | |
109 | 132 | auto* generator = reinterpret_cast<std::mt19937_64*>( |
110 | 132 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
111 | 132 | DCHECK(generator != nullptr); |
112 | | |
113 | | // checked in open() |
114 | 132 | Int64 min = assert_cast<const ColumnInt64*>( |
115 | 132 | assert_cast<const ColumnConst*>( |
116 | 132 | block.get_by_position(arguments[0]).column.get()) |
117 | 132 | ->get_data_column_ptr() |
118 | 132 | .get()) |
119 | 132 | ->get_element(0); |
120 | 132 | Int64 max = assert_cast<const ColumnInt64*>( |
121 | 132 | assert_cast<const ColumnConst*>( |
122 | 132 | block.get_by_position(arguments[1]).column.get()) |
123 | 132 | ->get_data_column_ptr() |
124 | 132 | .get()) |
125 | 132 | ->get_element(0); |
126 | 132 | if (min >= max) { |
127 | 1 | return Status::InvalidArgument(fmt::format( |
128 | 1 | "random's lower bound should less than upper bound, but got [{}, {})", min, |
129 | 1 | max)); |
130 | 1 | } |
131 | | |
132 | 131 | std::uniform_int_distribution<int64_t> distribution(min, max); |
133 | 364 | for (int i = 0; i < input_rows_count; i++) { |
134 | 233 | res_data[i] = distribution(*generator); |
135 | 233 | } |
136 | | |
137 | 131 | block.replace_by_position(result, std::move(res_column)); |
138 | 131 | return Status::OK(); |
139 | 132 | } |
140 | | |
141 | | static Status _execute_float(FunctionContext* context, Block& block, |
142 | | const ColumnNumbers& arguments, uint32_t result, |
143 | 236 | size_t input_rows_count) { |
144 | 236 | static const double min = 0.0; |
145 | 236 | static const double max = 1.0; |
146 | 236 | auto res_column = ColumnFloat64::create(input_rows_count); |
147 | 236 | auto& res_data = static_cast<ColumnFloat64&>(*res_column).get_data(); |
148 | | |
149 | 236 | auto* generator = reinterpret_cast<std::mt19937_64*>( |
150 | 236 | context->get_function_state(FunctionContext::THREAD_LOCAL)); |
151 | 236 | DCHECK(generator != nullptr); |
152 | | |
153 | 236 | std::uniform_real_distribution<double> distribution(min, max); |
154 | 1.04M | for (int i = 0; i < input_rows_count; i++) { |
155 | 1.04M | res_data[i] = distribution(*generator); |
156 | 1.04M | } |
157 | | |
158 | 236 | block.replace_by_position(result, std::move(res_column)); |
159 | 236 | return Status::OK(); |
160 | 236 | } |
161 | | }; |
162 | | |
163 | 8 | void register_function_random(SimpleFunctionFactory& factory) { |
164 | 8 | factory.register_function<Random>(); |
165 | 8 | factory.register_alias(Random::name, "rand"); |
166 | 8 | } |
167 | | #include "common/compile_check_end.h" |
168 | | } // namespace doris |