be/src/exprs/aggregate/aggregate_function_sequence_match.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | // This file is copied from |
19 | | // https://github.com/ClickHouse/ClickHouse/blob/master/AggregateFunctionSequenceMatch.h |
20 | | // and modified by Doris |
21 | | |
22 | | #pragma once |
23 | | |
24 | | #include <string.h> |
25 | | |
26 | | #include <algorithm> |
27 | | #include <bitset> |
28 | | #include <boost/iterator/iterator_facade.hpp> |
29 | | #include <cctype> |
30 | | #include <cstdint> |
31 | | #include <functional> |
32 | | #include <iterator> |
33 | | #include <memory> |
34 | | #include <stack> |
35 | | #include <string> |
36 | | #include <tuple> |
37 | | #include <utility> |
38 | | #include <vector> |
39 | | |
40 | | #include "common/logging.h" |
41 | | #include "core/assert_cast.h" |
42 | | #include "core/column/column_string.h" |
43 | | #include "core/column/column_vector.h" |
44 | | #include "core/data_type/data_type_number.h" |
45 | | #include "core/pod_array_fwd.h" |
46 | | #include "core/string_ref.h" |
47 | | #include "core/types.h" |
48 | | #include "exprs/aggregate/aggregate_function.h" |
49 | | #include "util/string_parser.hpp" |
50 | | |
51 | | namespace doris { |
52 | | class Arena; |
53 | | class BufferReadable; |
54 | | class BufferWritable; |
55 | | class IColumn; |
56 | | } // namespace doris |
57 | | |
58 | | namespace doris { |
59 | | |
60 | | template <template <typename> class Comparator> |
61 | | struct ComparePairFirst final { |
62 | | template <typename T1, typename T2> |
63 | 38 | bool operator()(const std::pair<T1, T2>& lhs, const std::pair<T1, T2>& rhs) const { |
64 | 38 | return Comparator<T1> {}(lhs.first, rhs.first); |
65 | 38 | } _ZNK5doris16ComparePairFirstISt4lessEclINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEEEbRKSt4pairIT_T0_ESE_ Line | Count | Source | 63 | 38 | bool operator()(const std::pair<T1, T2>& lhs, const std::pair<T1, T2>& rhs) const { | 64 | 38 | return Comparator<T1> {}(lhs.first, rhs.first); | 65 | 38 | } |
Unexecuted instantiation: _ZNK5doris16ComparePairFirstISt4lessEclINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEEEbRKSt4pairIT_T0_ESE_ |
66 | | }; |
67 | | |
68 | | static constexpr size_t MAX_EVENTS = 32; |
69 | | |
70 | | /// Max number of iterations to match the pattern against a sequence, exception thrown when exceeded |
71 | | constexpr auto sequence_match_max_iterations = 1000000l; |
72 | | |
73 | | template <PrimitiveType T, typename Derived> |
74 | | struct AggregateFunctionSequenceMatchData final { |
75 | | using Timestamp = typename PrimitiveTypeTraits<T>::CppType; |
76 | | using NativeType = |
77 | | std::conditional_t<T == TYPE_DATEV2, uint32_t, |
78 | | std::conditional_t<T == TYPE_DATETIMEV2, uint64_t, |
79 | | typename PrimitiveTypeTraits<T>::CppType>>; |
80 | | using Events = std::bitset<MAX_EVENTS>; |
81 | | using TimestampEvents = std::pair<Timestamp, Events>; |
82 | | using Comparator = ComparePairFirst<std::less>; |
83 | | |
84 | 17 | AggregateFunctionSequenceMatchData() { reset(); }_ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEEC2Ev Line | Count | Source | 84 | 10 | AggregateFunctionSequenceMatchData() { reset(); } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEEC2Ev _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEEC2Ev Line | Count | Source | 84 | 7 | AggregateFunctionSequenceMatchData() { reset(); } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEEC2Ev |
85 | | |
86 | | public: |
87 | 11 | const std::string get_pattern() const { return pattern; }_ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE11get_patternB5cxx11Ev Line | Count | Source | 87 | 6 | const std::string get_pattern() const { return pattern; } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE11get_patternB5cxx11Ev _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE11get_patternB5cxx11Ev Line | Count | Source | 87 | 5 | const std::string get_pattern() const { return pattern; } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE11get_patternB5cxx11Ev |
88 | | |
89 | 11 | size_t get_arg_count() const { return arg_count; }_ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13get_arg_countEv Line | Count | Source | 89 | 6 | size_t get_arg_count() const { return arg_count; } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13get_arg_countEv _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13get_arg_countEv Line | Count | Source | 89 | 5 | size_t get_arg_count() const { return arg_count; } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13get_arg_countEv |
90 | | |
91 | 32 | void init(const std::string pattern_, size_t arg_count_) { |
92 | 32 | if (!init_flag) { |
93 | 15 | this->pattern = pattern_; |
94 | 15 | this->arg_count = arg_count_; |
95 | 15 | parse_pattern(); |
96 | 15 | init_flag = true; |
97 | 15 | } |
98 | 32 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Line | Count | Source | 91 | 19 | void init(const std::string pattern_, size_t arg_count_) { | 92 | 19 | if (!init_flag) { | 93 | 9 | this->pattern = pattern_; | 94 | 9 | this->arg_count = arg_count_; | 95 | 9 | parse_pattern(); | 96 | 9 | init_flag = true; | 97 | 9 | } | 98 | 19 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Line | Count | Source | 91 | 13 | void init(const std::string pattern_, size_t arg_count_) { | 92 | 13 | if (!init_flag) { | 93 | 6 | this->pattern = pattern_; | 94 | 6 | this->arg_count = arg_count_; | 95 | 6 | parse_pattern(); | 96 | 6 | init_flag = true; | 97 | 6 | } | 98 | 13 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm |
99 | | |
100 | 17 | void reset() { |
101 | 17 | sorted = true; |
102 | 17 | init_flag = false; |
103 | 17 | pattern_has_time = false; |
104 | 17 | pattern = ""; |
105 | 17 | arg_count = 0; |
106 | 17 | conditions_met.reset(); |
107 | 17 | conditions_in_pattern.reset(); |
108 | | |
109 | 17 | events_list.clear(); |
110 | 17 | actions.clear(); |
111 | 17 | dfa_states.clear(); |
112 | 17 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5resetEv Line | Count | Source | 100 | 10 | void reset() { | 101 | 10 | sorted = true; | 102 | 10 | init_flag = false; | 103 | 10 | pattern_has_time = false; | 104 | 10 | pattern = ""; | 105 | 10 | arg_count = 0; | 106 | 10 | conditions_met.reset(); | 107 | 10 | conditions_in_pattern.reset(); | 108 | | | 109 | 10 | events_list.clear(); | 110 | 10 | actions.clear(); | 111 | 10 | dfa_states.clear(); | 112 | 10 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5resetEv _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5resetEv Line | Count | Source | 100 | 7 | void reset() { | 101 | 7 | sorted = true; | 102 | 7 | init_flag = false; | 103 | 7 | pattern_has_time = false; | 104 | 7 | pattern = ""; | 105 | 7 | arg_count = 0; | 106 | 7 | conditions_met.reset(); | 107 | 7 | conditions_in_pattern.reset(); | 108 | | | 109 | 7 | events_list.clear(); | 110 | 7 | actions.clear(); | 111 | 7 | dfa_states.clear(); | 112 | 7 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5resetEv |
113 | | |
114 | 21 | void add(const Timestamp& timestamp, const Events& events) { |
115 | | /// store information exclusively for rows with at least one event |
116 | 21 | if (events.any()) { |
117 | 18 | events_list.emplace_back(timestamp, events); |
118 | 18 | sorted = false; |
119 | 18 | conditions_met |= events; |
120 | 18 | } |
121 | 21 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE3addERKNS_11DateV2ValueINS_19DateTimeV2ValueTypeEEERKSt6bitsetILm32EE Line | Count | Source | 114 | 13 | void add(const Timestamp& timestamp, const Events& events) { | 115 | | /// store information exclusively for rows with at least one event | 116 | 13 | if (events.any()) { | 117 | 10 | events_list.emplace_back(timestamp, events); | 118 | 10 | sorted = false; | 119 | 10 | conditions_met |= events; | 120 | 10 | } | 121 | 13 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE3addERKNS_11DateV2ValueINS_15DateV2ValueTypeEEERKSt6bitsetILm32EE _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE3addERKNS_11DateV2ValueINS_19DateTimeV2ValueTypeEEERKSt6bitsetILm32EE Line | Count | Source | 114 | 8 | void add(const Timestamp& timestamp, const Events& events) { | 115 | | /// store information exclusively for rows with at least one event | 116 | 8 | if (events.any()) { | 117 | 8 | events_list.emplace_back(timestamp, events); | 118 | 8 | sorted = false; | 119 | 8 | conditions_met |= events; | 120 | 8 | } | 121 | 8 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE3addERKNS_11DateV2ValueINS_15DateV2ValueTypeEEERKSt6bitsetILm32EE |
122 | | |
123 | 4 | void merge(const AggregateFunctionSequenceMatchData& other) { |
124 | 4 | if (other.events_list.empty()) return; |
125 | | |
126 | 2 | events_list.insert(std::end(events_list), std::begin(other.events_list), |
127 | 2 | std::end(other.events_list)); |
128 | 2 | sorted = false; |
129 | 2 | conditions_met |= other.conditions_met; |
130 | 2 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5mergeERKS4_ Line | Count | Source | 123 | 2 | void merge(const AggregateFunctionSequenceMatchData& other) { | 124 | 2 | if (other.events_list.empty()) return; | 125 | | | 126 | 1 | events_list.insert(std::end(events_list), std::begin(other.events_list), | 127 | 1 | std::end(other.events_list)); | 128 | 1 | sorted = false; | 129 | 1 | conditions_met |= other.conditions_met; | 130 | 1 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5mergeERKS4_ _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5mergeERKS4_ Line | Count | Source | 123 | 2 | void merge(const AggregateFunctionSequenceMatchData& other) { | 124 | 2 | if (other.events_list.empty()) return; | 125 | | | 126 | 1 | events_list.insert(std::end(events_list), std::begin(other.events_list), | 127 | 1 | std::end(other.events_list)); | 128 | 1 | sorted = false; | 129 | 1 | conditions_met |= other.conditions_met; | 130 | 1 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5mergeERKS4_ |
131 | | |
132 | | // todo: rethink the sort method. |
133 | 9 | void sort() { |
134 | 9 | if (sorted) return; |
135 | | |
136 | 9 | std::sort(std::begin(events_list), std::end(events_list), Comparator {}); |
137 | 9 | sorted = true; |
138 | 9 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE4sortEv Line | Count | Source | 133 | 5 | void sort() { | 134 | 5 | if (sorted) return; | 135 | | | 136 | 5 | std::sort(std::begin(events_list), std::end(events_list), Comparator {}); | 137 | 5 | sorted = true; | 138 | 5 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE4sortEv _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE4sortEv Line | Count | Source | 133 | 4 | void sort() { | 134 | 4 | if (sorted) return; | 135 | | | 136 | 4 | std::sort(std::begin(events_list), std::end(events_list), Comparator {}); | 137 | 4 | sorted = true; | 138 | 4 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE4sortEv |
139 | | |
140 | 7 | void write(BufferWritable& buf) const { |
141 | 7 | buf.write_binary(sorted); |
142 | 7 | buf.write_binary(events_list.size()); |
143 | | |
144 | 13 | for (const auto& events : events_list) { |
145 | 13 | buf.write_binary(events.first); |
146 | 13 | buf.write_binary(events.second.to_ulong()); |
147 | 13 | } |
148 | | |
149 | | // This is std::bitset<32>, which will not exceed 32 bits. |
150 | 7 | UInt32 conditions_met_value = (UInt32)conditions_met.to_ulong(); |
151 | 7 | buf.write_binary(conditions_met_value); |
152 | | |
153 | 7 | buf.write_binary(pattern); |
154 | 7 | buf.write_binary(arg_count); |
155 | 7 | } _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5writeERNS_14BufferWritableE Line | Count | Source | 140 | 4 | void write(BufferWritable& buf) const { | 141 | 4 | buf.write_binary(sorted); | 142 | 4 | buf.write_binary(events_list.size()); | 143 | | | 144 | 7 | for (const auto& events : events_list) { | 145 | 7 | buf.write_binary(events.first); | 146 | 7 | buf.write_binary(events.second.to_ulong()); | 147 | 7 | } | 148 | | | 149 | | // This is std::bitset<32>, which will not exceed 32 bits. | 150 | 4 | UInt32 conditions_met_value = (UInt32)conditions_met.to_ulong(); | 151 | 4 | buf.write_binary(conditions_met_value); | 152 | | | 153 | 4 | buf.write_binary(pattern); | 154 | 4 | buf.write_binary(arg_count); | 155 | 4 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5writeERNS_14BufferWritableE _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5writeERNS_14BufferWritableE Line | Count | Source | 140 | 3 | void write(BufferWritable& buf) const { | 141 | 3 | buf.write_binary(sorted); | 142 | 3 | buf.write_binary(events_list.size()); | 143 | | | 144 | 6 | for (const auto& events : events_list) { | 145 | 6 | buf.write_binary(events.first); | 146 | 6 | buf.write_binary(events.second.to_ulong()); | 147 | 6 | } | 148 | | | 149 | | // This is std::bitset<32>, which will not exceed 32 bits. | 150 | 3 | UInt32 conditions_met_value = (UInt32)conditions_met.to_ulong(); | 151 | 3 | buf.write_binary(conditions_met_value); | 152 | | | 153 | 3 | buf.write_binary(pattern); | 154 | 3 | buf.write_binary(arg_count); | 155 | 3 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5writeERNS_14BufferWritableE |
156 | | |
157 | 7 | void read(BufferReadable& buf) { |
158 | 7 | buf.read_binary(sorted); |
159 | | |
160 | 7 | size_t events_list_size; |
161 | 7 | buf.read_binary(events_list_size); |
162 | | |
163 | 7 | events_list.clear(); |
164 | 7 | events_list.reserve(events_list_size); |
165 | | |
166 | 20 | for (size_t i = 0; i < events_list_size; ++i) { |
167 | 13 | Timestamp timestamp; |
168 | 13 | buf.read_binary(timestamp); |
169 | | |
170 | 13 | UInt64 events; |
171 | 13 | buf.read_binary(events); |
172 | | |
173 | 13 | events_list.emplace_back(timestamp, Events {events}); |
174 | 13 | } |
175 | | |
176 | 7 | UInt32 conditions_met_value; |
177 | 7 | buf.read_binary(conditions_met_value); |
178 | 7 | conditions_met = conditions_met_value; |
179 | | |
180 | 7 | buf.read_binary(pattern); |
181 | 7 | buf.read_binary(arg_count); |
182 | 7 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE4readERNS_14BufferReadableE Line | Count | Source | 157 | 4 | void read(BufferReadable& buf) { | 158 | 4 | buf.read_binary(sorted); | 159 | | | 160 | 4 | size_t events_list_size; | 161 | 4 | buf.read_binary(events_list_size); | 162 | | | 163 | 4 | events_list.clear(); | 164 | 4 | events_list.reserve(events_list_size); | 165 | | | 166 | 11 | for (size_t i = 0; i < events_list_size; ++i) { | 167 | 7 | Timestamp timestamp; | 168 | 7 | buf.read_binary(timestamp); | 169 | | | 170 | 7 | UInt64 events; | 171 | 7 | buf.read_binary(events); | 172 | | | 173 | 7 | events_list.emplace_back(timestamp, Events {events}); | 174 | 7 | } | 175 | | | 176 | 4 | UInt32 conditions_met_value; | 177 | 4 | buf.read_binary(conditions_met_value); | 178 | 4 | conditions_met = conditions_met_value; | 179 | | | 180 | 4 | buf.read_binary(pattern); | 181 | 4 | buf.read_binary(arg_count); | 182 | 4 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE4readERNS_14BufferReadableE _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE4readERNS_14BufferReadableE Line | Count | Source | 157 | 3 | void read(BufferReadable& buf) { | 158 | 3 | buf.read_binary(sorted); | 159 | | | 160 | 3 | size_t events_list_size; | 161 | 3 | buf.read_binary(events_list_size); | 162 | | | 163 | 3 | events_list.clear(); | 164 | 3 | events_list.reserve(events_list_size); | 165 | | | 166 | 9 | for (size_t i = 0; i < events_list_size; ++i) { | 167 | 6 | Timestamp timestamp; | 168 | 6 | buf.read_binary(timestamp); | 169 | | | 170 | 6 | UInt64 events; | 171 | 6 | buf.read_binary(events); | 172 | | | 173 | 6 | events_list.emplace_back(timestamp, Events {events}); | 174 | 6 | } | 175 | | | 176 | 3 | UInt32 conditions_met_value; | 177 | 3 | buf.read_binary(conditions_met_value); | 178 | 3 | conditions_met = conditions_met_value; | 179 | | | 180 | 3 | buf.read_binary(pattern); | 181 | 3 | buf.read_binary(arg_count); | 182 | 3 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE4readERNS_14BufferReadableE |
183 | | |
184 | | private: |
185 | | enum class PatternActionType { |
186 | | SpecificEvent, |
187 | | AnyEvent, |
188 | | KleeneStar, |
189 | | TimeLessOrEqual, |
190 | | TimeLess, |
191 | | TimeGreaterOrEqual, |
192 | | TimeGreater, |
193 | | TimeEqual |
194 | | }; |
195 | | |
196 | | struct PatternAction final { |
197 | | PatternActionType type; |
198 | | std::uint64_t extra; |
199 | | |
200 | | PatternAction() = default; |
201 | | explicit PatternAction(const PatternActionType type_, const std::uint64_t extra_ = 0) |
202 | 42 | : type {type_}, extra {extra_} {}_ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13PatternActionC2ENS4_17PatternActionTypeEm Line | Count | Source | 202 | 26 | : type {type_}, extra {extra_} {} |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13PatternActionC2ENS4_17PatternActionTypeEm _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13PatternActionC2ENS4_17PatternActionTypeEm Line | Count | Source | 202 | 16 | : type {type_}, extra {extra_} {} |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13PatternActionC2ENS4_17PatternActionTypeEm |
203 | | }; |
204 | | |
205 | | using PatternActions = PODArrayWithStackMemory<PatternAction, 64>; |
206 | | |
207 | | Derived& derived() { return assert_cast<Derived&, TypeCheckOnRelease::DISABLE>(*this); } |
208 | | |
209 | 15 | void parse_pattern() { |
210 | 15 | actions.clear(); |
211 | 15 | actions.emplace_back(PatternActionType::KleeneStar); |
212 | | |
213 | 15 | dfa_states.clear(); |
214 | 15 | dfa_states.emplace_back(true); |
215 | | |
216 | 15 | pattern_has_time = false; |
217 | 15 | conditions_in_pattern.reset(); |
218 | | |
219 | 15 | const char* pos = pattern.data(); |
220 | 15 | const char* begin = pos; |
221 | 15 | const char* end = pos + pattern.size(); |
222 | | |
223 | | // Pattern is checked in fe, so pattern should be valid here, we check it and if pattern is invalid, we return. |
224 | 15 | auto fail_parse = [&]() { |
225 | 1 | actions.clear(); |
226 | 1 | dfa_states.clear(); |
227 | 1 | conditions_in_pattern.reset(); |
228 | 1 | pattern_has_time = false; |
229 | 1 | }; _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEvENKUlvE_clEv Line | Count | Source | 224 | 1 | auto fail_parse = [&]() { | 225 | 1 | actions.clear(); | 226 | 1 | dfa_states.clear(); | 227 | 1 | conditions_in_pattern.reset(); | 228 | 1 | pattern_has_time = false; | 229 | 1 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEvENKUlvE_clEv Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEvENKUlvE_clEv Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEvENKUlvE_clEv |
230 | | |
231 | 15 | auto throw_exception = [&](const std::string& msg) { |
232 | 1 | LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + |
233 | 1 | std::to_string(pos - begin); |
234 | 1 | fail_parse(); |
235 | 1 | }; _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ Line | Count | Source | 231 | 1 | auto throw_exception = [&](const std::string& msg) { | 232 | | LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + | 233 | 1 | std::to_string(pos - begin); | 234 | 1 | fail_parse(); | 235 | 1 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ |
236 | | |
237 | 86 | auto match = [&pos, end](const char* str) mutable { |
238 | 86 | size_t length = strlen(str); |
239 | 86 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { |
240 | 61 | pos += length; |
241 | 61 | return true; |
242 | 61 | } |
243 | 25 | return false; |
244 | 86 | }; _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEvENUlPKcE_clES6_ Line | Count | Source | 237 | 56 | auto match = [&pos, end](const char* str) mutable { | 238 | 56 | size_t length = strlen(str); | 239 | 56 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { | 240 | 41 | pos += length; | 241 | 41 | return true; | 242 | 41 | } | 243 | 15 | return false; | 244 | 56 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEvENUlPKcE_clES6_ _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEvENUlPKcE_clES6_ Line | Count | Source | 237 | 30 | auto match = [&pos, end](const char* str) mutable { | 238 | 30 | size_t length = strlen(str); | 239 | 30 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { | 240 | 20 | pos += length; | 241 | 20 | return true; | 242 | 20 | } | 243 | 10 | return false; | 244 | 30 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEvENUlPKcE_clES6_ |
245 | | |
246 | 28 | auto parse_uint = [&pos, end](auto& value) { |
247 | 28 | const auto* start = pos; |
248 | 55 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { |
249 | 27 | ++pos; |
250 | 27 | } |
251 | | |
252 | 28 | if (pos == start) { |
253 | 1 | return false; |
254 | 1 | } |
255 | | |
256 | 27 | StringParser::ParseResult result; |
257 | 27 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( |
258 | 27 | start, pos - start, &result); |
259 | 27 | return result == StringParser::PARSE_SUCCESS; |
260 | 28 | }; _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ Line | Count | Source | 246 | 18 | auto parse_uint = [&pos, end](auto& value) { | 247 | 18 | const auto* start = pos; | 248 | 35 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { | 249 | 17 | ++pos; | 250 | 17 | } | 251 | | | 252 | 18 | if (pos == start) { | 253 | 1 | return false; | 254 | 1 | } | 255 | | | 256 | 17 | StringParser::ParseResult result; | 257 | 17 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( | 258 | 17 | start, pos - start, &result); | 259 | 17 | return result == StringParser::PARSE_SUCCESS; | 260 | 18 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEvENKUlRT_E_clIjEEDaS6_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ Line | Count | Source | 246 | 10 | auto parse_uint = [&pos, end](auto& value) { | 247 | 10 | const auto* start = pos; | 248 | 20 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { | 249 | 10 | ++pos; | 250 | 10 | } | 251 | | | 252 | 10 | if (pos == start) { | 253 | 0 | return false; | 254 | 0 | } | 255 | | | 256 | 10 | StringParser::ParseResult result; | 257 | 10 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( | 258 | 10 | start, pos - start, &result); | 259 | 10 | return result == StringParser::PARSE_SUCCESS; | 260 | 10 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEvENKUlRT_E_clIjEEDaS6_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ |
261 | | |
262 | 42 | while (pos < end) { |
263 | 28 | if (match("(?")) { |
264 | 28 | if (match("t")) { |
265 | 3 | PatternActionType type; |
266 | | |
267 | 3 | if (match("<=")) |
268 | 3 | type = PatternActionType::TimeLessOrEqual; |
269 | 0 | else if (match("<")) |
270 | 0 | type = PatternActionType::TimeLess; |
271 | 0 | else if (match(">=")) |
272 | 0 | type = PatternActionType::TimeGreaterOrEqual; |
273 | 0 | else if (match(">")) |
274 | 0 | type = PatternActionType::TimeGreater; |
275 | 0 | else if (match("==")) |
276 | 0 | type = PatternActionType::TimeEqual; |
277 | 0 | else { |
278 | 0 | throw_exception("Unknown time condition"); |
279 | 0 | return; |
280 | 0 | } |
281 | | |
282 | 3 | NativeType duration = 0; |
283 | 3 | if (!parse_uint(duration)) { |
284 | 1 | throw_exception("Could not parse number"); |
285 | 1 | return; |
286 | 1 | } |
287 | | |
288 | 2 | if (actions.back().type != PatternActionType::SpecificEvent && |
289 | 2 | actions.back().type != PatternActionType::AnyEvent && |
290 | 2 | actions.back().type != PatternActionType::KleeneStar) { |
291 | 0 | throw_exception( |
292 | 0 | "Temporal condition should be preceded by an event condition"); |
293 | 0 | return; |
294 | 0 | } |
295 | | |
296 | 2 | pattern_has_time = true; |
297 | 2 | actions.emplace_back(type, duration); |
298 | 25 | } else { |
299 | 25 | UInt64 event_number = 0; |
300 | 25 | if (!parse_uint(event_number)) { |
301 | 0 | throw_exception("Could not parse number"); |
302 | 0 | return; |
303 | 0 | } |
304 | | |
305 | 25 | if (event_number > arg_count - 1) { |
306 | 0 | throw_exception("Event number " + std::to_string(event_number) + |
307 | 0 | " is out of range"); |
308 | 0 | return; |
309 | 0 | } |
310 | | |
311 | 25 | actions.emplace_back(PatternActionType::SpecificEvent, event_number - 1); |
312 | 25 | dfa_states.back().transition = DFATransition::SpecificEvent; |
313 | 25 | dfa_states.back().event = static_cast<uint32_t>(event_number - 1); |
314 | 25 | dfa_states.emplace_back(); |
315 | 25 | conditions_in_pattern.set(event_number - 1); |
316 | 25 | } |
317 | | |
318 | 27 | if (!match(")")) { |
319 | 0 | throw_exception("Expected closing parenthesis, found"); |
320 | 0 | return; |
321 | 0 | } |
322 | | |
323 | 27 | } else if (match(".*")) { |
324 | 0 | actions.emplace_back(PatternActionType::KleeneStar); |
325 | 0 | dfa_states.back().has_kleene = true; |
326 | 0 | } else if (match(".")) { |
327 | 0 | actions.emplace_back(PatternActionType::AnyEvent); |
328 | 0 | dfa_states.back().transition = DFATransition::AnyEvent; |
329 | 0 | dfa_states.emplace_back(); |
330 | 0 | } else { |
331 | 0 | throw_exception("Could not parse pattern, unexpected starting symbol"); |
332 | 0 | return; |
333 | 0 | } |
334 | 28 | } |
335 | 15 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEv Line | Count | Source | 209 | 9 | void parse_pattern() { | 210 | 9 | actions.clear(); | 211 | 9 | actions.emplace_back(PatternActionType::KleeneStar); | 212 | | | 213 | 9 | dfa_states.clear(); | 214 | 9 | dfa_states.emplace_back(true); | 215 | | | 216 | 9 | pattern_has_time = false; | 217 | 9 | conditions_in_pattern.reset(); | 218 | | | 219 | 9 | const char* pos = pattern.data(); | 220 | 9 | const char* begin = pos; | 221 | 9 | const char* end = pos + pattern.size(); | 222 | | | 223 | | // Pattern is checked in fe, so pattern should be valid here, we check it and if pattern is invalid, we return. | 224 | 9 | auto fail_parse = [&]() { | 225 | 9 | actions.clear(); | 226 | 9 | dfa_states.clear(); | 227 | 9 | conditions_in_pattern.reset(); | 228 | 9 | pattern_has_time = false; | 229 | 9 | }; | 230 | | | 231 | 9 | auto throw_exception = [&](const std::string& msg) { | 232 | 9 | LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + | 233 | 9 | std::to_string(pos - begin); | 234 | 9 | fail_parse(); | 235 | 9 | }; | 236 | | | 237 | 9 | auto match = [&pos, end](const char* str) mutable { | 238 | 9 | size_t length = strlen(str); | 239 | 9 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { | 240 | 9 | pos += length; | 241 | 9 | return true; | 242 | 9 | } | 243 | 9 | return false; | 244 | 9 | }; | 245 | | | 246 | 9 | auto parse_uint = [&pos, end](auto& value) { | 247 | 9 | const auto* start = pos; | 248 | 9 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { | 249 | 9 | ++pos; | 250 | 9 | } | 251 | | | 252 | 9 | if (pos == start) { | 253 | 9 | return false; | 254 | 9 | } | 255 | | | 256 | 9 | StringParser::ParseResult result; | 257 | 9 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( | 258 | 9 | start, pos - start, &result); | 259 | 9 | return result == StringParser::PARSE_SUCCESS; | 260 | 9 | }; | 261 | | | 262 | 26 | while (pos < end) { | 263 | 18 | if (match("(?")) { | 264 | 18 | if (match("t")) { | 265 | 3 | PatternActionType type; | 266 | | | 267 | 3 | if (match("<=")) | 268 | 3 | type = PatternActionType::TimeLessOrEqual; | 269 | 0 | else if (match("<")) | 270 | 0 | type = PatternActionType::TimeLess; | 271 | 0 | else if (match(">=")) | 272 | 0 | type = PatternActionType::TimeGreaterOrEqual; | 273 | 0 | else if (match(">")) | 274 | 0 | type = PatternActionType::TimeGreater; | 275 | 0 | else if (match("==")) | 276 | 0 | type = PatternActionType::TimeEqual; | 277 | 0 | else { | 278 | 0 | throw_exception("Unknown time condition"); | 279 | 0 | return; | 280 | 0 | } | 281 | | | 282 | 3 | NativeType duration = 0; | 283 | 3 | if (!parse_uint(duration)) { | 284 | 1 | throw_exception("Could not parse number"); | 285 | 1 | return; | 286 | 1 | } | 287 | | | 288 | 2 | if (actions.back().type != PatternActionType::SpecificEvent && | 289 | 2 | actions.back().type != PatternActionType::AnyEvent && | 290 | 2 | actions.back().type != PatternActionType::KleeneStar) { | 291 | 0 | throw_exception( | 292 | 0 | "Temporal condition should be preceded by an event condition"); | 293 | 0 | return; | 294 | 0 | } | 295 | | | 296 | 2 | pattern_has_time = true; | 297 | 2 | actions.emplace_back(type, duration); | 298 | 15 | } else { | 299 | 15 | UInt64 event_number = 0; | 300 | 15 | if (!parse_uint(event_number)) { | 301 | 0 | throw_exception("Could not parse number"); | 302 | 0 | return; | 303 | 0 | } | 304 | | | 305 | 15 | if (event_number > arg_count - 1) { | 306 | 0 | throw_exception("Event number " + std::to_string(event_number) + | 307 | 0 | " is out of range"); | 308 | 0 | return; | 309 | 0 | } | 310 | | | 311 | 15 | actions.emplace_back(PatternActionType::SpecificEvent, event_number - 1); | 312 | 15 | dfa_states.back().transition = DFATransition::SpecificEvent; | 313 | 15 | dfa_states.back().event = static_cast<uint32_t>(event_number - 1); | 314 | 15 | dfa_states.emplace_back(); | 315 | 15 | conditions_in_pattern.set(event_number - 1); | 316 | 15 | } | 317 | | | 318 | 17 | if (!match(")")) { | 319 | 0 | throw_exception("Expected closing parenthesis, found"); | 320 | 0 | return; | 321 | 0 | } | 322 | | | 323 | 17 | } else if (match(".*")) { | 324 | 0 | actions.emplace_back(PatternActionType::KleeneStar); | 325 | 0 | dfa_states.back().has_kleene = true; | 326 | 0 | } else if (match(".")) { | 327 | 0 | actions.emplace_back(PatternActionType::AnyEvent); | 328 | 0 | dfa_states.back().transition = DFATransition::AnyEvent; | 329 | 0 | dfa_states.emplace_back(); | 330 | 0 | } else { | 331 | 0 | throw_exception("Could not parse pattern, unexpected starting symbol"); | 332 | 0 | return; | 333 | 0 | } | 334 | 18 | } | 335 | 9 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEv _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEv Line | Count | Source | 209 | 6 | void parse_pattern() { | 210 | 6 | actions.clear(); | 211 | 6 | actions.emplace_back(PatternActionType::KleeneStar); | 212 | | | 213 | 6 | dfa_states.clear(); | 214 | 6 | dfa_states.emplace_back(true); | 215 | | | 216 | 6 | pattern_has_time = false; | 217 | 6 | conditions_in_pattern.reset(); | 218 | | | 219 | 6 | const char* pos = pattern.data(); | 220 | 6 | const char* begin = pos; | 221 | 6 | const char* end = pos + pattern.size(); | 222 | | | 223 | | // Pattern is checked in fe, so pattern should be valid here, we check it and if pattern is invalid, we return. | 224 | 6 | auto fail_parse = [&]() { | 225 | 6 | actions.clear(); | 226 | 6 | dfa_states.clear(); | 227 | 6 | conditions_in_pattern.reset(); | 228 | 6 | pattern_has_time = false; | 229 | 6 | }; | 230 | | | 231 | 6 | auto throw_exception = [&](const std::string& msg) { | 232 | 6 | LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + | 233 | 6 | std::to_string(pos - begin); | 234 | 6 | fail_parse(); | 235 | 6 | }; | 236 | | | 237 | 6 | auto match = [&pos, end](const char* str) mutable { | 238 | 6 | size_t length = strlen(str); | 239 | 6 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { | 240 | 6 | pos += length; | 241 | 6 | return true; | 242 | 6 | } | 243 | 6 | return false; | 244 | 6 | }; | 245 | | | 246 | 6 | auto parse_uint = [&pos, end](auto& value) { | 247 | 6 | const auto* start = pos; | 248 | 6 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { | 249 | 6 | ++pos; | 250 | 6 | } | 251 | | | 252 | 6 | if (pos == start) { | 253 | 6 | return false; | 254 | 6 | } | 255 | | | 256 | 6 | StringParser::ParseResult result; | 257 | 6 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( | 258 | 6 | start, pos - start, &result); | 259 | 6 | return result == StringParser::PARSE_SUCCESS; | 260 | 6 | }; | 261 | | | 262 | 16 | while (pos < end) { | 263 | 10 | if (match("(?")) { | 264 | 10 | if (match("t")) { | 265 | 0 | PatternActionType type; | 266 | |
| 267 | 0 | if (match("<=")) | 268 | 0 | type = PatternActionType::TimeLessOrEqual; | 269 | 0 | else if (match("<")) | 270 | 0 | type = PatternActionType::TimeLess; | 271 | 0 | else if (match(">=")) | 272 | 0 | type = PatternActionType::TimeGreaterOrEqual; | 273 | 0 | else if (match(">")) | 274 | 0 | type = PatternActionType::TimeGreater; | 275 | 0 | else if (match("==")) | 276 | 0 | type = PatternActionType::TimeEqual; | 277 | 0 | else { | 278 | 0 | throw_exception("Unknown time condition"); | 279 | 0 | return; | 280 | 0 | } | 281 | | | 282 | 0 | NativeType duration = 0; | 283 | 0 | if (!parse_uint(duration)) { | 284 | 0 | throw_exception("Could not parse number"); | 285 | 0 | return; | 286 | 0 | } | 287 | | | 288 | 0 | if (actions.back().type != PatternActionType::SpecificEvent && | 289 | 0 | actions.back().type != PatternActionType::AnyEvent && | 290 | 0 | actions.back().type != PatternActionType::KleeneStar) { | 291 | 0 | throw_exception( | 292 | 0 | "Temporal condition should be preceded by an event condition"); | 293 | 0 | return; | 294 | 0 | } | 295 | | | 296 | 0 | pattern_has_time = true; | 297 | 0 | actions.emplace_back(type, duration); | 298 | 10 | } else { | 299 | 10 | UInt64 event_number = 0; | 300 | 10 | if (!parse_uint(event_number)) { | 301 | 0 | throw_exception("Could not parse number"); | 302 | 0 | return; | 303 | 0 | } | 304 | | | 305 | 10 | if (event_number > arg_count - 1) { | 306 | 0 | throw_exception("Event number " + std::to_string(event_number) + | 307 | 0 | " is out of range"); | 308 | 0 | return; | 309 | 0 | } | 310 | | | 311 | 10 | actions.emplace_back(PatternActionType::SpecificEvent, event_number - 1); | 312 | 10 | dfa_states.back().transition = DFATransition::SpecificEvent; | 313 | 10 | dfa_states.back().event = static_cast<uint32_t>(event_number - 1); | 314 | 10 | dfa_states.emplace_back(); | 315 | 10 | conditions_in_pattern.set(event_number - 1); | 316 | 10 | } | 317 | | | 318 | 10 | if (!match(")")) { | 319 | 0 | throw_exception("Expected closing parenthesis, found"); | 320 | 0 | return; | 321 | 0 | } | 322 | | | 323 | 10 | } else if (match(".*")) { | 324 | 0 | actions.emplace_back(PatternActionType::KleeneStar); | 325 | 0 | dfa_states.back().has_kleene = true; | 326 | 0 | } else if (match(".")) { | 327 | 0 | actions.emplace_back(PatternActionType::AnyEvent); | 328 | 0 | dfa_states.back().transition = DFATransition::AnyEvent; | 329 | 0 | dfa_states.emplace_back(); | 330 | 0 | } else { | 331 | 0 | throw_exception("Could not parse pattern, unexpected starting symbol"); | 332 | 0 | return; | 333 | 0 | } | 334 | 10 | } | 335 | 6 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEv |
336 | | |
337 | | public: |
338 | | /// Uses a DFA based approach in order to better handle patterns without |
339 | | /// time assertions. |
340 | | /// |
341 | | /// NOTE: This implementation relies on the assumption that the pattern is *small*. |
342 | | /// |
343 | | /// This algorithm performs in O(mn) (with m the number of DFA states and N the number |
344 | | /// of events) with a memory consumption and memory allocations in O(m). It means that |
345 | | /// if n >>> m (which is expected to be the case), this algorithm can be considered linear. |
346 | | template <typename EventEntry> |
347 | 3 | bool dfa_match(EventEntry& events_it, const EventEntry events_end) const { |
348 | 3 | using ActiveStates = std::vector<bool>; |
349 | | /// Those two vectors keep track of which states should be considered for the current |
350 | | /// event as well as the states which should be considered for the next event. |
351 | 3 | ActiveStates active_states(dfa_states.size(), false); |
352 | 3 | ActiveStates next_active_states(dfa_states.size(), false); |
353 | 3 | active_states[0] = true; |
354 | | |
355 | | /// Keeps track of dead-ends in order not to iterate over all the events to realize that |
356 | | /// the match failed. |
357 | 3 | size_t n_active = 1; |
358 | | |
359 | 9 | for (/* empty */; events_it != events_end && n_active > 0 && !active_states.back(); |
360 | 6 | ++events_it) { |
361 | 6 | n_active = 0; |
362 | 6 | next_active_states.assign(dfa_states.size(), false); |
363 | | |
364 | 24 | for (size_t state = 0; state < dfa_states.size(); ++state) { |
365 | 18 | if (!active_states[state]) { |
366 | 9 | continue; |
367 | 9 | } |
368 | | |
369 | 9 | switch (dfa_states[state].transition) { |
370 | 0 | case DFATransition::None: |
371 | 0 | break; |
372 | 0 | case DFATransition::AnyEvent: |
373 | 0 | next_active_states[state + 1] = true; |
374 | 0 | ++n_active; |
375 | 0 | break; |
376 | 9 | case DFATransition::SpecificEvent: |
377 | 9 | if (events_it->second.test(dfa_states[state].event)) { |
378 | 6 | next_active_states[state + 1] = true; |
379 | 6 | ++n_active; |
380 | 6 | } |
381 | 9 | break; |
382 | 9 | } |
383 | | |
384 | 9 | if (dfa_states[state].has_kleene) { |
385 | 6 | next_active_states[state] = true; |
386 | 6 | ++n_active; |
387 | 6 | } |
388 | 9 | } |
389 | 6 | swap(active_states, next_active_states); |
390 | 6 | } |
391 | | |
392 | 3 | return active_states.back(); |
393 | 3 | } _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE9dfa_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ Line | Count | Source | 347 | 3 | bool dfa_match(EventEntry& events_it, const EventEntry events_end) const { | 348 | 3 | using ActiveStates = std::vector<bool>; | 349 | | /// Those two vectors keep track of which states should be considered for the current | 350 | | /// event as well as the states which should be considered for the next event. | 351 | 3 | ActiveStates active_states(dfa_states.size(), false); | 352 | 3 | ActiveStates next_active_states(dfa_states.size(), false); | 353 | 3 | active_states[0] = true; | 354 | | | 355 | | /// Keeps track of dead-ends in order not to iterate over all the events to realize that | 356 | | /// the match failed. | 357 | 3 | size_t n_active = 1; | 358 | | | 359 | 9 | for (/* empty */; events_it != events_end && n_active > 0 && !active_states.back(); | 360 | 6 | ++events_it) { | 361 | 6 | n_active = 0; | 362 | 6 | next_active_states.assign(dfa_states.size(), false); | 363 | | | 364 | 24 | for (size_t state = 0; state < dfa_states.size(); ++state) { | 365 | 18 | if (!active_states[state]) { | 366 | 9 | continue; | 367 | 9 | } | 368 | | | 369 | 9 | switch (dfa_states[state].transition) { | 370 | 0 | case DFATransition::None: | 371 | 0 | break; | 372 | 0 | case DFATransition::AnyEvent: | 373 | 0 | next_active_states[state + 1] = true; | 374 | 0 | ++n_active; | 375 | 0 | break; | 376 | 9 | case DFATransition::SpecificEvent: | 377 | 9 | if (events_it->second.test(dfa_states[state].event)) { | 378 | 6 | next_active_states[state + 1] = true; | 379 | 6 | ++n_active; | 380 | 6 | } | 381 | 9 | break; | 382 | 9 | } | 383 | | | 384 | 9 | if (dfa_states[state].has_kleene) { | 385 | 6 | next_active_states[state] = true; | 386 | 6 | ++n_active; | 387 | 6 | } | 388 | 9 | } | 389 | 6 | swap(active_states, next_active_states); | 390 | 6 | } | 391 | | | 392 | 3 | return active_states.back(); | 393 | 3 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE9dfa_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ |
394 | | |
395 | | template <typename EventEntry> |
396 | 9 | bool backtracking_match(EventEntry& events_it, const EventEntry events_end) const { |
397 | 9 | const auto action_begin = std::begin(actions); |
398 | 9 | const auto action_end = std::end(actions); |
399 | 9 | auto action_it = action_begin; |
400 | | |
401 | 9 | const auto events_begin = events_it; |
402 | 9 | auto base_it = events_it; |
403 | | |
404 | | /// an iterator to action plus an iterator to row in events list plus timestamp at the start of sequence |
405 | 9 | using backtrack_info = std::tuple<decltype(action_it), EventEntry, EventEntry>; |
406 | 9 | std::stack<backtrack_info> back_stack; |
407 | | |
408 | | /// backtrack if possible |
409 | 9 | const auto do_backtrack = [&] { |
410 | 2 | while (!back_stack.empty()) { |
411 | 2 | auto& top = back_stack.top(); |
412 | | |
413 | 2 | action_it = std::get<0>(top); |
414 | 2 | events_it = std::next(std::get<1>(top)); |
415 | 2 | base_it = std::get<2>(top); |
416 | | |
417 | 2 | back_stack.pop(); |
418 | | |
419 | 2 | if (events_it != events_end) return true; |
420 | 2 | } |
421 | | |
422 | 0 | return false; |
423 | 2 | }; _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ENKUlvE_clEv Line | Count | Source | 409 | 2 | const auto do_backtrack = [&] { | 410 | 2 | while (!back_stack.empty()) { | 411 | 2 | auto& top = back_stack.top(); | 412 | | | 413 | 2 | action_it = std::get<0>(top); | 414 | 2 | events_it = std::next(std::get<1>(top)); | 415 | 2 | base_it = std::get<2>(top); | 416 | | | 417 | 2 | back_stack.pop(); | 418 | | | 419 | 2 | if (events_it != events_end) return true; | 420 | 2 | } | 421 | | | 422 | 0 | return false; | 423 | 2 | }; |
Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ENKUlvE_clEv Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ENKUlvE_clEv Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ENKUlvE_clEv |
424 | | |
425 | 9 | size_t i = 0; |
426 | 42 | while (action_it != action_end && events_it != events_end) { |
427 | 33 | if (action_it->type == PatternActionType::SpecificEvent) { |
428 | 20 | if (events_it->second.test(action_it->extra)) { |
429 | | /// move to the next action and events |
430 | 18 | base_it = events_it; |
431 | 18 | ++action_it, ++events_it; |
432 | 18 | } else if (!do_backtrack()) |
433 | | /// backtracking failed, bail out |
434 | 0 | break; |
435 | 20 | } else if (action_it->type == PatternActionType::AnyEvent) { |
436 | 0 | base_it = events_it; |
437 | 0 | ++action_it, ++events_it; |
438 | 13 | } else if (action_it->type == PatternActionType::KleeneStar) { |
439 | 9 | back_stack.emplace(action_it, events_it, base_it); |
440 | 9 | base_it = events_it; |
441 | 9 | ++action_it; |
442 | 9 | } else if (action_it->type == PatternActionType::TimeLessOrEqual) { |
443 | 4 | if (events_it->first.datetime_diff_in_seconds(base_it->first) <= action_it->extra) { |
444 | | /// condition satisfied, move onto next action |
445 | 4 | back_stack.emplace(action_it, events_it, base_it); |
446 | 4 | base_it = events_it; |
447 | 4 | ++action_it; |
448 | 4 | } else if (!do_backtrack()) |
449 | 0 | break; |
450 | 4 | } else if (action_it->type == PatternActionType::TimeLess) { |
451 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) < action_it->extra) { |
452 | 0 | back_stack.emplace(action_it, events_it, base_it); |
453 | 0 | base_it = events_it; |
454 | 0 | ++action_it; |
455 | 0 | } else if (!do_backtrack()) |
456 | 0 | break; |
457 | 0 | } else if (action_it->type == PatternActionType::TimeGreaterOrEqual) { |
458 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) >= action_it->extra) { |
459 | 0 | back_stack.emplace(action_it, events_it, base_it); |
460 | 0 | base_it = events_it; |
461 | 0 | ++action_it; |
462 | 0 | } else if (++events_it == events_end && !do_backtrack()) |
463 | 0 | break; |
464 | 0 | } else if (action_it->type == PatternActionType::TimeGreater) { |
465 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) > action_it->extra) { |
466 | 0 | back_stack.emplace(action_it, events_it, base_it); |
467 | 0 | base_it = events_it; |
468 | 0 | ++action_it; |
469 | 0 | } else if (++events_it == events_end && !do_backtrack()) |
470 | 0 | break; |
471 | 0 | } else if (action_it->type == PatternActionType::TimeEqual) { |
472 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) == action_it->extra) { |
473 | 0 | back_stack.emplace(action_it, events_it, base_it); |
474 | 0 | base_it = events_it; |
475 | 0 | ++action_it; |
476 | 0 | } else if (++events_it == events_end && !do_backtrack()) |
477 | 0 | break; |
478 | 0 | } else { |
479 | 0 | LOG(WARNING) << "Unknown PatternActionType"; |
480 | 0 | return false; |
481 | 0 | } |
482 | | |
483 | 33 | if (++i > sequence_match_max_iterations) { |
484 | 0 | LOG(WARNING) |
485 | 0 | << "Pattern application proves too difficult, exceeding max iterations (" + |
486 | 0 | std::to_string(sequence_match_max_iterations) + ")"; |
487 | 0 | return false; |
488 | 0 | } |
489 | 33 | } |
490 | | |
491 | | /// if there are some actions remaining |
492 | 9 | if (action_it != action_end) { |
493 | | /// match multiple empty strings at end |
494 | 0 | while (action_it->type == PatternActionType::KleeneStar || |
495 | 0 | action_it->type == PatternActionType::TimeLessOrEqual || |
496 | 0 | action_it->type == PatternActionType::TimeLess || |
497 | 0 | (action_it->type == PatternActionType::TimeGreaterOrEqual && |
498 | 0 | action_it->extra == 0)) |
499 | 0 | ++action_it; |
500 | 0 | } |
501 | | |
502 | 9 | if (events_it == events_begin) ++events_it; |
503 | | |
504 | 9 | return action_it == action_end; |
505 | 9 | } _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ Line | Count | Source | 396 | 2 | bool backtracking_match(EventEntry& events_it, const EventEntry events_end) const { | 397 | 2 | const auto action_begin = std::begin(actions); | 398 | 2 | const auto action_end = std::end(actions); | 399 | 2 | auto action_it = action_begin; | 400 | | | 401 | 2 | const auto events_begin = events_it; | 402 | 2 | auto base_it = events_it; | 403 | | | 404 | | /// an iterator to action plus an iterator to row in events list plus timestamp at the start of sequence | 405 | 2 | using backtrack_info = std::tuple<decltype(action_it), EventEntry, EventEntry>; | 406 | 2 | std::stack<backtrack_info> back_stack; | 407 | | | 408 | | /// backtrack if possible | 409 | 2 | const auto do_backtrack = [&] { | 410 | 2 | while (!back_stack.empty()) { | 411 | 2 | auto& top = back_stack.top(); | 412 | | | 413 | 2 | action_it = std::get<0>(top); | 414 | 2 | events_it = std::next(std::get<1>(top)); | 415 | 2 | base_it = std::get<2>(top); | 416 | | | 417 | 2 | back_stack.pop(); | 418 | | | 419 | 2 | if (events_it != events_end) return true; | 420 | 2 | } | 421 | | | 422 | 2 | return false; | 423 | 2 | }; | 424 | | | 425 | 2 | size_t i = 0; | 426 | 14 | while (action_it != action_end && events_it != events_end) { | 427 | 12 | if (action_it->type == PatternActionType::SpecificEvent) { | 428 | 6 | if (events_it->second.test(action_it->extra)) { | 429 | | /// move to the next action and events | 430 | 4 | base_it = events_it; | 431 | 4 | ++action_it, ++events_it; | 432 | 4 | } else if (!do_backtrack()) | 433 | | /// backtracking failed, bail out | 434 | 0 | break; | 435 | 6 | } else if (action_it->type == PatternActionType::AnyEvent) { | 436 | 0 | base_it = events_it; | 437 | 0 | ++action_it, ++events_it; | 438 | 6 | } else if (action_it->type == PatternActionType::KleeneStar) { | 439 | 2 | back_stack.emplace(action_it, events_it, base_it); | 440 | 2 | base_it = events_it; | 441 | 2 | ++action_it; | 442 | 4 | } else if (action_it->type == PatternActionType::TimeLessOrEqual) { | 443 | 4 | if (events_it->first.datetime_diff_in_seconds(base_it->first) <= action_it->extra) { | 444 | | /// condition satisfied, move onto next action | 445 | 4 | back_stack.emplace(action_it, events_it, base_it); | 446 | 4 | base_it = events_it; | 447 | 4 | ++action_it; | 448 | 4 | } else if (!do_backtrack()) | 449 | 0 | break; | 450 | 4 | } else if (action_it->type == PatternActionType::TimeLess) { | 451 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) < action_it->extra) { | 452 | 0 | back_stack.emplace(action_it, events_it, base_it); | 453 | 0 | base_it = events_it; | 454 | 0 | ++action_it; | 455 | 0 | } else if (!do_backtrack()) | 456 | 0 | break; | 457 | 0 | } else if (action_it->type == PatternActionType::TimeGreaterOrEqual) { | 458 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) >= action_it->extra) { | 459 | 0 | back_stack.emplace(action_it, events_it, base_it); | 460 | 0 | base_it = events_it; | 461 | 0 | ++action_it; | 462 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 463 | 0 | break; | 464 | 0 | } else if (action_it->type == PatternActionType::TimeGreater) { | 465 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) > action_it->extra) { | 466 | 0 | back_stack.emplace(action_it, events_it, base_it); | 467 | 0 | base_it = events_it; | 468 | 0 | ++action_it; | 469 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 470 | 0 | break; | 471 | 0 | } else if (action_it->type == PatternActionType::TimeEqual) { | 472 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) == action_it->extra) { | 473 | 0 | back_stack.emplace(action_it, events_it, base_it); | 474 | 0 | base_it = events_it; | 475 | 0 | ++action_it; | 476 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 477 | 0 | break; | 478 | 0 | } else { | 479 | 0 | LOG(WARNING) << "Unknown PatternActionType"; | 480 | 0 | return false; | 481 | 0 | } | 482 | | | 483 | 12 | if (++i > sequence_match_max_iterations) { | 484 | 0 | LOG(WARNING) | 485 | 0 | << "Pattern application proves too difficult, exceeding max iterations (" + | 486 | 0 | std::to_string(sequence_match_max_iterations) + ")"; | 487 | 0 | return false; | 488 | 0 | } | 489 | 12 | } | 490 | | | 491 | | /// if there are some actions remaining | 492 | 2 | if (action_it != action_end) { | 493 | | /// match multiple empty strings at end | 494 | 0 | while (action_it->type == PatternActionType::KleeneStar || | 495 | 0 | action_it->type == PatternActionType::TimeLessOrEqual || | 496 | 0 | action_it->type == PatternActionType::TimeLess || | 497 | 0 | (action_it->type == PatternActionType::TimeGreaterOrEqual && | 498 | 0 | action_it->extra == 0)) | 499 | 0 | ++action_it; | 500 | 0 | } | 501 | | | 502 | 2 | if (events_it == events_begin) ++events_it; | 503 | | | 504 | 2 | return action_it == action_end; | 505 | 2 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ Line | Count | Source | 396 | 7 | bool backtracking_match(EventEntry& events_it, const EventEntry events_end) const { | 397 | 7 | const auto action_begin = std::begin(actions); | 398 | 7 | const auto action_end = std::end(actions); | 399 | 7 | auto action_it = action_begin; | 400 | | | 401 | 7 | const auto events_begin = events_it; | 402 | 7 | auto base_it = events_it; | 403 | | | 404 | | /// an iterator to action plus an iterator to row in events list plus timestamp at the start of sequence | 405 | 7 | using backtrack_info = std::tuple<decltype(action_it), EventEntry, EventEntry>; | 406 | 7 | std::stack<backtrack_info> back_stack; | 407 | | | 408 | | /// backtrack if possible | 409 | 7 | const auto do_backtrack = [&] { | 410 | 7 | while (!back_stack.empty()) { | 411 | 7 | auto& top = back_stack.top(); | 412 | | | 413 | 7 | action_it = std::get<0>(top); | 414 | 7 | events_it = std::next(std::get<1>(top)); | 415 | 7 | base_it = std::get<2>(top); | 416 | | | 417 | 7 | back_stack.pop(); | 418 | | | 419 | 7 | if (events_it != events_end) return true; | 420 | 7 | } | 421 | | | 422 | 7 | return false; | 423 | 7 | }; | 424 | | | 425 | 7 | size_t i = 0; | 426 | 28 | while (action_it != action_end && events_it != events_end) { | 427 | 21 | if (action_it->type == PatternActionType::SpecificEvent) { | 428 | 14 | if (events_it->second.test(action_it->extra)) { | 429 | | /// move to the next action and events | 430 | 14 | base_it = events_it; | 431 | 14 | ++action_it, ++events_it; | 432 | 14 | } else if (!do_backtrack()) | 433 | | /// backtracking failed, bail out | 434 | 0 | break; | 435 | 14 | } else if (action_it->type == PatternActionType::AnyEvent) { | 436 | 0 | base_it = events_it; | 437 | 0 | ++action_it, ++events_it; | 438 | 7 | } else if (action_it->type == PatternActionType::KleeneStar) { | 439 | 7 | back_stack.emplace(action_it, events_it, base_it); | 440 | 7 | base_it = events_it; | 441 | 7 | ++action_it; | 442 | 7 | } else if (action_it->type == PatternActionType::TimeLessOrEqual) { | 443 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) <= action_it->extra) { | 444 | | /// condition satisfied, move onto next action | 445 | 0 | back_stack.emplace(action_it, events_it, base_it); | 446 | 0 | base_it = events_it; | 447 | 0 | ++action_it; | 448 | 0 | } else if (!do_backtrack()) | 449 | 0 | break; | 450 | 0 | } else if (action_it->type == PatternActionType::TimeLess) { | 451 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) < action_it->extra) { | 452 | 0 | back_stack.emplace(action_it, events_it, base_it); | 453 | 0 | base_it = events_it; | 454 | 0 | ++action_it; | 455 | 0 | } else if (!do_backtrack()) | 456 | 0 | break; | 457 | 0 | } else if (action_it->type == PatternActionType::TimeGreaterOrEqual) { | 458 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) >= action_it->extra) { | 459 | 0 | back_stack.emplace(action_it, events_it, base_it); | 460 | 0 | base_it = events_it; | 461 | 0 | ++action_it; | 462 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 463 | 0 | break; | 464 | 0 | } else if (action_it->type == PatternActionType::TimeGreater) { | 465 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) > action_it->extra) { | 466 | 0 | back_stack.emplace(action_it, events_it, base_it); | 467 | 0 | base_it = events_it; | 468 | 0 | ++action_it; | 469 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 470 | 0 | break; | 471 | 0 | } else if (action_it->type == PatternActionType::TimeEqual) { | 472 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) == action_it->extra) { | 473 | 0 | back_stack.emplace(action_it, events_it, base_it); | 474 | 0 | base_it = events_it; | 475 | 0 | ++action_it; | 476 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 477 | 0 | break; | 478 | 0 | } else { | 479 | 0 | LOG(WARNING) << "Unknown PatternActionType"; | 480 | 0 | return false; | 481 | 0 | } | 482 | | | 483 | 21 | if (++i > sequence_match_max_iterations) { | 484 | 0 | LOG(WARNING) | 485 | 0 | << "Pattern application proves too difficult, exceeding max iterations (" + | 486 | 0 | std::to_string(sequence_match_max_iterations) + ")"; | 487 | 0 | return false; | 488 | 0 | } | 489 | 21 | } | 490 | | | 491 | | /// if there are some actions remaining | 492 | 7 | if (action_it != action_end) { | 493 | | /// match multiple empty strings at end | 494 | 0 | while (action_it->type == PatternActionType::KleeneStar || | 495 | 0 | action_it->type == PatternActionType::TimeLessOrEqual || | 496 | 0 | action_it->type == PatternActionType::TimeLess || | 497 | 0 | (action_it->type == PatternActionType::TimeGreaterOrEqual && | 498 | 0 | action_it->extra == 0)) | 499 | 0 | ++action_it; | 500 | 0 | } | 501 | | | 502 | 7 | if (events_it == events_begin) ++events_it; | 503 | | | 504 | 7 | return action_it == action_end; | 505 | 7 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ |
506 | | |
507 | | /// Splits the pattern into deterministic parts separated by non-deterministic fragments |
508 | | /// (time constraints and Kleene stars), and tries to match the deterministic parts in their specified order, |
509 | | /// ignoring the non-deterministic fragments. |
510 | | /// This function can quickly check that a full match is not possible if some deterministic fragment is missing. |
511 | | template <typename EventEntry> |
512 | | bool could_match_deterministic_parts(const EventEntry events_begin, const EventEntry events_end, |
513 | 6 | bool limit_iterations = true) const { |
514 | 6 | size_t events_processed = 0; |
515 | 6 | auto events_it = events_begin; |
516 | | |
517 | 6 | const auto actions_end = std::end(actions); |
518 | 6 | auto actions_it = std::begin(actions); |
519 | 6 | auto det_part_begin = actions_it; |
520 | | |
521 | 6 | auto match_deterministic_part = [&events_it, events_end, &events_processed, det_part_begin, |
522 | 14 | actions_it, limit_iterations]() { |
523 | 14 | auto events_it_init = events_it; |
524 | 14 | auto det_part_it = det_part_begin; |
525 | | |
526 | 14 | while (det_part_it != actions_it && events_it != events_end) { |
527 | | /// matching any event |
528 | 0 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; |
529 | | |
530 | | /// matching specific event |
531 | 0 | else { |
532 | 0 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; |
533 | | |
534 | | /// abandon current matching, try to match the deterministic fragment further in the list |
535 | 0 | else { |
536 | 0 | events_it = ++events_it_init; |
537 | 0 | det_part_it = det_part_begin; |
538 | 0 | } |
539 | 0 | } |
540 | |
|
541 | 0 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { |
542 | 0 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " |
543 | 0 | "iterations are " + |
544 | 0 | std::to_string(sequence_match_max_iterations); |
545 | 0 | return false; |
546 | 0 | } |
547 | 0 | } |
548 | | |
549 | 14 | return det_part_it == actions_it; |
550 | 14 | }; _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_bENKUlvE_clEv Line | Count | Source | 522 | 6 | actions_it, limit_iterations]() { | 523 | 6 | auto events_it_init = events_it; | 524 | 6 | auto det_part_it = det_part_begin; | 525 | | | 526 | 6 | while (det_part_it != actions_it && events_it != events_end) { | 527 | | /// matching any event | 528 | 0 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; | 529 | | | 530 | | /// matching specific event | 531 | 0 | else { | 532 | 0 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; | 533 | | | 534 | | /// abandon current matching, try to match the deterministic fragment further in the list | 535 | 0 | else { | 536 | 0 | events_it = ++events_it_init; | 537 | 0 | det_part_it = det_part_begin; | 538 | 0 | } | 539 | 0 | } | 540 | |
| 541 | 0 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { | 542 | 0 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " | 543 | 0 | "iterations are " + | 544 | 0 | std::to_string(sequence_match_max_iterations); | 545 | 0 | return false; | 546 | 0 | } | 547 | 0 | } | 548 | | | 549 | 6 | return det_part_it == actions_it; | 550 | 6 | }; |
Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_bENKUlvE_clEv _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_bENKUlvE_clEv Line | Count | Source | 522 | 8 | actions_it, limit_iterations]() { | 523 | 8 | auto events_it_init = events_it; | 524 | 8 | auto det_part_it = det_part_begin; | 525 | | | 526 | 8 | while (det_part_it != actions_it && events_it != events_end) { | 527 | | /// matching any event | 528 | 0 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; | 529 | | | 530 | | /// matching specific event | 531 | 0 | else { | 532 | 0 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; | 533 | | | 534 | | /// abandon current matching, try to match the deterministic fragment further in the list | 535 | 0 | else { | 536 | 0 | events_it = ++events_it_init; | 537 | 0 | det_part_it = det_part_begin; | 538 | 0 | } | 539 | 0 | } | 540 | |
| 541 | 0 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { | 542 | 0 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " | 543 | 0 | "iterations are " + | 544 | 0 | std::to_string(sequence_match_max_iterations); | 545 | 0 | return false; | 546 | 0 | } | 547 | 0 | } | 548 | | | 549 | 8 | return det_part_it == actions_it; | 550 | 8 | }; |
Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_bENKUlvE_clEv |
551 | | |
552 | 26 | for (; actions_it != actions_end; ++actions_it) |
553 | 20 | if (actions_it->type != PatternActionType::SpecificEvent && |
554 | 20 | actions_it->type != PatternActionType::AnyEvent) { |
555 | 8 | if (!match_deterministic_part()) return false; |
556 | 8 | det_part_begin = std::next(actions_it); |
557 | 8 | } |
558 | | |
559 | 6 | return match_deterministic_part(); |
560 | 6 | } _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_b Line | Count | Source | 513 | 2 | bool limit_iterations = true) const { | 514 | 2 | size_t events_processed = 0; | 515 | 2 | auto events_it = events_begin; | 516 | | | 517 | 2 | const auto actions_end = std::end(actions); | 518 | 2 | auto actions_it = std::begin(actions); | 519 | 2 | auto det_part_begin = actions_it; | 520 | | | 521 | 2 | auto match_deterministic_part = [&events_it, events_end, &events_processed, det_part_begin, | 522 | 2 | actions_it, limit_iterations]() { | 523 | 2 | auto events_it_init = events_it; | 524 | 2 | auto det_part_it = det_part_begin; | 525 | | | 526 | 2 | while (det_part_it != actions_it && events_it != events_end) { | 527 | | /// matching any event | 528 | 2 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; | 529 | | | 530 | | /// matching specific event | 531 | 2 | else { | 532 | 2 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; | 533 | | | 534 | | /// abandon current matching, try to match the deterministic fragment further in the list | 535 | 2 | else { | 536 | 2 | events_it = ++events_it_init; | 537 | 2 | det_part_it = det_part_begin; | 538 | 2 | } | 539 | 2 | } | 540 | | | 541 | 2 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { | 542 | 2 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " | 543 | 2 | "iterations are " + | 544 | 2 | std::to_string(sequence_match_max_iterations); | 545 | 2 | return false; | 546 | 2 | } | 547 | 2 | } | 548 | | | 549 | 2 | return det_part_it == actions_it; | 550 | 2 | }; | 551 | | | 552 | 10 | for (; actions_it != actions_end; ++actions_it) | 553 | 8 | if (actions_it->type != PatternActionType::SpecificEvent && | 554 | 8 | actions_it->type != PatternActionType::AnyEvent) { | 555 | 4 | if (!match_deterministic_part()) return false; | 556 | 4 | det_part_begin = std::next(actions_it); | 557 | 4 | } | 558 | | | 559 | 2 | return match_deterministic_part(); | 560 | 2 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_b _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_b Line | Count | Source | 513 | 4 | bool limit_iterations = true) const { | 514 | 4 | size_t events_processed = 0; | 515 | 4 | auto events_it = events_begin; | 516 | | | 517 | 4 | const auto actions_end = std::end(actions); | 518 | 4 | auto actions_it = std::begin(actions); | 519 | 4 | auto det_part_begin = actions_it; | 520 | | | 521 | 4 | auto match_deterministic_part = [&events_it, events_end, &events_processed, det_part_begin, | 522 | 4 | actions_it, limit_iterations]() { | 523 | 4 | auto events_it_init = events_it; | 524 | 4 | auto det_part_it = det_part_begin; | 525 | | | 526 | 4 | while (det_part_it != actions_it && events_it != events_end) { | 527 | | /// matching any event | 528 | 4 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; | 529 | | | 530 | | /// matching specific event | 531 | 4 | else { | 532 | 4 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; | 533 | | | 534 | | /// abandon current matching, try to match the deterministic fragment further in the list | 535 | 4 | else { | 536 | 4 | events_it = ++events_it_init; | 537 | 4 | det_part_it = det_part_begin; | 538 | 4 | } | 539 | 4 | } | 540 | | | 541 | 4 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { | 542 | 4 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " | 543 | 4 | "iterations are " + | 544 | 4 | std::to_string(sequence_match_max_iterations); | 545 | 4 | return false; | 546 | 4 | } | 547 | 4 | } | 548 | | | 549 | 4 | return det_part_it == actions_it; | 550 | 4 | }; | 551 | | | 552 | 16 | for (; actions_it != actions_end; ++actions_it) | 553 | 12 | if (actions_it->type != PatternActionType::SpecificEvent && | 554 | 12 | actions_it->type != PatternActionType::AnyEvent) { | 555 | 4 | if (!match_deterministic_part()) return false; | 556 | 4 | det_part_begin = std::next(actions_it); | 557 | 4 | } | 558 | | | 559 | 4 | return match_deterministic_part(); | 560 | 4 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_b |
561 | | |
562 | | private: |
563 | | enum class DFATransition : char { |
564 | | /// .-------. |
565 | | /// | | |
566 | | /// `-------' |
567 | | None, |
568 | | /// .-------. (?[0-9]) |
569 | | /// | | ---------- |
570 | | /// `-------' |
571 | | SpecificEvent, |
572 | | /// .-------. . |
573 | | /// | | ---------- |
574 | | /// `-------' |
575 | | AnyEvent, |
576 | | }; |
577 | | |
578 | | struct DFAState { |
579 | | explicit DFAState(bool has_kleene_ = false) |
580 | 40 | : has_kleene {has_kleene_}, event {0}, transition {DFATransition::None} {}_ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE8DFAStateC2Eb Line | Count | Source | 580 | 24 | : has_kleene {has_kleene_}, event {0}, transition {DFATransition::None} {} |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE8DFAStateC2Eb _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE8DFAStateC2Eb Line | Count | Source | 580 | 16 | : has_kleene {has_kleene_}, event {0}, transition {DFATransition::None} {} |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE8DFAStateC2Eb |
581 | | |
582 | | /// .-------. |
583 | | /// | | - - - |
584 | | /// `-------' |
585 | | /// |_^ |
586 | | bool has_kleene; |
587 | | /// In the case of a state transitions with a `SpecificEvent`, |
588 | | /// `event` contains the value of the event. |
589 | | uint32_t event; |
590 | | /// The kind of transition out of this state. |
591 | | DFATransition transition; |
592 | | }; |
593 | | |
594 | | using DFAStates = std::vector<DFAState>; |
595 | | |
596 | | public: |
597 | | bool sorted = true; |
598 | | std::vector<TimestampEvents> events_list; |
599 | | // sequenceMatch conditions met at least once in events_list |
600 | | std::bitset<MAX_EVENTS> conditions_met; |
601 | | // sequenceMatch conditions met at least once in the pattern |
602 | | std::bitset<MAX_EVENTS> conditions_in_pattern; |
603 | | // `True` if the parsed pattern contains time assertions (?t...), `false` otherwise. |
604 | | bool pattern_has_time; |
605 | | |
606 | | private: |
607 | | std::string pattern; |
608 | | size_t arg_count; |
609 | | bool init_flag = false; |
610 | | |
611 | | PatternActions actions; |
612 | | DFAStates dfa_states; |
613 | | }; |
614 | | |
615 | | template <PrimitiveType T, typename Derived> |
616 | | class AggregateFunctionSequenceBase |
617 | | : public IAggregateFunctionDataHelper<AggregateFunctionSequenceMatchData<T, Derived>, |
618 | | Derived> { |
619 | | public: |
620 | | using NativeType = typename PrimitiveTypeTraits<T>::CppType; |
621 | | AggregateFunctionSequenceBase(const DataTypes& arguments) |
622 | 19 | : IAggregateFunctionDataHelper<AggregateFunctionSequenceMatchData<T, Derived>, Derived>( |
623 | 19 | arguments) { |
624 | 19 | arg_count = arguments.size(); |
625 | 19 | } _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 622 | 9 | : IAggregateFunctionDataHelper<AggregateFunctionSequenceMatchData<T, Derived>, Derived>( | 623 | 9 | arguments) { | 624 | 9 | arg_count = arguments.size(); | 625 | 9 | } |
Unexecuted instantiation: _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 622 | 10 | : IAggregateFunctionDataHelper<AggregateFunctionSequenceMatchData<T, Derived>, Derived>( | 623 | 10 | arguments) { | 624 | 10 | arg_count = arguments.size(); | 625 | 10 | } |
Unexecuted instantiation: _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE |
626 | | |
627 | 0 | void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); }Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5resetEPc Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5resetEPc Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5resetEPc Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5resetEPc |
628 | | |
629 | | void add(AggregateDataPtr __restrict place, const IColumn** columns, const ssize_t row_num, |
630 | 21 | Arena&) const override { |
631 | 21 | std::string pattern = |
632 | 21 | assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(columns[0]) |
633 | 21 | ->get_data_at(0) |
634 | 21 | .to_string(); |
635 | 21 | this->data(place).init(pattern, arg_count); |
636 | | |
637 | 21 | const auto& timestamp = assert_cast<const typename PrimitiveTypeTraits<T>::ColumnType&, |
638 | 21 | TypeCheckOnRelease::DISABLE>(*columns[1]) |
639 | 21 | .get_data()[row_num]; |
640 | 21 | typename AggregateFunctionSequenceMatchData<T, Derived>::Events events; |
641 | | |
642 | 72 | for (auto i = 2; i < arg_count; i++) { |
643 | 51 | const auto event = |
644 | 51 | assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(columns[i]) |
645 | 51 | ->get_data()[row_num]; |
646 | 51 | events.set(i - 2, event); |
647 | 51 | } |
648 | | |
649 | 21 | this->data(place).add(timestamp, events); |
650 | 21 | } _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE Line | Count | Source | 630 | 13 | Arena&) const override { | 631 | 13 | std::string pattern = | 632 | 13 | assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(columns[0]) | 633 | 13 | ->get_data_at(0) | 634 | 13 | .to_string(); | 635 | 13 | this->data(place).init(pattern, arg_count); | 636 | | | 637 | 13 | const auto& timestamp = assert_cast<const typename PrimitiveTypeTraits<T>::ColumnType&, | 638 | 13 | TypeCheckOnRelease::DISABLE>(*columns[1]) | 639 | 13 | .get_data()[row_num]; | 640 | 13 | typename AggregateFunctionSequenceMatchData<T, Derived>::Events events; | 641 | | | 642 | 48 | for (auto i = 2; i < arg_count; i++) { | 643 | 35 | const auto event = | 644 | 35 | assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(columns[i]) | 645 | 35 | ->get_data()[row_num]; | 646 | 35 | events.set(i - 2, event); | 647 | 35 | } | 648 | | | 649 | 13 | this->data(place).add(timestamp, events); | 650 | 13 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE Line | Count | Source | 630 | 8 | Arena&) const override { | 631 | 8 | std::string pattern = | 632 | 8 | assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(columns[0]) | 633 | 8 | ->get_data_at(0) | 634 | 8 | .to_string(); | 635 | 8 | this->data(place).init(pattern, arg_count); | 636 | | | 637 | 8 | const auto& timestamp = assert_cast<const typename PrimitiveTypeTraits<T>::ColumnType&, | 638 | 8 | TypeCheckOnRelease::DISABLE>(*columns[1]) | 639 | 8 | .get_data()[row_num]; | 640 | 8 | typename AggregateFunctionSequenceMatchData<T, Derived>::Events events; | 641 | | | 642 | 24 | for (auto i = 2; i < arg_count; i++) { | 643 | 16 | const auto event = | 644 | 16 | assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(columns[i]) | 645 | 16 | ->get_data()[row_num]; | 646 | 16 | events.set(i - 2, event); | 647 | 16 | } | 648 | | | 649 | 8 | this->data(place).add(timestamp, events); | 650 | 8 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE |
651 | | |
652 | | void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, |
653 | 4 | Arena&) const override { |
654 | 4 | const std::string pattern = this->data(rhs).get_pattern(); |
655 | 4 | this->data(place).init(pattern, this->data(rhs).get_arg_count()); |
656 | 4 | this->data(place).merge(this->data(rhs)); |
657 | 4 | } _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5mergeEPcPKcRNS_5ArenaE Line | Count | Source | 653 | 2 | Arena&) const override { | 654 | 2 | const std::string pattern = this->data(rhs).get_pattern(); | 655 | 2 | this->data(place).init(pattern, this->data(rhs).get_arg_count()); | 656 | 2 | this->data(place).merge(this->data(rhs)); | 657 | 2 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5mergeEPcPKcRNS_5ArenaE _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5mergeEPcPKcRNS_5ArenaE Line | Count | Source | 653 | 2 | Arena&) const override { | 654 | 2 | const std::string pattern = this->data(rhs).get_pattern(); | 655 | 2 | this->data(place).init(pattern, this->data(rhs).get_arg_count()); | 656 | 2 | this->data(place).merge(this->data(rhs)); | 657 | 2 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5mergeEPcPKcRNS_5ArenaE |
658 | | |
659 | 7 | void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { |
660 | 7 | this->data(place).write(buf); |
661 | 7 | } _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE9serializeEPKcRNS_14BufferWritableE Line | Count | Source | 659 | 4 | void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { | 660 | 4 | this->data(place).write(buf); | 661 | 4 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE9serializeEPKcRNS_14BufferWritableE _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE9serializeEPKcRNS_14BufferWritableE Line | Count | Source | 659 | 3 | void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { | 660 | 3 | this->data(place).write(buf); | 661 | 3 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE9serializeEPKcRNS_14BufferWritableE |
662 | | |
663 | | void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, |
664 | 7 | Arena&) const override { |
665 | 7 | this->data(place).read(buf); |
666 | 7 | const std::string pattern = this->data(place).get_pattern(); |
667 | 7 | this->data(place).init(pattern, this->data(place).get_arg_count()); |
668 | 7 | } _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Line | Count | Source | 664 | 4 | Arena&) const override { | 665 | 4 | this->data(place).read(buf); | 666 | 4 | const std::string pattern = this->data(place).get_pattern(); | 667 | 4 | this->data(place).init(pattern, this->data(place).get_arg_count()); | 668 | 4 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Line | Count | Source | 664 | 3 | Arena&) const override { | 665 | 3 | this->data(place).read(buf); | 666 | 3 | const std::string pattern = this->data(place).get_pattern(); | 667 | 3 | this->data(place).init(pattern, this->data(place).get_arg_count()); | 668 | 3 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE |
669 | | |
670 | | private: |
671 | | size_t arg_count; |
672 | | }; |
673 | | |
674 | | template <PrimitiveType T> |
675 | | class AggregateFunctionSequenceMatch final |
676 | | : public AggregateFunctionSequenceBase<T, AggregateFunctionSequenceMatch<T>>, |
677 | | VarargsExpression, |
678 | | NullableAggregateFunction { |
679 | | public: |
680 | | AggregateFunctionSequenceMatch(const DataTypes& arguments, const String& pattern_) |
681 | | : AggregateFunctionSequenceBase<T, AggregateFunctionSequenceMatch<T>>(arguments, |
682 | | pattern_) {} |
683 | | |
684 | | using AggregateFunctionSequenceBase< |
685 | | T, AggregateFunctionSequenceMatch<T>>::AggregateFunctionSequenceBase; |
686 | | |
687 | 0 | String get_name() const override { return "sequence_match"; }Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE26EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE25EE8get_nameB5cxx11Ev |
688 | | |
689 | 0 | DataTypePtr get_return_type() const override { return std::make_shared<DataTypeUInt8>(); }Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE26EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE25EE15get_return_typeEv |
690 | | |
691 | 9 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { |
692 | 9 | auto& output = assert_cast<ColumnUInt8&>(to).get_data(); |
693 | 9 | if (!this->data(place).conditions_in_pattern.any()) { |
694 | 3 | output.push_back(false); |
695 | 3 | return; |
696 | 3 | } |
697 | | |
698 | 6 | if ((this->data(place).conditions_in_pattern & this->data(place).conditions_met) != |
699 | 6 | this->data(place).conditions_in_pattern) { |
700 | 1 | output.push_back(false); |
701 | 1 | return; |
702 | 1 | } |
703 | | // place is essentially an AggregateDataPtr, passed as a ConstAggregateDataPtr. |
704 | 5 | this->data(const_cast<AggregateDataPtr>(place)).sort(); |
705 | | |
706 | 5 | const auto& data_ref = this->data(place); |
707 | | |
708 | 5 | const auto events_begin = std::begin(data_ref.events_list); |
709 | 5 | const auto events_end = std::end(data_ref.events_list); |
710 | 5 | auto events_it = events_begin; |
711 | | |
712 | 5 | bool match = (this->data(place).pattern_has_time |
713 | 5 | ? (this->data(place).could_match_deterministic_parts(events_begin, |
714 | 2 | events_end) && |
715 | 2 | this->data(place).backtracking_match(events_it, events_end)) |
716 | 5 | : this->data(place).dfa_match(events_it, events_end)); |
717 | 5 | output.push_back(match); |
718 | 5 | } _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE26EE18insert_result_intoEPKcRNS_7IColumnE Line | Count | Source | 691 | 9 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { | 692 | 9 | auto& output = assert_cast<ColumnUInt8&>(to).get_data(); | 693 | 9 | if (!this->data(place).conditions_in_pattern.any()) { | 694 | 3 | output.push_back(false); | 695 | 3 | return; | 696 | 3 | } | 697 | | | 698 | 6 | if ((this->data(place).conditions_in_pattern & this->data(place).conditions_met) != | 699 | 6 | this->data(place).conditions_in_pattern) { | 700 | 1 | output.push_back(false); | 701 | 1 | return; | 702 | 1 | } | 703 | | // place is essentially an AggregateDataPtr, passed as a ConstAggregateDataPtr. | 704 | 5 | this->data(const_cast<AggregateDataPtr>(place)).sort(); | 705 | | | 706 | 5 | const auto& data_ref = this->data(place); | 707 | | | 708 | 5 | const auto events_begin = std::begin(data_ref.events_list); | 709 | 5 | const auto events_end = std::end(data_ref.events_list); | 710 | 5 | auto events_it = events_begin; | 711 | | | 712 | 5 | bool match = (this->data(place).pattern_has_time | 713 | 5 | ? (this->data(place).could_match_deterministic_parts(events_begin, | 714 | 2 | events_end) && | 715 | 2 | this->data(place).backtracking_match(events_it, events_end)) | 716 | 5 | : this->data(place).dfa_match(events_it, events_end)); | 717 | 5 | output.push_back(match); | 718 | 5 | } |
Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE25EE18insert_result_intoEPKcRNS_7IColumnE |
719 | | }; |
720 | | |
721 | | template <PrimitiveType T> |
722 | | class AggregateFunctionSequenceCount final |
723 | | : public AggregateFunctionSequenceBase<T, AggregateFunctionSequenceCount<T>>, |
724 | | VarargsExpression, |
725 | | NotNullableAggregateFunction { |
726 | | public: |
727 | | AggregateFunctionSequenceCount(const DataTypes& arguments, const String& pattern_) |
728 | | : AggregateFunctionSequenceBase<T, AggregateFunctionSequenceCount<T>>(arguments, |
729 | | pattern_) {} |
730 | | |
731 | | using AggregateFunctionSequenceBase< |
732 | | T, AggregateFunctionSequenceCount<T>>::AggregateFunctionSequenceBase; |
733 | | |
734 | 0 | String get_name() const override { return "sequence_count"; }Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE26EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE25EE8get_nameB5cxx11Ev |
735 | | |
736 | 0 | DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE26EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE25EE15get_return_typeEv |
737 | | |
738 | 6 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { |
739 | 6 | auto& output = assert_cast<ColumnInt64&>(to).get_data(); |
740 | 6 | if (!this->data(place).conditions_in_pattern.any()) { |
741 | 2 | output.push_back(0); |
742 | 2 | return; |
743 | 2 | } |
744 | | |
745 | 4 | if ((this->data(place).conditions_in_pattern & this->data(place).conditions_met) != |
746 | 4 | this->data(place).conditions_in_pattern) { |
747 | 0 | output.push_back(0); |
748 | 0 | return; |
749 | 0 | } |
750 | | // place is essentially an AggregateDataPtr, passed as a ConstAggregateDataPtr. |
751 | 4 | this->data(const_cast<AggregateDataPtr>(place)).sort(); |
752 | 4 | output.push_back(count(place)); |
753 | 4 | } _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE26EE18insert_result_intoEPKcRNS_7IColumnE Line | Count | Source | 738 | 6 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { | 739 | 6 | auto& output = assert_cast<ColumnInt64&>(to).get_data(); | 740 | 6 | if (!this->data(place).conditions_in_pattern.any()) { | 741 | 2 | output.push_back(0); | 742 | 2 | return; | 743 | 2 | } | 744 | | | 745 | 4 | if ((this->data(place).conditions_in_pattern & this->data(place).conditions_met) != | 746 | 4 | this->data(place).conditions_in_pattern) { | 747 | 0 | output.push_back(0); | 748 | 0 | return; | 749 | 0 | } | 750 | | // place is essentially an AggregateDataPtr, passed as a ConstAggregateDataPtr. | 751 | 4 | this->data(const_cast<AggregateDataPtr>(place)).sort(); | 752 | 4 | output.push_back(count(place)); | 753 | 4 | } |
Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE25EE18insert_result_intoEPKcRNS_7IColumnE |
754 | | |
755 | | private: |
756 | 4 | UInt64 count(ConstAggregateDataPtr __restrict place) const { |
757 | 4 | const auto& data_ref = this->data(place); |
758 | | |
759 | 4 | const auto events_begin = std::begin(data_ref.events_list); |
760 | 4 | const auto events_end = std::end(data_ref.events_list); |
761 | 4 | auto events_it = events_begin; |
762 | | |
763 | 4 | size_t count = 0; |
764 | | // check if there is a chance of matching the sequence at least once |
765 | 4 | if (this->data(place).could_match_deterministic_parts(events_begin, events_end)) { |
766 | 11 | while (events_it != events_end && |
767 | 11 | this->data(place).backtracking_match(events_it, events_end)) |
768 | 7 | ++count; |
769 | 4 | } |
770 | | |
771 | 4 | return count; |
772 | 4 | } _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE26EE5countEPKc Line | Count | Source | 756 | 4 | UInt64 count(ConstAggregateDataPtr __restrict place) const { | 757 | 4 | const auto& data_ref = this->data(place); | 758 | | | 759 | 4 | const auto events_begin = std::begin(data_ref.events_list); | 760 | 4 | const auto events_end = std::end(data_ref.events_list); | 761 | 4 | auto events_it = events_begin; | 762 | | | 763 | 4 | size_t count = 0; | 764 | | // check if there is a chance of matching the sequence at least once | 765 | 4 | if (this->data(place).could_match_deterministic_parts(events_begin, events_end)) { | 766 | 11 | while (events_it != events_end && | 767 | 11 | this->data(place).backtracking_match(events_it, events_end)) | 768 | 7 | ++count; | 769 | 4 | } | 770 | | | 771 | 4 | return count; | 772 | 4 | } |
Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE25EE5countEPKc |
773 | | }; |
774 | | |
775 | | } // namespace doris |