be/src/exprs/aggregate/aggregate_function_sequence_match.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | // This file is copied from |
19 | | // https://github.com/ClickHouse/ClickHouse/blob/master/AggregateFunctionSequenceMatch.h |
20 | | // and modified by Doris |
21 | | |
22 | | #pragma once |
23 | | |
24 | | #include <string.h> |
25 | | |
26 | | #include <algorithm> |
27 | | #include <bitset> |
28 | | #include <boost/iterator/iterator_facade.hpp> |
29 | | #include <cctype> |
30 | | #include <cstdint> |
31 | | #include <functional> |
32 | | #include <iterator> |
33 | | #include <memory> |
34 | | #include <stack> |
35 | | #include <string> |
36 | | #include <tuple> |
37 | | #include <utility> |
38 | | #include <vector> |
39 | | |
40 | | #include "common/logging.h" |
41 | | #include "core/assert_cast.h" |
42 | | #include "core/column/column_string.h" |
43 | | #include "core/column/column_vector.h" |
44 | | #include "core/data_type/data_type_number.h" |
45 | | #include "core/pod_array_fwd.h" |
46 | | #include "core/string_ref.h" |
47 | | #include "core/types.h" |
48 | | #include "exprs/aggregate/aggregate_function.h" |
49 | | #include "util/string_parser.hpp" |
50 | | |
51 | | namespace doris { |
52 | | class Arena; |
53 | | class BufferReadable; |
54 | | class BufferWritable; |
55 | | class IColumn; |
56 | | } // namespace doris |
57 | | |
58 | | namespace doris { |
59 | | |
60 | | template <template <typename> class Comparator> |
61 | | struct ComparePairFirst final { |
62 | | template <typename T1, typename T2> |
63 | 38 | bool operator()(const std::pair<T1, T2>& lhs, const std::pair<T1, T2>& rhs) const { |
64 | 38 | return Comparator<T1> {}(lhs.first, rhs.first); |
65 | 38 | } _ZNK5doris16ComparePairFirstISt4lessEclINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEEEbRKSt4pairIT_T0_ESE_ Line | Count | Source | 63 | 38 | bool operator()(const std::pair<T1, T2>& lhs, const std::pair<T1, T2>& rhs) const { | 64 | 38 | return Comparator<T1> {}(lhs.first, rhs.first); | 65 | 38 | } |
Unexecuted instantiation: _ZNK5doris16ComparePairFirstISt4lessEclINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEEEbRKSt4pairIT_T0_ESE_ Unexecuted instantiation: _ZNK5doris16ComparePairFirstISt4lessEclINS_16TimestampTzValueESt6bitsetILm32EEEEbRKSt4pairIT_T0_ESC_ |
66 | | }; |
67 | | |
68 | | static constexpr size_t MAX_EVENTS = 32; |
69 | | |
70 | | /// Max number of iterations to match the pattern against a sequence, exception thrown when exceeded |
71 | | constexpr auto sequence_match_max_iterations = 1000000l; |
72 | | |
73 | | template <PrimitiveType T, typename Derived> |
74 | | struct AggregateFunctionSequenceMatchData final { |
75 | | using Timestamp = typename PrimitiveTypeTraits<T>::CppType; |
76 | | using Events = std::bitset<MAX_EVENTS>; |
77 | | using TimestampEvents = std::pair<Timestamp, Events>; |
78 | | using Comparator = ComparePairFirst<std::less>; |
79 | | |
80 | 21 | AggregateFunctionSequenceMatchData() { reset(); }_ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEEC2Ev Line | Count | Source | 80 | 12 | AggregateFunctionSequenceMatchData() { reset(); } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEEC2Ev Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEEC2Ev _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEEC2Ev Line | Count | Source | 80 | 9 | AggregateFunctionSequenceMatchData() { reset(); } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEEC2Ev Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEEC2Ev |
81 | | |
82 | | public: |
83 | 11 | const std::string get_pattern() const { return pattern; }_ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE11get_patternB5cxx11Ev Line | Count | Source | 83 | 6 | const std::string get_pattern() const { return pattern; } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE11get_patternB5cxx11Ev Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE11get_patternB5cxx11Ev _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE11get_patternB5cxx11Ev Line | Count | Source | 83 | 5 | const std::string get_pattern() const { return pattern; } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE11get_patternB5cxx11Ev Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE11get_patternB5cxx11Ev |
84 | | |
85 | 11 | size_t get_arg_count() const { return arg_count; }_ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13get_arg_countEv Line | Count | Source | 85 | 6 | size_t get_arg_count() const { return arg_count; } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13get_arg_countEv Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE13get_arg_countEv _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13get_arg_countEv Line | Count | Source | 85 | 5 | size_t get_arg_count() const { return arg_count; } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13get_arg_countEv Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE13get_arg_countEv |
86 | | |
87 | 36 | void init(const std::string pattern_, size_t arg_count_) { |
88 | 36 | if (!init_flag) { |
89 | 19 | this->pattern = pattern_; |
90 | 19 | this->arg_count = arg_count_; |
91 | 19 | parse_pattern(); |
92 | 19 | init_flag = true; |
93 | 19 | } |
94 | 36 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Line | Count | Source | 87 | 21 | void init(const std::string pattern_, size_t arg_count_) { | 88 | 21 | if (!init_flag) { | 89 | 11 | this->pattern = pattern_; | 90 | 11 | this->arg_count = arg_count_; | 91 | 11 | parse_pattern(); | 92 | 11 | init_flag = true; | 93 | 11 | } | 94 | 21 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Line | Count | Source | 87 | 15 | void init(const std::string pattern_, size_t arg_count_) { | 88 | 15 | if (!init_flag) { | 89 | 8 | this->pattern = pattern_; | 90 | 8 | this->arg_count = arg_count_; | 91 | 8 | parse_pattern(); | 92 | 8 | init_flag = true; | 93 | 8 | } | 94 | 15 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE4initENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEm |
95 | | |
96 | 21 | void reset() { |
97 | 21 | sorted = true; |
98 | 21 | init_flag = false; |
99 | 21 | pattern_has_time = false; |
100 | 21 | pattern = ""; |
101 | 21 | arg_count = 0; |
102 | 21 | conditions_met.reset(); |
103 | 21 | conditions_in_pattern.reset(); |
104 | | |
105 | 21 | events_list.clear(); |
106 | 21 | actions.clear(); |
107 | 21 | dfa_states.clear(); |
108 | 21 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5resetEv Line | Count | Source | 96 | 12 | void reset() { | 97 | 12 | sorted = true; | 98 | 12 | init_flag = false; | 99 | 12 | pattern_has_time = false; | 100 | 12 | pattern = ""; | 101 | 12 | arg_count = 0; | 102 | 12 | conditions_met.reset(); | 103 | 12 | conditions_in_pattern.reset(); | 104 | | | 105 | 12 | events_list.clear(); | 106 | 12 | actions.clear(); | 107 | 12 | dfa_states.clear(); | 108 | 12 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5resetEv Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE5resetEv _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5resetEv Line | Count | Source | 96 | 9 | void reset() { | 97 | 9 | sorted = true; | 98 | 9 | init_flag = false; | 99 | 9 | pattern_has_time = false; | 100 | 9 | pattern = ""; | 101 | 9 | arg_count = 0; | 102 | 9 | conditions_met.reset(); | 103 | 9 | conditions_in_pattern.reset(); | 104 | | | 105 | 9 | events_list.clear(); | 106 | 9 | actions.clear(); | 107 | 9 | dfa_states.clear(); | 108 | 9 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5resetEv Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE5resetEv |
109 | | |
110 | 25 | void add(const Timestamp& timestamp, const Events& events) { |
111 | | /// store information exclusively for rows with at least one event |
112 | 25 | if (events.any()) { |
113 | 22 | events_list.emplace_back(timestamp, events); |
114 | 22 | sorted = false; |
115 | 22 | conditions_met |= events; |
116 | 22 | } |
117 | 25 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE3addERKNS_11DateV2ValueINS_19DateTimeV2ValueTypeEEERKSt6bitsetILm32EE Line | Count | Source | 110 | 15 | void add(const Timestamp& timestamp, const Events& events) { | 111 | | /// store information exclusively for rows with at least one event | 112 | 15 | if (events.any()) { | 113 | 12 | events_list.emplace_back(timestamp, events); | 114 | 12 | sorted = false; | 115 | 12 | conditions_met |= events; | 116 | 12 | } | 117 | 15 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE3addERKNS_11DateV2ValueINS_15DateV2ValueTypeEEERKSt6bitsetILm32EE Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE3addERKNS_16TimestampTzValueERKSt6bitsetILm32EE _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE3addERKNS_11DateV2ValueINS_19DateTimeV2ValueTypeEEERKSt6bitsetILm32EE Line | Count | Source | 110 | 10 | void add(const Timestamp& timestamp, const Events& events) { | 111 | | /// store information exclusively for rows with at least one event | 112 | 10 | if (events.any()) { | 113 | 10 | events_list.emplace_back(timestamp, events); | 114 | 10 | sorted = false; | 115 | 10 | conditions_met |= events; | 116 | 10 | } | 117 | 10 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE3addERKNS_11DateV2ValueINS_15DateV2ValueTypeEEERKSt6bitsetILm32EE Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE3addERKNS_16TimestampTzValueERKSt6bitsetILm32EE |
118 | | |
119 | 4 | void merge(const AggregateFunctionSequenceMatchData& other) { |
120 | 4 | if (other.events_list.empty()) return; |
121 | | |
122 | 2 | events_list.insert(std::end(events_list), std::begin(other.events_list), |
123 | 2 | std::end(other.events_list)); |
124 | 2 | sorted = false; |
125 | 2 | conditions_met |= other.conditions_met; |
126 | 2 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5mergeERKS4_ Line | Count | Source | 119 | 2 | void merge(const AggregateFunctionSequenceMatchData& other) { | 120 | 2 | if (other.events_list.empty()) return; | 121 | | | 122 | 1 | events_list.insert(std::end(events_list), std::begin(other.events_list), | 123 | 1 | std::end(other.events_list)); | 124 | 1 | sorted = false; | 125 | 1 | conditions_met |= other.conditions_met; | 126 | 1 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5mergeERKS4_ Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE5mergeERKS4_ _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5mergeERKS4_ Line | Count | Source | 119 | 2 | void merge(const AggregateFunctionSequenceMatchData& other) { | 120 | 2 | if (other.events_list.empty()) return; | 121 | | | 122 | 1 | events_list.insert(std::end(events_list), std::begin(other.events_list), | 123 | 1 | std::end(other.events_list)); | 124 | 1 | sorted = false; | 125 | 1 | conditions_met |= other.conditions_met; | 126 | 1 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5mergeERKS4_ Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE5mergeERKS4_ |
127 | | |
128 | | // todo: rethink the sort method. |
129 | 9 | void sort() { |
130 | 9 | if (sorted) return; |
131 | | |
132 | 9 | std::sort(std::begin(events_list), std::end(events_list), Comparator {}); |
133 | 9 | sorted = true; |
134 | 9 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE4sortEv Line | Count | Source | 129 | 5 | void sort() { | 130 | 5 | if (sorted) return; | 131 | | | 132 | 5 | std::sort(std::begin(events_list), std::end(events_list), Comparator {}); | 133 | 5 | sorted = true; | 134 | 5 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE4sortEv Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE4sortEv _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE4sortEv Line | Count | Source | 129 | 4 | void sort() { | 130 | 4 | if (sorted) return; | 131 | | | 132 | 4 | std::sort(std::begin(events_list), std::end(events_list), Comparator {}); | 133 | 4 | sorted = true; | 134 | 4 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE4sortEv Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE4sortEv |
135 | | |
136 | 7 | void write(BufferWritable& buf) const { |
137 | 7 | buf.write_binary(sorted); |
138 | 7 | buf.write_binary(events_list.size()); |
139 | | |
140 | 13 | for (const auto& events : events_list) { |
141 | 13 | buf.write_binary(events.first); |
142 | 13 | buf.write_binary(events.second.to_ulong()); |
143 | 13 | } |
144 | | |
145 | | // This is std::bitset<32>, which will not exceed 32 bits. |
146 | 7 | UInt32 conditions_met_value = (UInt32)conditions_met.to_ulong(); |
147 | 7 | buf.write_binary(conditions_met_value); |
148 | | |
149 | 7 | buf.write_binary(pattern); |
150 | 7 | buf.write_binary(arg_count); |
151 | 7 | } _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5writeERNS_14BufferWritableE Line | Count | Source | 136 | 4 | void write(BufferWritable& buf) const { | 137 | 4 | buf.write_binary(sorted); | 138 | 4 | buf.write_binary(events_list.size()); | 139 | | | 140 | 7 | for (const auto& events : events_list) { | 141 | 7 | buf.write_binary(events.first); | 142 | 7 | buf.write_binary(events.second.to_ulong()); | 143 | 7 | } | 144 | | | 145 | | // This is std::bitset<32>, which will not exceed 32 bits. | 146 | 4 | UInt32 conditions_met_value = (UInt32)conditions_met.to_ulong(); | 147 | 4 | buf.write_binary(conditions_met_value); | 148 | | | 149 | 4 | buf.write_binary(pattern); | 150 | 4 | buf.write_binary(arg_count); | 151 | 4 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5writeERNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE5writeERNS_14BufferWritableE _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5writeERNS_14BufferWritableE Line | Count | Source | 136 | 3 | void write(BufferWritable& buf) const { | 137 | 3 | buf.write_binary(sorted); | 138 | 3 | buf.write_binary(events_list.size()); | 139 | | | 140 | 6 | for (const auto& events : events_list) { | 141 | 6 | buf.write_binary(events.first); | 142 | 6 | buf.write_binary(events.second.to_ulong()); | 143 | 6 | } | 144 | | | 145 | | // This is std::bitset<32>, which will not exceed 32 bits. | 146 | 3 | UInt32 conditions_met_value = (UInt32)conditions_met.to_ulong(); | 147 | 3 | buf.write_binary(conditions_met_value); | 148 | | | 149 | 3 | buf.write_binary(pattern); | 150 | 3 | buf.write_binary(arg_count); | 151 | 3 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5writeERNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE5writeERNS_14BufferWritableE |
152 | | |
153 | 7 | void read(BufferReadable& buf) { |
154 | 7 | buf.read_binary(sorted); |
155 | | |
156 | 7 | size_t events_list_size; |
157 | 7 | buf.read_binary(events_list_size); |
158 | | |
159 | 7 | events_list.clear(); |
160 | 7 | events_list.reserve(events_list_size); |
161 | | |
162 | 20 | for (size_t i = 0; i < events_list_size; ++i) { |
163 | 13 | Timestamp timestamp; |
164 | 13 | buf.read_binary(timestamp); |
165 | | |
166 | 13 | UInt64 events; |
167 | 13 | buf.read_binary(events); |
168 | | |
169 | 13 | events_list.emplace_back(timestamp, Events {events}); |
170 | 13 | } |
171 | | |
172 | 7 | UInt32 conditions_met_value; |
173 | 7 | buf.read_binary(conditions_met_value); |
174 | 7 | conditions_met = conditions_met_value; |
175 | | |
176 | 7 | buf.read_binary(pattern); |
177 | 7 | buf.read_binary(arg_count); |
178 | 7 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE4readERNS_14BufferReadableE Line | Count | Source | 153 | 4 | void read(BufferReadable& buf) { | 154 | 4 | buf.read_binary(sorted); | 155 | | | 156 | 4 | size_t events_list_size; | 157 | 4 | buf.read_binary(events_list_size); | 158 | | | 159 | 4 | events_list.clear(); | 160 | 4 | events_list.reserve(events_list_size); | 161 | | | 162 | 11 | for (size_t i = 0; i < events_list_size; ++i) { | 163 | 7 | Timestamp timestamp; | 164 | 7 | buf.read_binary(timestamp); | 165 | | | 166 | 7 | UInt64 events; | 167 | 7 | buf.read_binary(events); | 168 | | | 169 | 7 | events_list.emplace_back(timestamp, Events {events}); | 170 | 7 | } | 171 | | | 172 | 4 | UInt32 conditions_met_value; | 173 | 4 | buf.read_binary(conditions_met_value); | 174 | 4 | conditions_met = conditions_met_value; | 175 | | | 176 | 4 | buf.read_binary(pattern); | 177 | 4 | buf.read_binary(arg_count); | 178 | 4 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE4readERNS_14BufferReadableE Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE4readERNS_14BufferReadableE _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE4readERNS_14BufferReadableE Line | Count | Source | 153 | 3 | void read(BufferReadable& buf) { | 154 | 3 | buf.read_binary(sorted); | 155 | | | 156 | 3 | size_t events_list_size; | 157 | 3 | buf.read_binary(events_list_size); | 158 | | | 159 | 3 | events_list.clear(); | 160 | 3 | events_list.reserve(events_list_size); | 161 | | | 162 | 9 | for (size_t i = 0; i < events_list_size; ++i) { | 163 | 6 | Timestamp timestamp; | 164 | 6 | buf.read_binary(timestamp); | 165 | | | 166 | 6 | UInt64 events; | 167 | 6 | buf.read_binary(events); | 168 | | | 169 | 6 | events_list.emplace_back(timestamp, Events {events}); | 170 | 6 | } | 171 | | | 172 | 3 | UInt32 conditions_met_value; | 173 | 3 | buf.read_binary(conditions_met_value); | 174 | 3 | conditions_met = conditions_met_value; | 175 | | | 176 | 3 | buf.read_binary(pattern); | 177 | 3 | buf.read_binary(arg_count); | 178 | 3 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE4readERNS_14BufferReadableE Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE4readERNS_14BufferReadableE |
179 | | |
180 | | private: |
181 | | enum class PatternActionType { |
182 | | SpecificEvent, |
183 | | AnyEvent, |
184 | | KleeneStar, |
185 | | TimeLessOrEqual, |
186 | | TimeLess, |
187 | | TimeGreaterOrEqual, |
188 | | TimeGreater, |
189 | | TimeEqual |
190 | | }; |
191 | | |
192 | | struct PatternAction final { |
193 | | PatternActionType type; |
194 | | std::uint64_t extra; |
195 | | |
196 | | PatternAction() = default; |
197 | | explicit PatternAction(const PatternActionType type_, const std::uint64_t extra_ = 0) |
198 | 46 | : type {type_}, extra {extra_} {}_ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13PatternActionC2ENS4_17PatternActionTypeEm Line | Count | Source | 198 | 28 | : type {type_}, extra {extra_} {} |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13PatternActionC2ENS4_17PatternActionTypeEm Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE13PatternActionC2ENS4_17PatternActionTypeEm _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13PatternActionC2ENS4_17PatternActionTypeEm Line | Count | Source | 198 | 18 | : type {type_}, extra {extra_} {} |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13PatternActionC2ENS4_17PatternActionTypeEm Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE13PatternActionC2ENS4_17PatternActionTypeEm |
199 | | }; |
200 | | |
201 | | using PatternActions = PODArrayWithStackMemory<PatternAction, 64>; |
202 | | |
203 | | Derived& derived() { return assert_cast<Derived&, TypeCheckOnRelease::DISABLE>(*this); } |
204 | | |
205 | 19 | void parse_pattern() { |
206 | 19 | actions.clear(); |
207 | 19 | actions.emplace_back(PatternActionType::KleeneStar); |
208 | | |
209 | 19 | dfa_states.clear(); |
210 | 19 | dfa_states.emplace_back(true); |
211 | | |
212 | 19 | pattern_has_time = false; |
213 | 19 | conditions_in_pattern.reset(); |
214 | | |
215 | 19 | const char* pos = pattern.data(); |
216 | 19 | const char* begin = pos; |
217 | 19 | const char* end = pos + pattern.size(); |
218 | 19 | const size_t event_count = arg_count - 2; |
219 | | |
220 | | // Pattern is checked in fe, so pattern should be valid here, we check it and if pattern is invalid, we return. |
221 | 19 | auto fail_parse = [&]() { |
222 | 5 | actions.clear(); |
223 | 5 | dfa_states.clear(); |
224 | 5 | conditions_in_pattern.reset(); |
225 | 5 | pattern_has_time = false; |
226 | 5 | }; _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEvENKUlvE_clEv Line | Count | Source | 221 | 3 | auto fail_parse = [&]() { | 222 | 3 | actions.clear(); | 223 | 3 | dfa_states.clear(); | 224 | 3 | conditions_in_pattern.reset(); | 225 | 3 | pattern_has_time = false; | 226 | 3 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEvENKUlvE_clEv Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE13parse_patternEvENKUlvE_clEv _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEvENKUlvE_clEv Line | Count | Source | 221 | 2 | auto fail_parse = [&]() { | 222 | 2 | actions.clear(); | 223 | 2 | dfa_states.clear(); | 224 | 2 | conditions_in_pattern.reset(); | 225 | 2 | pattern_has_time = false; | 226 | 2 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEvENKUlvE_clEv Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE13parse_patternEvENKUlvE_clEv |
227 | | |
228 | 19 | auto throw_exception = [&](const std::string& msg) { |
229 | 5 | LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + |
230 | 5 | std::to_string(pos - begin); |
231 | 5 | fail_parse(); |
232 | 5 | }; _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ Line | Count | Source | 228 | 3 | auto throw_exception = [&](const std::string& msg) { | 229 | | LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + | 230 | 3 | std::to_string(pos - begin); | 231 | 3 | fail_parse(); | 232 | 3 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ Line | Count | Source | 228 | 2 | auto throw_exception = [&](const std::string& msg) { | 229 | | LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + | 230 | 2 | std::to_string(pos - begin); | 231 | 2 | fail_parse(); | 232 | 2 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE13parse_patternEvENKUlRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE_clESC_ |
233 | | |
234 | 94 | auto match = [&pos, end](const char* str) mutable { |
235 | 94 | size_t length = strlen(str); |
236 | 94 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { |
237 | 65 | pos += length; |
238 | 65 | return true; |
239 | 65 | } |
240 | 29 | return false; |
241 | 94 | }; _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEvENUlPKcE_clES6_ Line | Count | Source | 234 | 60 | auto match = [&pos, end](const char* str) mutable { | 235 | 60 | size_t length = strlen(str); | 236 | 60 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { | 237 | 43 | pos += length; | 238 | 43 | return true; | 239 | 43 | } | 240 | 17 | return false; | 241 | 60 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEvENUlPKcE_clES6_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE13parse_patternEvENUlPKcE_clES6_ _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEvENUlPKcE_clES6_ Line | Count | Source | 234 | 34 | auto match = [&pos, end](const char* str) mutable { | 235 | 34 | size_t length = strlen(str); | 236 | 34 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { | 237 | 22 | pos += length; | 238 | 22 | return true; | 239 | 22 | } | 240 | 12 | return false; | 241 | 34 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEvENUlPKcE_clES6_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE13parse_patternEvENUlPKcE_clES6_ |
242 | | |
243 | 32 | auto parse_uint = [&pos, end](auto& value) { |
244 | 32 | const auto* start = pos; |
245 | 65 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { |
246 | 33 | ++pos; |
247 | 33 | } |
248 | | |
249 | 32 | if (pos == start) { |
250 | 1 | return false; |
251 | 1 | } |
252 | | |
253 | 31 | StringParser::ParseResult result; |
254 | 31 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( |
255 | 31 | start, pos - start, &result); |
256 | 31 | return result == StringParser::PARSE_SUCCESS; |
257 | 32 | }; _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ Line | Count | Source | 243 | 20 | auto parse_uint = [&pos, end](auto& value) { | 244 | 20 | const auto* start = pos; | 245 | 40 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { | 246 | 20 | ++pos; | 247 | 20 | } | 248 | | | 249 | 20 | if (pos == start) { | 250 | 1 | return false; | 251 | 1 | } | 252 | | | 253 | 19 | StringParser::ParseResult result; | 254 | 19 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( | 255 | 19 | start, pos - start, &result); | 256 | 19 | return result == StringParser::PARSE_SUCCESS; | 257 | 20 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ Line | Count | Source | 243 | 12 | auto parse_uint = [&pos, end](auto& value) { | 244 | 12 | const auto* start = pos; | 245 | 25 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { | 246 | 13 | ++pos; | 247 | 13 | } | 248 | | | 249 | 12 | if (pos == start) { | 250 | 0 | return false; | 251 | 0 | } | 252 | | | 253 | 12 | StringParser::ParseResult result; | 254 | 12 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( | 255 | 12 | start, pos - start, &result); | 256 | 12 | return result == StringParser::PARSE_SUCCESS; | 257 | 12 | }; |
Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ Unexecuted instantiation: _ZZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE13parse_patternEvENKUlRT_E_clImEEDaS6_ |
258 | | |
259 | 46 | while (pos < end) { |
260 | 32 | if (match("(?")) { |
261 | 32 | if (match("t")) { |
262 | 3 | PatternActionType type; |
263 | | |
264 | 3 | if (match("<=")) |
265 | 3 | type = PatternActionType::TimeLessOrEqual; |
266 | 0 | else if (match("<")) |
267 | 0 | type = PatternActionType::TimeLess; |
268 | 0 | else if (match(">=")) |
269 | 0 | type = PatternActionType::TimeGreaterOrEqual; |
270 | 0 | else if (match(">")) |
271 | 0 | type = PatternActionType::TimeGreater; |
272 | 0 | else if (match("==")) |
273 | 0 | type = PatternActionType::TimeEqual; |
274 | 0 | else { |
275 | 0 | throw_exception("Unknown time condition"); |
276 | 0 | return; |
277 | 0 | } |
278 | | |
279 | 3 | uint64_t duration = 0; |
280 | 3 | if (!parse_uint(duration)) { |
281 | 1 | throw_exception("Could not parse number"); |
282 | 1 | return; |
283 | 1 | } |
284 | | |
285 | 2 | if (actions.back().type != PatternActionType::SpecificEvent && |
286 | 2 | actions.back().type != PatternActionType::AnyEvent && |
287 | 2 | actions.back().type != PatternActionType::KleeneStar) { |
288 | 0 | throw_exception( |
289 | 0 | "Temporal condition should be preceded by an event condition"); |
290 | 0 | return; |
291 | 0 | } |
292 | | |
293 | 2 | pattern_has_time = true; |
294 | 2 | actions.emplace_back(type, duration); |
295 | 29 | } else { |
296 | 29 | UInt64 event_number = 0; |
297 | 29 | if (!parse_uint(event_number)) { |
298 | 0 | throw_exception("Could not parse number"); |
299 | 0 | return; |
300 | 0 | } |
301 | | |
302 | 29 | if (event_number == 0 || event_number > event_count) { |
303 | 4 | throw_exception("Event number " + std::to_string(event_number) + |
304 | 4 | " is out of range"); |
305 | 4 | return; |
306 | 4 | } |
307 | | |
308 | 25 | const auto event_index = event_number - 1; |
309 | 25 | actions.emplace_back(PatternActionType::SpecificEvent, event_index); |
310 | 25 | dfa_states.back().transition = DFATransition::SpecificEvent; |
311 | 25 | dfa_states.back().event = static_cast<uint32_t>(event_index); |
312 | 25 | dfa_states.emplace_back(); |
313 | 25 | conditions_in_pattern.set(event_index); |
314 | 25 | } |
315 | | |
316 | 27 | if (!match(")")) { |
317 | 0 | throw_exception("Expected closing parenthesis, found"); |
318 | 0 | return; |
319 | 0 | } |
320 | | |
321 | 27 | } else if (match(".*")) { |
322 | 0 | actions.emplace_back(PatternActionType::KleeneStar); |
323 | 0 | dfa_states.back().has_kleene = true; |
324 | 0 | } else if (match(".")) { |
325 | 0 | actions.emplace_back(PatternActionType::AnyEvent); |
326 | 0 | dfa_states.back().transition = DFATransition::AnyEvent; |
327 | 0 | dfa_states.emplace_back(); |
328 | 0 | } else { |
329 | 0 | throw_exception("Could not parse pattern, unexpected starting symbol"); |
330 | 0 | return; |
331 | 0 | } |
332 | 32 | } |
333 | 19 | } _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE13parse_patternEv Line | Count | Source | 205 | 11 | void parse_pattern() { | 206 | 11 | actions.clear(); | 207 | 11 | actions.emplace_back(PatternActionType::KleeneStar); | 208 | | | 209 | 11 | dfa_states.clear(); | 210 | 11 | dfa_states.emplace_back(true); | 211 | | | 212 | 11 | pattern_has_time = false; | 213 | 11 | conditions_in_pattern.reset(); | 214 | | | 215 | 11 | const char* pos = pattern.data(); | 216 | 11 | const char* begin = pos; | 217 | 11 | const char* end = pos + pattern.size(); | 218 | 11 | const size_t event_count = arg_count - 2; | 219 | | | 220 | | // Pattern is checked in fe, so pattern should be valid here, we check it and if pattern is invalid, we return. | 221 | 11 | auto fail_parse = [&]() { | 222 | 11 | actions.clear(); | 223 | 11 | dfa_states.clear(); | 224 | 11 | conditions_in_pattern.reset(); | 225 | 11 | pattern_has_time = false; | 226 | 11 | }; | 227 | | | 228 | 11 | auto throw_exception = [&](const std::string& msg) { | 229 | 11 | LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + | 230 | 11 | std::to_string(pos - begin); | 231 | 11 | fail_parse(); | 232 | 11 | }; | 233 | | | 234 | 11 | auto match = [&pos, end](const char* str) mutable { | 235 | 11 | size_t length = strlen(str); | 236 | 11 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { | 237 | 11 | pos += length; | 238 | 11 | return true; | 239 | 11 | } | 240 | 11 | return false; | 241 | 11 | }; | 242 | | | 243 | 11 | auto parse_uint = [&pos, end](auto& value) { | 244 | 11 | const auto* start = pos; | 245 | 11 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { | 246 | 11 | ++pos; | 247 | 11 | } | 248 | | | 249 | 11 | if (pos == start) { | 250 | 11 | return false; | 251 | 11 | } | 252 | | | 253 | 11 | StringParser::ParseResult result; | 254 | 11 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( | 255 | 11 | start, pos - start, &result); | 256 | 11 | return result == StringParser::PARSE_SUCCESS; | 257 | 11 | }; | 258 | | | 259 | 28 | while (pos < end) { | 260 | 20 | if (match("(?")) { | 261 | 20 | if (match("t")) { | 262 | 3 | PatternActionType type; | 263 | | | 264 | 3 | if (match("<=")) | 265 | 3 | type = PatternActionType::TimeLessOrEqual; | 266 | 0 | else if (match("<")) | 267 | 0 | type = PatternActionType::TimeLess; | 268 | 0 | else if (match(">=")) | 269 | 0 | type = PatternActionType::TimeGreaterOrEqual; | 270 | 0 | else if (match(">")) | 271 | 0 | type = PatternActionType::TimeGreater; | 272 | 0 | else if (match("==")) | 273 | 0 | type = PatternActionType::TimeEqual; | 274 | 0 | else { | 275 | 0 | throw_exception("Unknown time condition"); | 276 | 0 | return; | 277 | 0 | } | 278 | | | 279 | 3 | uint64_t duration = 0; | 280 | 3 | if (!parse_uint(duration)) { | 281 | 1 | throw_exception("Could not parse number"); | 282 | 1 | return; | 283 | 1 | } | 284 | | | 285 | 2 | if (actions.back().type != PatternActionType::SpecificEvent && | 286 | 2 | actions.back().type != PatternActionType::AnyEvent && | 287 | 2 | actions.back().type != PatternActionType::KleeneStar) { | 288 | 0 | throw_exception( | 289 | 0 | "Temporal condition should be preceded by an event condition"); | 290 | 0 | return; | 291 | 0 | } | 292 | | | 293 | 2 | pattern_has_time = true; | 294 | 2 | actions.emplace_back(type, duration); | 295 | 17 | } else { | 296 | 17 | UInt64 event_number = 0; | 297 | 17 | if (!parse_uint(event_number)) { | 298 | 0 | throw_exception("Could not parse number"); | 299 | 0 | return; | 300 | 0 | } | 301 | | | 302 | 17 | if (event_number == 0 || event_number > event_count) { | 303 | 2 | throw_exception("Event number " + std::to_string(event_number) + | 304 | 2 | " is out of range"); | 305 | 2 | return; | 306 | 2 | } | 307 | | | 308 | 15 | const auto event_index = event_number - 1; | 309 | 15 | actions.emplace_back(PatternActionType::SpecificEvent, event_index); | 310 | 15 | dfa_states.back().transition = DFATransition::SpecificEvent; | 311 | 15 | dfa_states.back().event = static_cast<uint32_t>(event_index); | 312 | 15 | dfa_states.emplace_back(); | 313 | 15 | conditions_in_pattern.set(event_index); | 314 | 15 | } | 315 | | | 316 | 17 | if (!match(")")) { | 317 | 0 | throw_exception("Expected closing parenthesis, found"); | 318 | 0 | return; | 319 | 0 | } | 320 | | | 321 | 17 | } else if (match(".*")) { | 322 | 0 | actions.emplace_back(PatternActionType::KleeneStar); | 323 | 0 | dfa_states.back().has_kleene = true; | 324 | 0 | } else if (match(".")) { | 325 | 0 | actions.emplace_back(PatternActionType::AnyEvent); | 326 | 0 | dfa_states.back().transition = DFATransition::AnyEvent; | 327 | 0 | dfa_states.emplace_back(); | 328 | 0 | } else { | 329 | 0 | throw_exception("Could not parse pattern, unexpected starting symbol"); | 330 | 0 | return; | 331 | 0 | } | 332 | 20 | } | 333 | 11 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE13parse_patternEv Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE13parse_patternEv _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE13parse_patternEv Line | Count | Source | 205 | 8 | void parse_pattern() { | 206 | 8 | actions.clear(); | 207 | 8 | actions.emplace_back(PatternActionType::KleeneStar); | 208 | | | 209 | 8 | dfa_states.clear(); | 210 | 8 | dfa_states.emplace_back(true); | 211 | | | 212 | 8 | pattern_has_time = false; | 213 | 8 | conditions_in_pattern.reset(); | 214 | | | 215 | 8 | const char* pos = pattern.data(); | 216 | 8 | const char* begin = pos; | 217 | 8 | const char* end = pos + pattern.size(); | 218 | 8 | const size_t event_count = arg_count - 2; | 219 | | | 220 | | // Pattern is checked in fe, so pattern should be valid here, we check it and if pattern is invalid, we return. | 221 | 8 | auto fail_parse = [&]() { | 222 | 8 | actions.clear(); | 223 | 8 | dfa_states.clear(); | 224 | 8 | conditions_in_pattern.reset(); | 225 | 8 | pattern_has_time = false; | 226 | 8 | }; | 227 | | | 228 | 8 | auto throw_exception = [&](const std::string& msg) { | 229 | 8 | LOG(WARNING) << msg + " '" + std::string(pos, end) + "' at position " + | 230 | 8 | std::to_string(pos - begin); | 231 | 8 | fail_parse(); | 232 | 8 | }; | 233 | | | 234 | 8 | auto match = [&pos, end](const char* str) mutable { | 235 | 8 | size_t length = strlen(str); | 236 | 8 | if (pos + length <= end && 0 == memcmp(pos, str, length)) { | 237 | 8 | pos += length; | 238 | 8 | return true; | 239 | 8 | } | 240 | 8 | return false; | 241 | 8 | }; | 242 | | | 243 | 8 | auto parse_uint = [&pos, end](auto& value) { | 244 | 8 | const auto* start = pos; | 245 | 8 | while (pos < end && std::isdigit(static_cast<unsigned char>(*pos))) { | 246 | 8 | ++pos; | 247 | 8 | } | 248 | | | 249 | 8 | if (pos == start) { | 250 | 8 | return false; | 251 | 8 | } | 252 | | | 253 | 8 | StringParser::ParseResult result; | 254 | 8 | value = StringParser::string_to_int<std::decay_t<decltype(value)>, false>( | 255 | 8 | start, pos - start, &result); | 256 | 8 | return result == StringParser::PARSE_SUCCESS; | 257 | 8 | }; | 258 | | | 259 | 18 | while (pos < end) { | 260 | 12 | if (match("(?")) { | 261 | 12 | if (match("t")) { | 262 | 0 | PatternActionType type; | 263 | |
| 264 | 0 | if (match("<=")) | 265 | 0 | type = PatternActionType::TimeLessOrEqual; | 266 | 0 | else if (match("<")) | 267 | 0 | type = PatternActionType::TimeLess; | 268 | 0 | else if (match(">=")) | 269 | 0 | type = PatternActionType::TimeGreaterOrEqual; | 270 | 0 | else if (match(">")) | 271 | 0 | type = PatternActionType::TimeGreater; | 272 | 0 | else if (match("==")) | 273 | 0 | type = PatternActionType::TimeEqual; | 274 | 0 | else { | 275 | 0 | throw_exception("Unknown time condition"); | 276 | 0 | return; | 277 | 0 | } | 278 | | | 279 | 0 | uint64_t duration = 0; | 280 | 0 | if (!parse_uint(duration)) { | 281 | 0 | throw_exception("Could not parse number"); | 282 | 0 | return; | 283 | 0 | } | 284 | | | 285 | 0 | if (actions.back().type != PatternActionType::SpecificEvent && | 286 | 0 | actions.back().type != PatternActionType::AnyEvent && | 287 | 0 | actions.back().type != PatternActionType::KleeneStar) { | 288 | 0 | throw_exception( | 289 | 0 | "Temporal condition should be preceded by an event condition"); | 290 | 0 | return; | 291 | 0 | } | 292 | | | 293 | 0 | pattern_has_time = true; | 294 | 0 | actions.emplace_back(type, duration); | 295 | 12 | } else { | 296 | 12 | UInt64 event_number = 0; | 297 | 12 | if (!parse_uint(event_number)) { | 298 | 0 | throw_exception("Could not parse number"); | 299 | 0 | return; | 300 | 0 | } | 301 | | | 302 | 12 | if (event_number == 0 || event_number > event_count) { | 303 | 2 | throw_exception("Event number " + std::to_string(event_number) + | 304 | 2 | " is out of range"); | 305 | 2 | return; | 306 | 2 | } | 307 | | | 308 | 10 | const auto event_index = event_number - 1; | 309 | 10 | actions.emplace_back(PatternActionType::SpecificEvent, event_index); | 310 | 10 | dfa_states.back().transition = DFATransition::SpecificEvent; | 311 | 10 | dfa_states.back().event = static_cast<uint32_t>(event_index); | 312 | 10 | dfa_states.emplace_back(); | 313 | 10 | conditions_in_pattern.set(event_index); | 314 | 10 | } | 315 | | | 316 | 10 | if (!match(")")) { | 317 | 0 | throw_exception("Expected closing parenthesis, found"); | 318 | 0 | return; | 319 | 0 | } | 320 | | | 321 | 10 | } else if (match(".*")) { | 322 | 0 | actions.emplace_back(PatternActionType::KleeneStar); | 323 | 0 | dfa_states.back().has_kleene = true; | 324 | 0 | } else if (match(".")) { | 325 | 0 | actions.emplace_back(PatternActionType::AnyEvent); | 326 | 0 | dfa_states.back().transition = DFATransition::AnyEvent; | 327 | 0 | dfa_states.emplace_back(); | 328 | 0 | } else { | 329 | 0 | throw_exception("Could not parse pattern, unexpected starting symbol"); | 330 | 0 | return; | 331 | 0 | } | 332 | 12 | } | 333 | 8 | } |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE13parse_patternEv Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE13parse_patternEv |
334 | | |
335 | | public: |
336 | | /// Uses a DFA based approach in order to better handle patterns without |
337 | | /// time assertions. |
338 | | /// |
339 | | /// NOTE: This implementation relies on the assumption that the pattern is *small*. |
340 | | /// |
341 | | /// This algorithm performs in O(mn) (with m the number of DFA states and N the number |
342 | | /// of events) with a memory consumption and memory allocations in O(m). It means that |
343 | | /// if n >>> m (which is expected to be the case), this algorithm can be considered linear. |
344 | | template <typename EventEntry> |
345 | 3 | bool dfa_match(EventEntry& events_it, const EventEntry events_end) const { |
346 | 3 | using ActiveStates = std::vector<bool>; |
347 | | /// Those two vectors keep track of which states should be considered for the current |
348 | | /// event as well as the states which should be considered for the next event. |
349 | 3 | ActiveStates active_states(dfa_states.size(), false); |
350 | 3 | ActiveStates next_active_states(dfa_states.size(), false); |
351 | 3 | active_states[0] = true; |
352 | | |
353 | | /// Keeps track of dead-ends in order not to iterate over all the events to realize that |
354 | | /// the match failed. |
355 | 3 | size_t n_active = 1; |
356 | | |
357 | 9 | for (/* empty */; events_it != events_end && n_active > 0 && !active_states.back(); |
358 | 6 | ++events_it) { |
359 | 6 | n_active = 0; |
360 | 6 | next_active_states.assign(dfa_states.size(), false); |
361 | | |
362 | 24 | for (size_t state = 0; state < dfa_states.size(); ++state) { |
363 | 18 | if (!active_states[state]) { |
364 | 9 | continue; |
365 | 9 | } |
366 | | |
367 | 9 | switch (dfa_states[state].transition) { |
368 | 0 | case DFATransition::None: |
369 | 0 | break; |
370 | 0 | case DFATransition::AnyEvent: |
371 | 0 | next_active_states[state + 1] = true; |
372 | 0 | ++n_active; |
373 | 0 | break; |
374 | 9 | case DFATransition::SpecificEvent: |
375 | 9 | if (events_it->second.test(dfa_states[state].event)) { |
376 | 6 | next_active_states[state + 1] = true; |
377 | 6 | ++n_active; |
378 | 6 | } |
379 | 9 | break; |
380 | 9 | } |
381 | | |
382 | 9 | if (dfa_states[state].has_kleene) { |
383 | 6 | next_active_states[state] = true; |
384 | 6 | ++n_active; |
385 | 6 | } |
386 | 9 | } |
387 | 6 | swap(active_states, next_active_states); |
388 | 6 | } |
389 | | |
390 | 3 | return active_states.back(); |
391 | 3 | } _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE9dfa_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ Line | Count | Source | 345 | 3 | bool dfa_match(EventEntry& events_it, const EventEntry events_end) const { | 346 | 3 | using ActiveStates = std::vector<bool>; | 347 | | /// Those two vectors keep track of which states should be considered for the current | 348 | | /// event as well as the states which should be considered for the next event. | 349 | 3 | ActiveStates active_states(dfa_states.size(), false); | 350 | 3 | ActiveStates next_active_states(dfa_states.size(), false); | 351 | 3 | active_states[0] = true; | 352 | | | 353 | | /// Keeps track of dead-ends in order not to iterate over all the events to realize that | 354 | | /// the match failed. | 355 | 3 | size_t n_active = 1; | 356 | | | 357 | 9 | for (/* empty */; events_it != events_end && n_active > 0 && !active_states.back(); | 358 | 6 | ++events_it) { | 359 | 6 | n_active = 0; | 360 | 6 | next_active_states.assign(dfa_states.size(), false); | 361 | | | 362 | 24 | for (size_t state = 0; state < dfa_states.size(); ++state) { | 363 | 18 | if (!active_states[state]) { | 364 | 9 | continue; | 365 | 9 | } | 366 | | | 367 | 9 | switch (dfa_states[state].transition) { | 368 | 0 | case DFATransition::None: | 369 | 0 | break; | 370 | 0 | case DFATransition::AnyEvent: | 371 | 0 | next_active_states[state + 1] = true; | 372 | 0 | ++n_active; | 373 | 0 | break; | 374 | 9 | case DFATransition::SpecificEvent: | 375 | 9 | if (events_it->second.test(dfa_states[state].event)) { | 376 | 6 | next_active_states[state + 1] = true; | 377 | 6 | ++n_active; | 378 | 6 | } | 379 | 9 | break; | 380 | 9 | } | 381 | | | 382 | 9 | if (dfa_states[state].has_kleene) { | 383 | 6 | next_active_states[state] = true; | 384 | 6 | ++n_active; | 385 | 6 | } | 386 | 9 | } | 387 | 6 | swap(active_states, next_active_states); | 388 | 6 | } | 389 | | | 390 | 3 | return active_states.back(); | 391 | 3 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE9dfa_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE9dfa_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_16TimestampTzValueESt6bitsetILm32EEESt6vectorISC_SaISC_EEEEEEbRT_SJ_ |
392 | | |
393 | | template <typename EventEntry> |
394 | 9 | bool backtracking_match(EventEntry& events_it, const EventEntry events_end) const { |
395 | 9 | const auto action_begin = std::begin(actions); |
396 | 9 | const auto action_end = std::end(actions); |
397 | 9 | auto action_it = action_begin; |
398 | | |
399 | 9 | const auto events_begin = events_it; |
400 | 9 | auto base_it = events_it; |
401 | | |
402 | | /// an iterator to action plus an iterator to row in events list plus timestamp at the start of sequence |
403 | 9 | using backtrack_info = std::tuple<decltype(action_it), EventEntry, EventEntry>; |
404 | 9 | std::stack<backtrack_info> back_stack; |
405 | | |
406 | | /// backtrack if possible |
407 | 9 | const auto do_backtrack = [&] { |
408 | 2 | while (!back_stack.empty()) { |
409 | 2 | auto& top = back_stack.top(); |
410 | | |
411 | 2 | action_it = std::get<0>(top); |
412 | 2 | events_it = std::next(std::get<1>(top)); |
413 | 2 | base_it = std::get<2>(top); |
414 | | |
415 | 2 | back_stack.pop(); |
416 | | |
417 | 2 | if (events_it != events_end) return true; |
418 | 2 | } |
419 | | |
420 | 0 | return false; |
421 | 2 | }; _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ENKUlvE_clEv Line | Count | Source | 407 | 2 | const auto do_backtrack = [&] { | 408 | 2 | while (!back_stack.empty()) { | 409 | 2 | auto& top = back_stack.top(); | 410 | | | 411 | 2 | action_it = std::get<0>(top); | 412 | 2 | events_it = std::next(std::get<1>(top)); | 413 | 2 | base_it = std::get<2>(top); | 414 | | | 415 | 2 | back_stack.pop(); | 416 | | | 417 | 2 | if (events_it != events_end) return true; | 418 | 2 | } | 419 | | | 420 | 0 | return false; | 421 | 2 | }; |
Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ENKUlvE_clEv Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_16TimestampTzValueESt6bitsetILm32EEESt6vectorISC_SaISC_EEEEEEbRT_SJ_ENKUlvE_clEv Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ENKUlvE_clEv Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ENKUlvE_clEv Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_16TimestampTzValueESt6bitsetILm32EEESt6vectorISC_SaISC_EEEEEEbRT_SJ_ENKUlvE_clEv |
422 | | |
423 | 9 | size_t i = 0; |
424 | 42 | while (action_it != action_end && events_it != events_end) { |
425 | 33 | if (action_it->type == PatternActionType::SpecificEvent) { |
426 | 20 | if (events_it->second.test(action_it->extra)) { |
427 | | /// move to the next action and events |
428 | 18 | base_it = events_it; |
429 | 18 | ++action_it, ++events_it; |
430 | 18 | } else if (!do_backtrack()) |
431 | | /// backtracking failed, bail out |
432 | 0 | break; |
433 | 20 | } else if (action_it->type == PatternActionType::AnyEvent) { |
434 | 0 | base_it = events_it; |
435 | 0 | ++action_it, ++events_it; |
436 | 13 | } else if (action_it->type == PatternActionType::KleeneStar) { |
437 | 9 | back_stack.emplace(action_it, events_it, base_it); |
438 | 9 | base_it = events_it; |
439 | 9 | ++action_it; |
440 | 9 | } else if (action_it->type == PatternActionType::TimeLessOrEqual) { |
441 | 4 | if (events_it->first.datetime_diff_in_seconds(base_it->first) <= action_it->extra) { |
442 | | /// condition satisfied, move onto next action |
443 | 4 | back_stack.emplace(action_it, events_it, base_it); |
444 | 4 | base_it = events_it; |
445 | 4 | ++action_it; |
446 | 4 | } else if (!do_backtrack()) |
447 | 0 | break; |
448 | 4 | } else if (action_it->type == PatternActionType::TimeLess) { |
449 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) < action_it->extra) { |
450 | 0 | back_stack.emplace(action_it, events_it, base_it); |
451 | 0 | base_it = events_it; |
452 | 0 | ++action_it; |
453 | 0 | } else if (!do_backtrack()) |
454 | 0 | break; |
455 | 0 | } else if (action_it->type == PatternActionType::TimeGreaterOrEqual) { |
456 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) >= action_it->extra) { |
457 | 0 | back_stack.emplace(action_it, events_it, base_it); |
458 | 0 | base_it = events_it; |
459 | 0 | ++action_it; |
460 | 0 | } else if (++events_it == events_end && !do_backtrack()) |
461 | 0 | break; |
462 | 0 | } else if (action_it->type == PatternActionType::TimeGreater) { |
463 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) > action_it->extra) { |
464 | 0 | back_stack.emplace(action_it, events_it, base_it); |
465 | 0 | base_it = events_it; |
466 | 0 | ++action_it; |
467 | 0 | } else if (++events_it == events_end && !do_backtrack()) |
468 | 0 | break; |
469 | 0 | } else if (action_it->type == PatternActionType::TimeEqual) { |
470 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) == action_it->extra) { |
471 | 0 | back_stack.emplace(action_it, events_it, base_it); |
472 | 0 | base_it = events_it; |
473 | 0 | ++action_it; |
474 | 0 | } else if (++events_it == events_end && !do_backtrack()) |
475 | 0 | break; |
476 | 0 | } else { |
477 | 0 | LOG(WARNING) << "Unknown PatternActionType"; |
478 | 0 | return false; |
479 | 0 | } |
480 | | |
481 | 33 | if (++i > sequence_match_max_iterations) { |
482 | 0 | LOG(WARNING) |
483 | 0 | << "Pattern application proves too difficult, exceeding max iterations (" + |
484 | 0 | std::to_string(sequence_match_max_iterations) + ")"; |
485 | 0 | return false; |
486 | 0 | } |
487 | 33 | } |
488 | | |
489 | | /// if there are some actions remaining |
490 | 9 | if (action_it != action_end) { |
491 | | /// match multiple empty strings at end |
492 | 0 | while (action_it->type == PatternActionType::KleeneStar || |
493 | 0 | action_it->type == PatternActionType::TimeLessOrEqual || |
494 | 0 | action_it->type == PatternActionType::TimeLess || |
495 | 0 | (action_it->type == PatternActionType::TimeGreaterOrEqual && |
496 | 0 | action_it->extra == 0)) |
497 | 0 | ++action_it; |
498 | 0 | } |
499 | | |
500 | 9 | if (events_it == events_begin) ++events_it; |
501 | | |
502 | 9 | return action_it == action_end; |
503 | 9 | } _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ Line | Count | Source | 394 | 2 | bool backtracking_match(EventEntry& events_it, const EventEntry events_end) const { | 395 | 2 | const auto action_begin = std::begin(actions); | 396 | 2 | const auto action_end = std::end(actions); | 397 | 2 | auto action_it = action_begin; | 398 | | | 399 | 2 | const auto events_begin = events_it; | 400 | 2 | auto base_it = events_it; | 401 | | | 402 | | /// an iterator to action plus an iterator to row in events list plus timestamp at the start of sequence | 403 | 2 | using backtrack_info = std::tuple<decltype(action_it), EventEntry, EventEntry>; | 404 | 2 | std::stack<backtrack_info> back_stack; | 405 | | | 406 | | /// backtrack if possible | 407 | 2 | const auto do_backtrack = [&] { | 408 | 2 | while (!back_stack.empty()) { | 409 | 2 | auto& top = back_stack.top(); | 410 | | | 411 | 2 | action_it = std::get<0>(top); | 412 | 2 | events_it = std::next(std::get<1>(top)); | 413 | 2 | base_it = std::get<2>(top); | 414 | | | 415 | 2 | back_stack.pop(); | 416 | | | 417 | 2 | if (events_it != events_end) return true; | 418 | 2 | } | 419 | | | 420 | 2 | return false; | 421 | 2 | }; | 422 | | | 423 | 2 | size_t i = 0; | 424 | 14 | while (action_it != action_end && events_it != events_end) { | 425 | 12 | if (action_it->type == PatternActionType::SpecificEvent) { | 426 | 6 | if (events_it->second.test(action_it->extra)) { | 427 | | /// move to the next action and events | 428 | 4 | base_it = events_it; | 429 | 4 | ++action_it, ++events_it; | 430 | 4 | } else if (!do_backtrack()) | 431 | | /// backtracking failed, bail out | 432 | 0 | break; | 433 | 6 | } else if (action_it->type == PatternActionType::AnyEvent) { | 434 | 0 | base_it = events_it; | 435 | 0 | ++action_it, ++events_it; | 436 | 6 | } else if (action_it->type == PatternActionType::KleeneStar) { | 437 | 2 | back_stack.emplace(action_it, events_it, base_it); | 438 | 2 | base_it = events_it; | 439 | 2 | ++action_it; | 440 | 4 | } else if (action_it->type == PatternActionType::TimeLessOrEqual) { | 441 | 4 | if (events_it->first.datetime_diff_in_seconds(base_it->first) <= action_it->extra) { | 442 | | /// condition satisfied, move onto next action | 443 | 4 | back_stack.emplace(action_it, events_it, base_it); | 444 | 4 | base_it = events_it; | 445 | 4 | ++action_it; | 446 | 4 | } else if (!do_backtrack()) | 447 | 0 | break; | 448 | 4 | } else if (action_it->type == PatternActionType::TimeLess) { | 449 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) < action_it->extra) { | 450 | 0 | back_stack.emplace(action_it, events_it, base_it); | 451 | 0 | base_it = events_it; | 452 | 0 | ++action_it; | 453 | 0 | } else if (!do_backtrack()) | 454 | 0 | break; | 455 | 0 | } else if (action_it->type == PatternActionType::TimeGreaterOrEqual) { | 456 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) >= action_it->extra) { | 457 | 0 | back_stack.emplace(action_it, events_it, base_it); | 458 | 0 | base_it = events_it; | 459 | 0 | ++action_it; | 460 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 461 | 0 | break; | 462 | 0 | } else if (action_it->type == PatternActionType::TimeGreater) { | 463 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) > action_it->extra) { | 464 | 0 | back_stack.emplace(action_it, events_it, base_it); | 465 | 0 | base_it = events_it; | 466 | 0 | ++action_it; | 467 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 468 | 0 | break; | 469 | 0 | } else if (action_it->type == PatternActionType::TimeEqual) { | 470 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) == action_it->extra) { | 471 | 0 | back_stack.emplace(action_it, events_it, base_it); | 472 | 0 | base_it = events_it; | 473 | 0 | ++action_it; | 474 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 475 | 0 | break; | 476 | 0 | } else { | 477 | 0 | LOG(WARNING) << "Unknown PatternActionType"; | 478 | 0 | return false; | 479 | 0 | } | 480 | | | 481 | 12 | if (++i > sequence_match_max_iterations) { | 482 | 0 | LOG(WARNING) | 483 | 0 | << "Pattern application proves too difficult, exceeding max iterations (" + | 484 | 0 | std::to_string(sequence_match_max_iterations) + ")"; | 485 | 0 | return false; | 486 | 0 | } | 487 | 12 | } | 488 | | | 489 | | /// if there are some actions remaining | 490 | 2 | if (action_it != action_end) { | 491 | | /// match multiple empty strings at end | 492 | 0 | while (action_it->type == PatternActionType::KleeneStar || | 493 | 0 | action_it->type == PatternActionType::TimeLessOrEqual || | 494 | 0 | action_it->type == PatternActionType::TimeLess || | 495 | 0 | (action_it->type == PatternActionType::TimeGreaterOrEqual && | 496 | 0 | action_it->extra == 0)) | 497 | 0 | ++action_it; | 498 | 0 | } | 499 | | | 500 | 2 | if (events_it == events_begin) ++events_it; | 501 | | | 502 | 2 | return action_it == action_end; | 503 | 2 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_16TimestampTzValueESt6bitsetILm32EEESt6vectorISC_SaISC_EEEEEEbRT_SJ_ _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ Line | Count | Source | 394 | 7 | bool backtracking_match(EventEntry& events_it, const EventEntry events_end) const { | 395 | 7 | const auto action_begin = std::begin(actions); | 396 | 7 | const auto action_end = std::end(actions); | 397 | 7 | auto action_it = action_begin; | 398 | | | 399 | 7 | const auto events_begin = events_it; | 400 | 7 | auto base_it = events_it; | 401 | | | 402 | | /// an iterator to action plus an iterator to row in events list plus timestamp at the start of sequence | 403 | 7 | using backtrack_info = std::tuple<decltype(action_it), EventEntry, EventEntry>; | 404 | 7 | std::stack<backtrack_info> back_stack; | 405 | | | 406 | | /// backtrack if possible | 407 | 7 | const auto do_backtrack = [&] { | 408 | 7 | while (!back_stack.empty()) { | 409 | 7 | auto& top = back_stack.top(); | 410 | | | 411 | 7 | action_it = std::get<0>(top); | 412 | 7 | events_it = std::next(std::get<1>(top)); | 413 | 7 | base_it = std::get<2>(top); | 414 | | | 415 | 7 | back_stack.pop(); | 416 | | | 417 | 7 | if (events_it != events_end) return true; | 418 | 7 | } | 419 | | | 420 | 7 | return false; | 421 | 7 | }; | 422 | | | 423 | 7 | size_t i = 0; | 424 | 28 | while (action_it != action_end && events_it != events_end) { | 425 | 21 | if (action_it->type == PatternActionType::SpecificEvent) { | 426 | 14 | if (events_it->second.test(action_it->extra)) { | 427 | | /// move to the next action and events | 428 | 14 | base_it = events_it; | 429 | 14 | ++action_it, ++events_it; | 430 | 14 | } else if (!do_backtrack()) | 431 | | /// backtracking failed, bail out | 432 | 0 | break; | 433 | 14 | } else if (action_it->type == PatternActionType::AnyEvent) { | 434 | 0 | base_it = events_it; | 435 | 0 | ++action_it, ++events_it; | 436 | 7 | } else if (action_it->type == PatternActionType::KleeneStar) { | 437 | 7 | back_stack.emplace(action_it, events_it, base_it); | 438 | 7 | base_it = events_it; | 439 | 7 | ++action_it; | 440 | 7 | } else if (action_it->type == PatternActionType::TimeLessOrEqual) { | 441 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) <= action_it->extra) { | 442 | | /// condition satisfied, move onto next action | 443 | 0 | back_stack.emplace(action_it, events_it, base_it); | 444 | 0 | base_it = events_it; | 445 | 0 | ++action_it; | 446 | 0 | } else if (!do_backtrack()) | 447 | 0 | break; | 448 | 0 | } else if (action_it->type == PatternActionType::TimeLess) { | 449 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) < action_it->extra) { | 450 | 0 | back_stack.emplace(action_it, events_it, base_it); | 451 | 0 | base_it = events_it; | 452 | 0 | ++action_it; | 453 | 0 | } else if (!do_backtrack()) | 454 | 0 | break; | 455 | 0 | } else if (action_it->type == PatternActionType::TimeGreaterOrEqual) { | 456 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) >= action_it->extra) { | 457 | 0 | back_stack.emplace(action_it, events_it, base_it); | 458 | 0 | base_it = events_it; | 459 | 0 | ++action_it; | 460 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 461 | 0 | break; | 462 | 0 | } else if (action_it->type == PatternActionType::TimeGreater) { | 463 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) > action_it->extra) { | 464 | 0 | back_stack.emplace(action_it, events_it, base_it); | 465 | 0 | base_it = events_it; | 466 | 0 | ++action_it; | 467 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 468 | 0 | break; | 469 | 0 | } else if (action_it->type == PatternActionType::TimeEqual) { | 470 | 0 | if (events_it->first.datetime_diff_in_seconds(base_it->first) == action_it->extra) { | 471 | 0 | back_stack.emplace(action_it, events_it, base_it); | 472 | 0 | base_it = events_it; | 473 | 0 | ++action_it; | 474 | 0 | } else if (++events_it == events_end && !do_backtrack()) | 475 | 0 | break; | 476 | 0 | } else { | 477 | 0 | LOG(WARNING) << "Unknown PatternActionType"; | 478 | 0 | return false; | 479 | 0 | } | 480 | | | 481 | 21 | if (++i > sequence_match_max_iterations) { | 482 | 0 | LOG(WARNING) | 483 | 0 | << "Pattern application proves too difficult, exceeding max iterations (" + | 484 | 0 | std::to_string(sequence_match_max_iterations) + ")"; | 485 | 0 | return false; | 486 | 0 | } | 487 | 21 | } | 488 | | | 489 | | /// if there are some actions remaining | 490 | 7 | if (action_it != action_end) { | 491 | | /// match multiple empty strings at end | 492 | 0 | while (action_it->type == PatternActionType::KleeneStar || | 493 | 0 | action_it->type == PatternActionType::TimeLessOrEqual || | 494 | 0 | action_it->type == PatternActionType::TimeLess || | 495 | 0 | (action_it->type == PatternActionType::TimeGreaterOrEqual && | 496 | 0 | action_it->extra == 0)) | 497 | 0 | ++action_it; | 498 | 0 | } | 499 | | | 500 | 7 | if (events_it == events_begin) ++events_it; | 501 | | | 502 | 7 | return action_it == action_end; | 503 | 7 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbRT_SL_ Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE18backtracking_matchIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_16TimestampTzValueESt6bitsetILm32EEESt6vectorISC_SaISC_EEEEEEbRT_SJ_ |
504 | | |
505 | | /// Splits the pattern into deterministic parts separated by non-deterministic fragments |
506 | | /// (time constraints and Kleene stars), and tries to match the deterministic parts in their specified order, |
507 | | /// ignoring the non-deterministic fragments. |
508 | | /// This function can quickly check that a full match is not possible if some deterministic fragment is missing. |
509 | | template <typename EventEntry> |
510 | | bool could_match_deterministic_parts(const EventEntry events_begin, const EventEntry events_end, |
511 | 6 | bool limit_iterations = true) const { |
512 | 6 | size_t events_processed = 0; |
513 | 6 | auto events_it = events_begin; |
514 | | |
515 | 6 | const auto actions_end = std::end(actions); |
516 | 6 | auto actions_it = std::begin(actions); |
517 | 6 | auto det_part_begin = actions_it; |
518 | | |
519 | 6 | auto match_deterministic_part = [&events_it, events_end, &events_processed, det_part_begin, |
520 | 14 | actions_it, limit_iterations]() { |
521 | 14 | auto events_it_init = events_it; |
522 | 14 | auto det_part_it = det_part_begin; |
523 | | |
524 | 14 | while (det_part_it != actions_it && events_it != events_end) { |
525 | | /// matching any event |
526 | 0 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; |
527 | | |
528 | | /// matching specific event |
529 | 0 | else { |
530 | 0 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; |
531 | | |
532 | | /// abandon current matching, try to match the deterministic fragment further in the list |
533 | 0 | else { |
534 | 0 | events_it = ++events_it_init; |
535 | 0 | det_part_it = det_part_begin; |
536 | 0 | } |
537 | 0 | } |
538 | |
|
539 | 0 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { |
540 | 0 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " |
541 | 0 | "iterations are " + |
542 | 0 | std::to_string(sequence_match_max_iterations); |
543 | 0 | return false; |
544 | 0 | } |
545 | 0 | } |
546 | | |
547 | 14 | return det_part_it == actions_it; |
548 | 14 | }; _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_bENKUlvE_clEv Line | Count | Source | 520 | 6 | actions_it, limit_iterations]() { | 521 | 6 | auto events_it_init = events_it; | 522 | 6 | auto det_part_it = det_part_begin; | 523 | | | 524 | 6 | while (det_part_it != actions_it && events_it != events_end) { | 525 | | /// matching any event | 526 | 0 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; | 527 | | | 528 | | /// matching specific event | 529 | 0 | else { | 530 | 0 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; | 531 | | | 532 | | /// abandon current matching, try to match the deterministic fragment further in the list | 533 | 0 | else { | 534 | 0 | events_it = ++events_it_init; | 535 | 0 | det_part_it = det_part_begin; | 536 | 0 | } | 537 | 0 | } | 538 | |
| 539 | 0 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { | 540 | 0 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " | 541 | 0 | "iterations are " + | 542 | 0 | std::to_string(sequence_match_max_iterations); | 543 | 0 | return false; | 544 | 0 | } | 545 | 0 | } | 546 | | | 547 | 6 | return det_part_it == actions_it; | 548 | 6 | }; |
Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_bENKUlvE_clEv Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_16TimestampTzValueESt6bitsetILm32EEESt6vectorISC_SaISC_EEEEEEbT_SJ_bENKUlvE_clEv _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_bENKUlvE_clEv Line | Count | Source | 520 | 8 | actions_it, limit_iterations]() { | 521 | 8 | auto events_it_init = events_it; | 522 | 8 | auto det_part_it = det_part_begin; | 523 | | | 524 | 8 | while (det_part_it != actions_it && events_it != events_end) { | 525 | | /// matching any event | 526 | 0 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; | 527 | | | 528 | | /// matching specific event | 529 | 0 | else { | 530 | 0 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; | 531 | | | 532 | | /// abandon current matching, try to match the deterministic fragment further in the list | 533 | 0 | else { | 534 | 0 | events_it = ++events_it_init; | 535 | 0 | det_part_it = det_part_begin; | 536 | 0 | } | 537 | 0 | } | 538 | |
| 539 | 0 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { | 540 | 0 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " | 541 | 0 | "iterations are " + | 542 | 0 | std::to_string(sequence_match_max_iterations); | 543 | 0 | return false; | 544 | 0 | } | 545 | 0 | } | 546 | | | 547 | 8 | return det_part_it == actions_it; | 548 | 8 | }; |
Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_bENKUlvE_clEv Unexecuted instantiation: _ZZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_16TimestampTzValueESt6bitsetILm32EEESt6vectorISC_SaISC_EEEEEEbT_SJ_bENKUlvE_clEv |
549 | | |
550 | 26 | for (; actions_it != actions_end; ++actions_it) |
551 | 20 | if (actions_it->type != PatternActionType::SpecificEvent && |
552 | 20 | actions_it->type != PatternActionType::AnyEvent) { |
553 | 8 | if (!match_deterministic_part()) return false; |
554 | 8 | det_part_begin = std::next(actions_it); |
555 | 8 | } |
556 | | |
557 | 6 | return match_deterministic_part(); |
558 | 6 | } _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_b Line | Count | Source | 511 | 2 | bool limit_iterations = true) const { | 512 | 2 | size_t events_processed = 0; | 513 | 2 | auto events_it = events_begin; | 514 | | | 515 | 2 | const auto actions_end = std::end(actions); | 516 | 2 | auto actions_it = std::begin(actions); | 517 | 2 | auto det_part_begin = actions_it; | 518 | | | 519 | 2 | auto match_deterministic_part = [&events_it, events_end, &events_processed, det_part_begin, | 520 | 2 | actions_it, limit_iterations]() { | 521 | 2 | auto events_it_init = events_it; | 522 | 2 | auto det_part_it = det_part_begin; | 523 | | | 524 | 2 | while (det_part_it != actions_it && events_it != events_end) { | 525 | | /// matching any event | 526 | 2 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; | 527 | | | 528 | | /// matching specific event | 529 | 2 | else { | 530 | 2 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; | 531 | | | 532 | | /// abandon current matching, try to match the deterministic fragment further in the list | 533 | 2 | else { | 534 | 2 | events_it = ++events_it_init; | 535 | 2 | det_part_it = det_part_begin; | 536 | 2 | } | 537 | 2 | } | 538 | | | 539 | 2 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { | 540 | 2 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " | 541 | 2 | "iterations are " + | 542 | 2 | std::to_string(sequence_match_max_iterations); | 543 | 2 | return false; | 544 | 2 | } | 545 | 2 | } | 546 | | | 547 | 2 | return det_part_it == actions_it; | 548 | 2 | }; | 549 | | | 550 | 10 | for (; actions_it != actions_end; ++actions_it) | 551 | 8 | if (actions_it->type != PatternActionType::SpecificEvent && | 552 | 8 | actions_it->type != PatternActionType::AnyEvent) { | 553 | 4 | if (!match_deterministic_part()) return false; | 554 | 4 | det_part_begin = std::next(actions_it); | 555 | 4 | } | 556 | | | 557 | 2 | return match_deterministic_part(); | 558 | 2 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_b Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_16TimestampTzValueESt6bitsetILm32EEESt6vectorISC_SaISC_EEEEEEbT_SJ_b _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_19DateTimeV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_b Line | Count | Source | 511 | 4 | bool limit_iterations = true) const { | 512 | 4 | size_t events_processed = 0; | 513 | 4 | auto events_it = events_begin; | 514 | | | 515 | 4 | const auto actions_end = std::end(actions); | 516 | 4 | auto actions_it = std::begin(actions); | 517 | 4 | auto det_part_begin = actions_it; | 518 | | | 519 | 4 | auto match_deterministic_part = [&events_it, events_end, &events_processed, det_part_begin, | 520 | 4 | actions_it, limit_iterations]() { | 521 | 4 | auto events_it_init = events_it; | 522 | 4 | auto det_part_it = det_part_begin; | 523 | | | 524 | 4 | while (det_part_it != actions_it && events_it != events_end) { | 525 | | /// matching any event | 526 | 4 | if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; | 527 | | | 528 | | /// matching specific event | 529 | 4 | else { | 530 | 4 | if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; | 531 | | | 532 | | /// abandon current matching, try to match the deterministic fragment further in the list | 533 | 4 | else { | 534 | 4 | events_it = ++events_it_init; | 535 | 4 | det_part_it = det_part_begin; | 536 | 4 | } | 537 | 4 | } | 538 | | | 539 | 4 | if (limit_iterations && ++events_processed > sequence_match_max_iterations) { | 540 | 4 | LOG(WARNING) << "Pattern application proves too difficult, exceeding max " | 541 | 4 | "iterations are " + | 542 | 4 | std::to_string(sequence_match_max_iterations); | 543 | 4 | return false; | 544 | 4 | } | 545 | 4 | } | 546 | | | 547 | 4 | return det_part_it == actions_it; | 548 | 4 | }; | 549 | | | 550 | 16 | for (; actions_it != actions_end; ++actions_it) | 551 | 12 | if (actions_it->type != PatternActionType::SpecificEvent && | 552 | 12 | actions_it->type != PatternActionType::AnyEvent) { | 553 | 4 | if (!match_deterministic_part()) return false; | 554 | 4 | det_part_begin = std::next(actions_it); | 555 | 4 | } | 556 | | | 557 | 4 | return match_deterministic_part(); | 558 | 4 | } |
Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_11DateV2ValueINS_15DateV2ValueTypeEEESt6bitsetILm32EEESt6vectorISE_SaISE_EEEEEEbT_SL_b Unexecuted instantiation: _ZNK5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE31could_match_deterministic_partsIN9__gnu_cxx17__normal_iteratorIPKSt4pairINS_16TimestampTzValueESt6bitsetILm32EEESt6vectorISC_SaISC_EEEEEEbT_SJ_b |
559 | | |
560 | | private: |
561 | | enum class DFATransition : char { |
562 | | /// .-------. |
563 | | /// | | |
564 | | /// `-------' |
565 | | None, |
566 | | /// .-------. (?[0-9]) |
567 | | /// | | ---------- |
568 | | /// `-------' |
569 | | SpecificEvent, |
570 | | /// .-------. . |
571 | | /// | | ---------- |
572 | | /// `-------' |
573 | | AnyEvent, |
574 | | }; |
575 | | |
576 | | struct DFAState { |
577 | | explicit DFAState(bool has_kleene_ = false) |
578 | 44 | : has_kleene {has_kleene_}, event {0}, transition {DFATransition::None} {}_ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE8DFAStateC2Eb Line | Count | Source | 578 | 26 | : has_kleene {has_kleene_}, event {0}, transition {DFATransition::None} {} |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE8DFAStateC2Eb Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE8DFAStateC2Eb _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE8DFAStateC2Eb Line | Count | Source | 578 | 18 | : has_kleene {has_kleene_}, event {0}, transition {DFATransition::None} {} |
Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE8DFAStateC2Eb Unexecuted instantiation: _ZN5doris34AggregateFunctionSequenceMatchDataILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE8DFAStateC2Eb |
579 | | |
580 | | /// .-------. |
581 | | /// | | - - - |
582 | | /// `-------' |
583 | | /// |_^ |
584 | | bool has_kleene; |
585 | | /// In the case of a state transitions with a `SpecificEvent`, |
586 | | /// `event` contains the value of the event. |
587 | | uint32_t event; |
588 | | /// The kind of transition out of this state. |
589 | | DFATransition transition; |
590 | | }; |
591 | | |
592 | | using DFAStates = std::vector<DFAState>; |
593 | | |
594 | | public: |
595 | | bool sorted = true; |
596 | | std::vector<TimestampEvents> events_list; |
597 | | // sequenceMatch conditions met at least once in events_list |
598 | | std::bitset<MAX_EVENTS> conditions_met; |
599 | | // sequenceMatch conditions met at least once in the pattern |
600 | | std::bitset<MAX_EVENTS> conditions_in_pattern; |
601 | | // `True` if the parsed pattern contains time assertions (?t...), `false` otherwise. |
602 | | bool pattern_has_time; |
603 | | |
604 | | private: |
605 | | std::string pattern; |
606 | | size_t arg_count; |
607 | | bool init_flag = false; |
608 | | |
609 | | PatternActions actions; |
610 | | DFAStates dfa_states; |
611 | | }; |
612 | | |
613 | | template <PrimitiveType T, typename Derived> |
614 | | class AggregateFunctionSequenceBase |
615 | | : public IAggregateFunctionDataHelper<AggregateFunctionSequenceMatchData<T, Derived>, |
616 | | Derived> { |
617 | | public: |
618 | | AggregateFunctionSequenceBase(const DataTypes& arguments) |
619 | 25 | : IAggregateFunctionDataHelper<AggregateFunctionSequenceMatchData<T, Derived>, Derived>( |
620 | 25 | arguments) { |
621 | 25 | arg_count = arguments.size(); |
622 | 25 | } _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 619 | 12 | : IAggregateFunctionDataHelper<AggregateFunctionSequenceMatchData<T, Derived>, Derived>( | 620 | 12 | arguments) { | 621 | 12 | arg_count = arguments.size(); | 622 | 12 | } |
Unexecuted instantiation: _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Unexecuted instantiation: _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 619 | 13 | : IAggregateFunctionDataHelper<AggregateFunctionSequenceMatchData<T, Derived>, Derived>( | 620 | 13 | arguments) { | 621 | 13 | arg_count = arguments.size(); | 622 | 13 | } |
Unexecuted instantiation: _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Unexecuted instantiation: _ZN5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEEC2ERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE |
623 | | |
624 | 0 | void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); }Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5resetEPc Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5resetEPc Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE5resetEPc Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5resetEPc Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5resetEPc Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE5resetEPc |
625 | | |
626 | | void add(AggregateDataPtr __restrict place, const IColumn** columns, const ssize_t row_num, |
627 | 25 | Arena&) const override { |
628 | 25 | std::string pattern = |
629 | 25 | assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(columns[0]) |
630 | 25 | ->get_data_at(0) |
631 | 25 | .to_string(); |
632 | 25 | this->data(place).init(pattern, arg_count); |
633 | | |
634 | 25 | const auto& timestamp = assert_cast<const typename PrimitiveTypeTraits<T>::ColumnType&, |
635 | 25 | TypeCheckOnRelease::DISABLE>(*columns[1]) |
636 | 25 | .get_data()[row_num]; |
637 | 25 | typename AggregateFunctionSequenceMatchData<T, Derived>::Events events; |
638 | | |
639 | 146 | for (auto i = 2; i < arg_count; i++) { |
640 | 121 | const auto event = |
641 | 121 | assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(columns[i]) |
642 | 121 | ->get_data()[row_num]; |
643 | 121 | events.set(i - 2, event); |
644 | 121 | } |
645 | | |
646 | 25 | this->data(place).add(timestamp, events); |
647 | 25 | } _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE Line | Count | Source | 627 | 15 | Arena&) const override { | 628 | 15 | std::string pattern = | 629 | 15 | assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(columns[0]) | 630 | 15 | ->get_data_at(0) | 631 | 15 | .to_string(); | 632 | 15 | this->data(place).init(pattern, arg_count); | 633 | | | 634 | 15 | const auto& timestamp = assert_cast<const typename PrimitiveTypeTraits<T>::ColumnType&, | 635 | 15 | TypeCheckOnRelease::DISABLE>(*columns[1]) | 636 | 15 | .get_data()[row_num]; | 637 | 15 | typename AggregateFunctionSequenceMatchData<T, Derived>::Events events; | 638 | | | 639 | 85 | for (auto i = 2; i < arg_count; i++) { | 640 | 70 | const auto event = | 641 | 70 | assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(columns[i]) | 642 | 70 | ->get_data()[row_num]; | 643 | 70 | events.set(i - 2, event); | 644 | 70 | } | 645 | | | 646 | 15 | this->data(place).add(timestamp, events); | 647 | 15 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE Line | Count | Source | 627 | 10 | Arena&) const override { | 628 | 10 | std::string pattern = | 629 | 10 | assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(columns[0]) | 630 | 10 | ->get_data_at(0) | 631 | 10 | .to_string(); | 632 | 10 | this->data(place).init(pattern, arg_count); | 633 | | | 634 | 10 | const auto& timestamp = assert_cast<const typename PrimitiveTypeTraits<T>::ColumnType&, | 635 | 10 | TypeCheckOnRelease::DISABLE>(*columns[1]) | 636 | 10 | .get_data()[row_num]; | 637 | 10 | typename AggregateFunctionSequenceMatchData<T, Derived>::Events events; | 638 | | | 639 | 61 | for (auto i = 2; i < arg_count; i++) { | 640 | 51 | const auto event = | 641 | 51 | assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(columns[i]) | 642 | 51 | ->get_data()[row_num]; | 643 | 51 | events.set(i - 2, event); | 644 | 51 | } | 645 | | | 646 | 10 | this->data(place).add(timestamp, events); | 647 | 10 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE3addEPcPPKNS_7IColumnElRNS_5ArenaE |
648 | | |
649 | | void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, |
650 | 4 | Arena&) const override { |
651 | 4 | const std::string pattern = this->data(rhs).get_pattern(); |
652 | 4 | this->data(place).init(pattern, this->data(rhs).get_arg_count()); |
653 | 4 | this->data(place).merge(this->data(rhs)); |
654 | 4 | } _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE5mergeEPcPKcRNS_5ArenaE Line | Count | Source | 650 | 2 | Arena&) const override { | 651 | 2 | const std::string pattern = this->data(rhs).get_pattern(); | 652 | 2 | this->data(place).init(pattern, this->data(rhs).get_arg_count()); | 653 | 2 | this->data(place).merge(this->data(rhs)); | 654 | 2 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE5mergeEPcPKcRNS_5ArenaE Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE5mergeEPcPKcRNS_5ArenaE _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE5mergeEPcPKcRNS_5ArenaE Line | Count | Source | 650 | 2 | Arena&) const override { | 651 | 2 | const std::string pattern = this->data(rhs).get_pattern(); | 652 | 2 | this->data(place).init(pattern, this->data(rhs).get_arg_count()); | 653 | 2 | this->data(place).merge(this->data(rhs)); | 654 | 2 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE5mergeEPcPKcRNS_5ArenaE Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE5mergeEPcPKcRNS_5ArenaE |
655 | | |
656 | 7 | void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { |
657 | 7 | this->data(place).write(buf); |
658 | 7 | } _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE9serializeEPKcRNS_14BufferWritableE Line | Count | Source | 656 | 4 | void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { | 657 | 4 | this->data(place).write(buf); | 658 | 4 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE9serializeEPKcRNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE9serializeEPKcRNS_14BufferWritableE _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE9serializeEPKcRNS_14BufferWritableE Line | Count | Source | 656 | 3 | void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { | 657 | 3 | this->data(place).write(buf); | 658 | 3 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE9serializeEPKcRNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE9serializeEPKcRNS_14BufferWritableE |
659 | | |
660 | | void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, |
661 | 7 | Arena&) const override { |
662 | 7 | this->data(place).read(buf); |
663 | 7 | const std::string pattern = this->data(place).get_pattern(); |
664 | 7 | this->data(place).init(pattern, this->data(place).get_arg_count()); |
665 | 7 | } _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceMatchILS1_26EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Line | Count | Source | 661 | 4 | Arena&) const override { | 662 | 4 | this->data(place).read(buf); | 663 | 4 | const std::string pattern = this->data(place).get_pattern(); | 664 | 4 | this->data(place).init(pattern, this->data(place).get_arg_count()); | 665 | 4 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceMatchILS1_25EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceMatchILS1_42EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE26ENS_30AggregateFunctionSequenceCountILS1_26EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Line | Count | Source | 661 | 3 | Arena&) const override { | 662 | 3 | this->data(place).read(buf); | 663 | 3 | const std::string pattern = this->data(place).get_pattern(); | 664 | 3 | this->data(place).init(pattern, this->data(place).get_arg_count()); | 665 | 3 | } |
Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE25ENS_30AggregateFunctionSequenceCountILS1_25EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Unexecuted instantiation: _ZNK5doris29AggregateFunctionSequenceBaseILNS_13PrimitiveTypeE42ENS_30AggregateFunctionSequenceCountILS1_42EEEE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE |
666 | | |
667 | | private: |
668 | | size_t arg_count; |
669 | | }; |
670 | | |
671 | | template <PrimitiveType T> |
672 | | class AggregateFunctionSequenceMatch final |
673 | | : public AggregateFunctionSequenceBase<T, AggregateFunctionSequenceMatch<T>>, |
674 | | VarargsExpression, |
675 | | NullableAggregateFunction { |
676 | | public: |
677 | | AggregateFunctionSequenceMatch(const DataTypes& arguments, const String& pattern_) |
678 | | : AggregateFunctionSequenceBase<T, AggregateFunctionSequenceMatch<T>>(arguments, |
679 | | pattern_) {} |
680 | | |
681 | | using AggregateFunctionSequenceBase< |
682 | | T, AggregateFunctionSequenceMatch<T>>::AggregateFunctionSequenceBase; |
683 | | |
684 | 0 | String get_name() const override { return "sequence_match"; }Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE26EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE25EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE42EE8get_nameB5cxx11Ev |
685 | | |
686 | 0 | DataTypePtr get_return_type() const override { return std::make_shared<DataTypeUInt8>(); }Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE26EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE25EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE42EE15get_return_typeEv |
687 | | |
688 | 11 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { |
689 | 11 | auto& output = assert_cast<ColumnUInt8&>(to).get_data(); |
690 | 11 | if (!this->data(place).conditions_in_pattern.any()) { |
691 | 5 | output.push_back(false); |
692 | 5 | return; |
693 | 5 | } |
694 | | |
695 | 6 | if ((this->data(place).conditions_in_pattern & this->data(place).conditions_met) != |
696 | 6 | this->data(place).conditions_in_pattern) { |
697 | 1 | output.push_back(false); |
698 | 1 | return; |
699 | 1 | } |
700 | | // place is essentially an AggregateDataPtr, passed as a ConstAggregateDataPtr. |
701 | 5 | this->data(const_cast<AggregateDataPtr>(place)).sort(); |
702 | | |
703 | 5 | const auto& data_ref = this->data(place); |
704 | | |
705 | 5 | const auto events_begin = std::begin(data_ref.events_list); |
706 | 5 | const auto events_end = std::end(data_ref.events_list); |
707 | 5 | auto events_it = events_begin; |
708 | | |
709 | 5 | bool match = (this->data(place).pattern_has_time |
710 | 5 | ? (this->data(place).could_match_deterministic_parts(events_begin, |
711 | 2 | events_end) && |
712 | 2 | this->data(place).backtracking_match(events_it, events_end)) |
713 | 5 | : this->data(place).dfa_match(events_it, events_end)); |
714 | 5 | output.push_back(match); |
715 | 5 | } _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE26EE18insert_result_intoEPKcRNS_7IColumnE Line | Count | Source | 688 | 11 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { | 689 | 11 | auto& output = assert_cast<ColumnUInt8&>(to).get_data(); | 690 | 11 | if (!this->data(place).conditions_in_pattern.any()) { | 691 | 5 | output.push_back(false); | 692 | 5 | return; | 693 | 5 | } | 694 | | | 695 | 6 | if ((this->data(place).conditions_in_pattern & this->data(place).conditions_met) != | 696 | 6 | this->data(place).conditions_in_pattern) { | 697 | 1 | output.push_back(false); | 698 | 1 | return; | 699 | 1 | } | 700 | | // place is essentially an AggregateDataPtr, passed as a ConstAggregateDataPtr. | 701 | 5 | this->data(const_cast<AggregateDataPtr>(place)).sort(); | 702 | | | 703 | 5 | const auto& data_ref = this->data(place); | 704 | | | 705 | 5 | const auto events_begin = std::begin(data_ref.events_list); | 706 | 5 | const auto events_end = std::end(data_ref.events_list); | 707 | 5 | auto events_it = events_begin; | 708 | | | 709 | 5 | bool match = (this->data(place).pattern_has_time | 710 | 5 | ? (this->data(place).could_match_deterministic_parts(events_begin, | 711 | 2 | events_end) && | 712 | 2 | this->data(place).backtracking_match(events_it, events_end)) | 713 | 5 | : this->data(place).dfa_match(events_it, events_end)); | 714 | 5 | output.push_back(match); | 715 | 5 | } |
Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE25EE18insert_result_intoEPKcRNS_7IColumnE Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceMatchILNS_13PrimitiveTypeE42EE18insert_result_intoEPKcRNS_7IColumnE |
716 | | }; |
717 | | |
718 | | template <PrimitiveType T> |
719 | | class AggregateFunctionSequenceCount final |
720 | | : public AggregateFunctionSequenceBase<T, AggregateFunctionSequenceCount<T>>, |
721 | | VarargsExpression, |
722 | | NotNullableAggregateFunction { |
723 | | public: |
724 | | AggregateFunctionSequenceCount(const DataTypes& arguments, const String& pattern_) |
725 | | : AggregateFunctionSequenceBase<T, AggregateFunctionSequenceCount<T>>(arguments, |
726 | | pattern_) {} |
727 | | |
728 | | using AggregateFunctionSequenceBase< |
729 | | T, AggregateFunctionSequenceCount<T>>::AggregateFunctionSequenceBase; |
730 | | |
731 | 0 | String get_name() const override { return "sequence_count"; }Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE26EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE25EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE42EE8get_nameB5cxx11Ev |
732 | | |
733 | 0 | DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE26EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE25EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE42EE15get_return_typeEv |
734 | | |
735 | 8 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { |
736 | 8 | auto& output = assert_cast<ColumnInt64&>(to).get_data(); |
737 | 8 | if (!this->data(place).conditions_in_pattern.any()) { |
738 | 4 | output.push_back(0); |
739 | 4 | return; |
740 | 4 | } |
741 | | |
742 | 4 | if ((this->data(place).conditions_in_pattern & this->data(place).conditions_met) != |
743 | 4 | this->data(place).conditions_in_pattern) { |
744 | 0 | output.push_back(0); |
745 | 0 | return; |
746 | 0 | } |
747 | | // place is essentially an AggregateDataPtr, passed as a ConstAggregateDataPtr. |
748 | 4 | this->data(const_cast<AggregateDataPtr>(place)).sort(); |
749 | 4 | output.push_back(count(place)); |
750 | 4 | } _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE26EE18insert_result_intoEPKcRNS_7IColumnE Line | Count | Source | 735 | 8 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { | 736 | 8 | auto& output = assert_cast<ColumnInt64&>(to).get_data(); | 737 | 8 | if (!this->data(place).conditions_in_pattern.any()) { | 738 | 4 | output.push_back(0); | 739 | 4 | return; | 740 | 4 | } | 741 | | | 742 | 4 | if ((this->data(place).conditions_in_pattern & this->data(place).conditions_met) != | 743 | 4 | this->data(place).conditions_in_pattern) { | 744 | 0 | output.push_back(0); | 745 | 0 | return; | 746 | 0 | } | 747 | | // place is essentially an AggregateDataPtr, passed as a ConstAggregateDataPtr. | 748 | 4 | this->data(const_cast<AggregateDataPtr>(place)).sort(); | 749 | 4 | output.push_back(count(place)); | 750 | 4 | } |
Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE25EE18insert_result_intoEPKcRNS_7IColumnE Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE42EE18insert_result_intoEPKcRNS_7IColumnE |
751 | | |
752 | | private: |
753 | 4 | UInt64 count(ConstAggregateDataPtr __restrict place) const { |
754 | 4 | const auto& data_ref = this->data(place); |
755 | | |
756 | 4 | const auto events_begin = std::begin(data_ref.events_list); |
757 | 4 | const auto events_end = std::end(data_ref.events_list); |
758 | 4 | auto events_it = events_begin; |
759 | | |
760 | 4 | size_t count = 0; |
761 | | // check if there is a chance of matching the sequence at least once |
762 | 4 | if (this->data(place).could_match_deterministic_parts(events_begin, events_end)) { |
763 | 11 | while (events_it != events_end && |
764 | 11 | this->data(place).backtracking_match(events_it, events_end)) |
765 | 7 | ++count; |
766 | 4 | } |
767 | | |
768 | 4 | return count; |
769 | 4 | } _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE26EE5countEPKc Line | Count | Source | 753 | 4 | UInt64 count(ConstAggregateDataPtr __restrict place) const { | 754 | 4 | const auto& data_ref = this->data(place); | 755 | | | 756 | 4 | const auto events_begin = std::begin(data_ref.events_list); | 757 | 4 | const auto events_end = std::end(data_ref.events_list); | 758 | 4 | auto events_it = events_begin; | 759 | | | 760 | 4 | size_t count = 0; | 761 | | // check if there is a chance of matching the sequence at least once | 762 | 4 | if (this->data(place).could_match_deterministic_parts(events_begin, events_end)) { | 763 | 11 | while (events_it != events_end && | 764 | 11 | this->data(place).backtracking_match(events_it, events_end)) | 765 | 7 | ++count; | 766 | 4 | } | 767 | | | 768 | 4 | return count; | 769 | 4 | } |
Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE25EE5countEPKc Unexecuted instantiation: _ZNK5doris30AggregateFunctionSequenceCountILNS_13PrimitiveTypeE42EE5countEPKc |
770 | | }; |
771 | | |
772 | | } // namespace doris |