Coverage Report

Created: 2026-05-09 08:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/join_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <algorithm>
21
#include <variant>
22
23
#include "exec/common/hash_table/hash_crc32_return32.h"
24
#include "exec/common/hash_table/hash_key_type.h"
25
#include "exec/common/hash_table/hash_map_context.h"
26
#include "exec/common/hash_table/join_hash_table.h"
27
28
namespace doris {
29
30
// Devirtualize compare_at for ASOF JOIN supported column types.
31
// ASOF JOIN only supports DateV2, DateTimeV2, and TimestampTZ.
32
// Dispatches to the concrete ColumnVector<T> once so that all compare_at
33
// calls inside `func` are direct (non-virtual) calls.
34
// `func` receives a single argument: a const pointer to the concrete column
35
// (or const IColumn* as fallback for unexpected types).
36
template <typename Func>
37
263
decltype(auto) asof_column_dispatch(const IColumn* col, Func&& func) {
38
263
    if (const auto* c_dv2 = check_and_get_column<ColumnDateV2>(col)) {
39
2
        return std::forward<Func>(func)(c_dv2);
40
261
    } else if (const auto* c_dtv2 = check_and_get_column<ColumnDateTimeV2>(col)) {
41
259
        return std::forward<Func>(func)(c_dtv2);
42
259
    } else if (const auto* c_tstz = check_and_get_column<ColumnTimeStampTz>(col)) {
43
3
        return std::forward<Func>(func)(c_tstz);
44
18.4E
    } else {
45
18.4E
        return std::forward<Func>(func)(col);
46
18.4E
    }
47
263
}
48
using JoinOpVariants =
49
        std::variant<std::integral_constant<TJoinOp::type, TJoinOp::INNER_JOIN>,
50
                     std::integral_constant<TJoinOp::type, TJoinOp::LEFT_SEMI_JOIN>,
51
                     std::integral_constant<TJoinOp::type, TJoinOp::LEFT_ANTI_JOIN>,
52
                     std::integral_constant<TJoinOp::type, TJoinOp::LEFT_OUTER_JOIN>,
53
                     std::integral_constant<TJoinOp::type, TJoinOp::FULL_OUTER_JOIN>,
54
                     std::integral_constant<TJoinOp::type, TJoinOp::RIGHT_OUTER_JOIN>,
55
                     std::integral_constant<TJoinOp::type, TJoinOp::CROSS_JOIN>,
56
                     std::integral_constant<TJoinOp::type, TJoinOp::RIGHT_SEMI_JOIN>,
57
                     std::integral_constant<TJoinOp::type, TJoinOp::RIGHT_ANTI_JOIN>,
58
                     std::integral_constant<TJoinOp::type, TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN>,
59
                     std::integral_constant<TJoinOp::type, TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN>,
60
                     std::integral_constant<TJoinOp::type, TJoinOp::ASOF_LEFT_INNER_JOIN>,
61
                     std::integral_constant<TJoinOp::type, TJoinOp::ASOF_LEFT_OUTER_JOIN>>;
62
63
606k
inline bool is_asof_join(TJoinOp::type join_op) {
64
606k
    return join_op == TJoinOp::ASOF_LEFT_INNER_JOIN || join_op == TJoinOp::ASOF_LEFT_OUTER_JOIN;
65
606k
}
66
67
template <int JoinOpType>
68
inline constexpr bool is_asof_join_op_v =
69
        JoinOpType == TJoinOp::ASOF_LEFT_INNER_JOIN || JoinOpType == TJoinOp::ASOF_LEFT_OUTER_JOIN;
70
71
template <int JoinOpType>
72
inline constexpr bool is_asof_outer_join_op_v = JoinOpType == TJoinOp::ASOF_LEFT_OUTER_JOIN;
73
74
template <class T>
75
using PrimaryTypeHashTableContext = MethodOneNumber<T, JoinHashMap<T, HashCRC32Return32<T>, false>>;
76
77
template <class T>
78
using DirectPrimaryTypeHashTableContext =
79
        MethodOneNumberDirect<T, JoinHashMap<T, HashCRC32Return32<T>, true>>;
80
81
template <class Key>
82
using FixedKeyHashTableContext = MethodKeysFixed<JoinHashMap<Key, HashCRC32Return32<Key>, false>>;
83
84
using SerializedHashTableContext =
85
        MethodSerialized<JoinHashMap<StringRef, HashCRC32Return32<StringRef>, false>>;
86
using MethodOneString =
87
        MethodStringNoCache<JoinHashMap<StringRef, HashCRC32Return32<StringRef>, false>>;
88
89
using HashTableVariants = std::variant<
90
        std::monostate, SerializedHashTableContext, PrimaryTypeHashTableContext<UInt8>,
91
        PrimaryTypeHashTableContext<UInt16>, PrimaryTypeHashTableContext<UInt32>,
92
        PrimaryTypeHashTableContext<UInt64>, PrimaryTypeHashTableContext<UInt128>,
93
        PrimaryTypeHashTableContext<UInt256>, DirectPrimaryTypeHashTableContext<UInt8>,
94
        DirectPrimaryTypeHashTableContext<UInt16>, DirectPrimaryTypeHashTableContext<UInt32>,
95
        DirectPrimaryTypeHashTableContext<UInt64>, DirectPrimaryTypeHashTableContext<UInt128>,
96
        FixedKeyHashTableContext<UInt64>, FixedKeyHashTableContext<UInt72>,
97
        FixedKeyHashTableContext<UInt96>, FixedKeyHashTableContext<UInt104>,
98
        FixedKeyHashTableContext<UInt128>, FixedKeyHashTableContext<UInt136>,
99
        FixedKeyHashTableContext<UInt256>, MethodOneString>;
100
101
struct JoinDataVariants {
102
    HashTableVariants method_variant;
103
104
132k
    void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
105
132k
        switch (type) {
106
14.7k
        case HashKeyType::serialized:
107
14.7k
            method_variant.emplace<SerializedHashTableContext>();
108
14.7k
            break;
109
4.58k
        case HashKeyType::int8_key:
110
4.58k
            method_variant.emplace<PrimaryTypeHashTableContext<UInt8>>();
111
4.58k
            break;
112
2.89k
        case HashKeyType::int16_key:
113
2.89k
            method_variant.emplace<PrimaryTypeHashTableContext<UInt16>>();
114
2.89k
            break;
115
23.8k
        case HashKeyType::int32_key:
116
23.8k
            method_variant.emplace<PrimaryTypeHashTableContext<UInt32>>();
117
23.8k
            break;
118
45.1k
        case HashKeyType::int64_key:
119
45.1k
            method_variant.emplace<PrimaryTypeHashTableContext<UInt64>>();
120
45.1k
            break;
121
769
        case HashKeyType::int128_key:
122
769
            method_variant.emplace<PrimaryTypeHashTableContext<UInt128>>();
123
769
            break;
124
38
        case HashKeyType::int256_key:
125
38
            method_variant.emplace<PrimaryTypeHashTableContext<UInt256>>();
126
38
            break;
127
2.56k
        case HashKeyType::string_key:
128
2.56k
            method_variant.emplace<MethodOneString>();
129
2.56k
            break;
130
8.39k
        case HashKeyType::fixed64:
131
8.39k
            method_variant.emplace<FixedKeyHashTableContext<UInt64>>(get_key_sizes(data_types));
132
8.39k
            break;
133
3.35k
        case HashKeyType::fixed72:
134
3.35k
            method_variant.emplace<FixedKeyHashTableContext<UInt72>>(get_key_sizes(data_types));
135
3.35k
            break;
136
8.30k
        case HashKeyType::fixed96:
137
8.30k
            method_variant.emplace<FixedKeyHashTableContext<UInt96>>(get_key_sizes(data_types));
138
8.30k
            break;
139
114
        case HashKeyType::fixed104:
140
114
            method_variant.emplace<FixedKeyHashTableContext<UInt104>>(get_key_sizes(data_types));
141
114
            break;
142
8.20k
        case HashKeyType::fixed128:
143
8.20k
            method_variant.emplace<FixedKeyHashTableContext<UInt128>>(get_key_sizes(data_types));
144
8.20k
            break;
145
960
        case HashKeyType::fixed136:
146
960
            method_variant.emplace<FixedKeyHashTableContext<UInt136>>(get_key_sizes(data_types));
147
960
            break;
148
8.02k
        case HashKeyType::fixed256:
149
8.02k
            method_variant.emplace<FixedKeyHashTableContext<UInt256>>(get_key_sizes(data_types));
150
8.02k
            break;
151
0
        default:
152
0
            throw Exception(ErrorCode::INTERNAL_ERROR,
153
0
                            "JoinDataVariants meet invalid key type, type={}", type);
154
132k
        }
155
132k
    }
156
};
157
158
template <typename Method>
159
void primary_to_direct_mapping(Method* context, const ColumnRawPtrs& key_columns,
160
61.4k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
161
61.4k
    using FieldType = typename Method::Base::Key;
162
61.4k
    FieldType max_key = std::numeric_limits<FieldType>::min();
163
61.4k
    FieldType min_key = std::numeric_limits<FieldType>::max();
164
165
61.4k
    size_t num_rows = key_columns[0]->size();
166
61.4k
    if (key_columns[0]->is_nullable()) {
167
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
168
0
                                              ->get_nested_column_ptr()
169
0
                                              ->get_raw_data()
170
0
                                              .data;
171
0
        const NullMap& null_map =
172
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
173
        // skip first mocked row
174
0
        for (size_t i = 1; i < num_rows; i++) {
175
0
            if (null_map[i]) {
176
0
                continue;
177
0
            }
178
0
            max_key = std::max(max_key, input_keys[i]);
179
0
            min_key = std::min(min_key, input_keys[i]);
180
0
        }
181
61.4k
    } else {
182
61.4k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
183
        // skip first mocked row
184
25.5M
        for (size_t i = 1; i < num_rows; i++) {
185
25.4M
            max_key = std::max(max_key, input_keys[i]);
186
25.4M
            min_key = std::min(min_key, input_keys[i]);
187
25.4M
        }
188
61.4k
    }
189
190
61.4k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
191
61.4k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
192
61.4k
    if (allow_direct_mapping) {
193
35.3k
        for (const auto& variant_ptr : variant_ptrs) {
194
35.3k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
195
35.3k
                    max_key, min_key);
196
35.3k
        }
197
21.3k
    }
198
61.4k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberIhNS_13JoinHashTableIhNS_17HashCRC32Return32IhEELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
160
2.70k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
161
2.70k
    using FieldType = typename Method::Base::Key;
162
2.70k
    FieldType max_key = std::numeric_limits<FieldType>::min();
163
2.70k
    FieldType min_key = std::numeric_limits<FieldType>::max();
164
165
2.70k
    size_t num_rows = key_columns[0]->size();
166
2.70k
    if (key_columns[0]->is_nullable()) {
167
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
168
0
                                              ->get_nested_column_ptr()
169
0
                                              ->get_raw_data()
170
0
                                              .data;
171
0
        const NullMap& null_map =
172
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
173
        // skip first mocked row
174
0
        for (size_t i = 1; i < num_rows; i++) {
175
0
            if (null_map[i]) {
176
0
                continue;
177
0
            }
178
0
            max_key = std::max(max_key, input_keys[i]);
179
0
            min_key = std::min(min_key, input_keys[i]);
180
0
        }
181
2.70k
    } else {
182
2.70k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
183
        // skip first mocked row
184
6.43k
        for (size_t i = 1; i < num_rows; i++) {
185
3.72k
            max_key = std::max(max_key, input_keys[i]);
186
3.72k
            min_key = std::min(min_key, input_keys[i]);
187
3.72k
        }
188
2.70k
    }
189
190
2.70k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
191
2.70k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
192
2.70k
    if (allow_direct_mapping) {
193
2.77k
        for (const auto& variant_ptr : variant_ptrs) {
194
2.77k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
195
2.77k
                    max_key, min_key);
196
2.77k
        }
197
988
    }
198
2.70k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberItNS_13JoinHashTableItNS_17HashCRC32Return32ItEELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
160
1.30k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
161
1.30k
    using FieldType = typename Method::Base::Key;
162
1.30k
    FieldType max_key = std::numeric_limits<FieldType>::min();
163
1.30k
    FieldType min_key = std::numeric_limits<FieldType>::max();
164
165
1.30k
    size_t num_rows = key_columns[0]->size();
166
1.30k
    if (key_columns[0]->is_nullable()) {
167
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
168
0
                                              ->get_nested_column_ptr()
169
0
                                              ->get_raw_data()
170
0
                                              .data;
171
0
        const NullMap& null_map =
172
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
173
        // skip first mocked row
174
0
        for (size_t i = 1; i < num_rows; i++) {
175
0
            if (null_map[i]) {
176
0
                continue;
177
0
            }
178
0
            max_key = std::max(max_key, input_keys[i]);
179
0
            min_key = std::min(min_key, input_keys[i]);
180
0
        }
181
1.30k
    } else {
182
1.30k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
183
        // skip first mocked row
184
6.93k
        for (size_t i = 1; i < num_rows; i++) {
185
5.63k
            max_key = std::max(max_key, input_keys[i]);
186
5.63k
            min_key = std::min(min_key, input_keys[i]);
187
5.63k
        }
188
1.30k
    }
189
190
1.30k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
191
1.30k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
192
1.30k
    if (allow_direct_mapping) {
193
2.28k
        for (const auto& variant_ptr : variant_ptrs) {
194
2.28k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
195
2.28k
                    max_key, min_key);
196
2.28k
        }
197
711
    }
198
1.30k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberIjNS_13JoinHashTableIjNS_17HashCRC32Return32IjEELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
160
15.3k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
161
15.3k
    using FieldType = typename Method::Base::Key;
162
15.3k
    FieldType max_key = std::numeric_limits<FieldType>::min();
163
15.3k
    FieldType min_key = std::numeric_limits<FieldType>::max();
164
165
15.3k
    size_t num_rows = key_columns[0]->size();
166
15.3k
    if (key_columns[0]->is_nullable()) {
167
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
168
0
                                              ->get_nested_column_ptr()
169
0
                                              ->get_raw_data()
170
0
                                              .data;
171
0
        const NullMap& null_map =
172
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
173
        // skip first mocked row
174
0
        for (size_t i = 1; i < num_rows; i++) {
175
0
            if (null_map[i]) {
176
0
                continue;
177
0
            }
178
0
            max_key = std::max(max_key, input_keys[i]);
179
0
            min_key = std::min(min_key, input_keys[i]);
180
0
        }
181
15.3k
    } else {
182
15.3k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
183
        // skip first mocked row
184
25.2M
        for (size_t i = 1; i < num_rows; i++) {
185
25.2M
            max_key = std::max(max_key, input_keys[i]);
186
25.2M
            min_key = std::min(min_key, input_keys[i]);
187
25.2M
        }
188
15.3k
    }
189
190
15.3k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
191
15.3k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
192
15.3k
    if (allow_direct_mapping) {
193
20.5k
        for (const auto& variant_ptr : variant_ptrs) {
194
20.5k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
195
20.5k
                    max_key, min_key);
196
20.5k
        }
197
12.5k
    }
198
15.3k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberImNS_13JoinHashTableImNS_17HashCRC32Return32ImEELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
160
41.6k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
161
41.6k
    using FieldType = typename Method::Base::Key;
162
41.6k
    FieldType max_key = std::numeric_limits<FieldType>::min();
163
41.6k
    FieldType min_key = std::numeric_limits<FieldType>::max();
164
165
41.6k
    size_t num_rows = key_columns[0]->size();
166
41.6k
    if (key_columns[0]->is_nullable()) {
167
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
168
0
                                              ->get_nested_column_ptr()
169
0
                                              ->get_raw_data()
170
0
                                              .data;
171
0
        const NullMap& null_map =
172
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
173
        // skip first mocked row
174
0
        for (size_t i = 1; i < num_rows; i++) {
175
0
            if (null_map[i]) {
176
0
                continue;
177
0
            }
178
0
            max_key = std::max(max_key, input_keys[i]);
179
0
            min_key = std::min(min_key, input_keys[i]);
180
0
        }
181
41.6k
    } else {
182
41.6k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
183
        // skip first mocked row
184
281k
        for (size_t i = 1; i < num_rows; i++) {
185
239k
            max_key = std::max(max_key, input_keys[i]);
186
239k
            min_key = std::min(min_key, input_keys[i]);
187
239k
        }
188
41.6k
    }
189
190
41.6k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
191
41.6k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
192
41.6k
    if (allow_direct_mapping) {
193
9.15k
        for (const auto& variant_ptr : variant_ptrs) {
194
9.15k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
195
9.15k
                    max_key, min_key);
196
9.15k
        }
197
6.76k
    }
198
41.6k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberIN4wide7integerILm128EjEENS_13JoinHashTableIS4_NS_17HashCRC32Return32IS4_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
Line
Count
Source
160
516
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
161
516
    using FieldType = typename Method::Base::Key;
162
516
    FieldType max_key = std::numeric_limits<FieldType>::min();
163
516
    FieldType min_key = std::numeric_limits<FieldType>::max();
164
165
516
    size_t num_rows = key_columns[0]->size();
166
516
    if (key_columns[0]->is_nullable()) {
167
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
168
0
                                              ->get_nested_column_ptr()
169
0
                                              ->get_raw_data()
170
0
                                              .data;
171
0
        const NullMap& null_map =
172
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
173
        // skip first mocked row
174
0
        for (size_t i = 1; i < num_rows; i++) {
175
0
            if (null_map[i]) {
176
0
                continue;
177
0
            }
178
0
            max_key = std::max(max_key, input_keys[i]);
179
0
            min_key = std::min(min_key, input_keys[i]);
180
0
        }
181
516
    } else {
182
516
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
183
        // skip first mocked row
184
1.59k
        for (size_t i = 1; i < num_rows; i++) {
185
1.07k
            max_key = std::max(max_key, input_keys[i]);
186
1.07k
            min_key = std::min(min_key, input_keys[i]);
187
1.07k
        }
188
516
    }
189
190
516
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
191
516
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
192
516
    if (allow_direct_mapping) {
193
621
        for (const auto& variant_ptr : variant_ptrs) {
194
621
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
195
621
                    max_key, min_key);
196
621
        }
197
405
    }
198
516
}
199
200
template <typename Method>
201
void try_convert_to_direct_mapping(
202
        Method* method, const ColumnRawPtrs& key_columns,
203
51.2k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingISt9monostateEEvPT_RKSt6vectorIPKNS_7IColumnESaIS7_EERKS4_ISt10shared_ptrINS_16JoinDataVariantsEESaISE_EE
_ZN5doris29try_convert_to_direct_mappingINS_16MethodSerializedINS_13JoinHashTableINS_9StringRefENS_17HashCRC32Return32IS3_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
203
14.5k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodOneNumberIN4wide7integerILm256EjEENS_13JoinHashTableIS4_NS_17HashCRC32Return32IS4_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
Line
Count
Source
203
30
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectIhNS_13JoinHashTableIhNS_17HashCRC32Return32IhEELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectItNS_13JoinHashTableItNS_17HashCRC32Return32ItEELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectIjNS_13JoinHashTableIjNS_17HashCRC32Return32IjEELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectImNS_13JoinHashTableImNS_17HashCRC32Return32ImEELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectIN4wide7integerILm128EjEENS_13JoinHashTableIS4_NS_17HashCRC32Return32IS4_EELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableImNS_17HashCRC32Return32ImEELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
203
6.84k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableINS_6UInt72ENS_17HashCRC32Return32IS3_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
203
3.29k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableINS_6UInt96ENS_17HashCRC32Return32IS3_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
203
8.05k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableINS_7UInt104ENS_17HashCRC32Return32IS3_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
203
52
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableIN4wide7integerILm128EjEENS_17HashCRC32Return32IS5_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
Line
Count
Source
203
8.13k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableINS_7UInt136ENS_17HashCRC32Return32IS3_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
203
960
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableIN4wide7integerILm256EjEENS_17HashCRC32Return32IS5_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
Line
Count
Source
203
7.96k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_19MethodStringNoCacheINS_13JoinHashTableINS_9StringRefENS_17HashCRC32Return32IS3_EELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
203
1.38k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
204
205
inline void try_convert_to_direct_mapping(
206
        PrimaryTypeHashTableContext<UInt8>* context, const ColumnRawPtrs& key_columns,
207
2.70k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
208
2.70k
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
209
2.70k
}
210
211
inline void try_convert_to_direct_mapping(
212
        PrimaryTypeHashTableContext<UInt16>* context, const ColumnRawPtrs& key_columns,
213
1.30k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
214
1.30k
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
215
1.30k
}
216
217
inline void try_convert_to_direct_mapping(
218
        PrimaryTypeHashTableContext<UInt32>* context, const ColumnRawPtrs& key_columns,
219
15.3k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
220
15.3k
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
221
15.3k
}
222
223
inline void try_convert_to_direct_mapping(
224
        PrimaryTypeHashTableContext<UInt64>* context, const ColumnRawPtrs& key_columns,
225
41.6k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
226
41.6k
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
227
41.6k
}
228
229
inline void try_convert_to_direct_mapping(
230
        PrimaryTypeHashTableContext<UInt128>* context, const ColumnRawPtrs& key_columns,
231
516
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
232
516
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
233
516
}
234
235
// ASOF JOIN index with inline values for cache-friendly branchless binary search.
236
// IntType is the integer representation of the ASOF column value:
237
//   uint32_t for DateV2, uint64_t for DateTimeV2 and TimestampTZ.
238
// Rows are sorted by asof_value during build, then materialized into SoA arrays
239
// so probe-side binary search only touches the ASOF values hot path.
240
template <typename IntType>
241
struct AsofIndexGroup {
242
    using int_type = IntType;
243
244
    struct Entry {
245
        IntType asof_value;
246
        uint32_t row_index; // 1-based, 0 = invalid/padding
247
    };
248
249
    std::vector<Entry> entries;
250
    std::vector<IntType> asof_values;
251
    std::vector<uint32_t> row_indexes;
252
253
4.65k
    void add_row(IntType value, uint32_t row_idx) { entries.push_back({value, row_idx}); }
_ZN5doris14AsofIndexGroupIjE7add_rowEjj
Line
Count
Source
253
1.06k
    void add_row(IntType value, uint32_t row_idx) { entries.push_back({value, row_idx}); }
_ZN5doris14AsofIndexGroupImE7add_rowEmj
Line
Count
Source
253
3.59k
    void add_row(IntType value, uint32_t row_idx) { entries.push_back({value, row_idx}); }
254
255
563
    void sort_and_finalize() {
256
563
        if (entries.empty()) {
257
4
            return;
258
4
        }
259
559
        if (entries.size() > 1) {
260
480
            pdqsort(entries.begin(), entries.end(),
261
16.6k
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
_ZZN5doris14AsofIndexGroupIjE17sort_and_finalizeEvENKUlRKNS1_5EntryES4_E_clES4_S4_
Line
Count
Source
261
2.06k
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
_ZZN5doris14AsofIndexGroupImE17sort_and_finalizeEvENKUlRKNS1_5EntryES4_E_clES4_S4_
Line
Count
Source
261
14.5k
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
262
480
        }
263
264
559
        asof_values.resize(entries.size());
265
559
        row_indexes.resize(entries.size());
266
5.21k
        for (size_t i = 0; i < entries.size(); ++i) {
267
4.65k
            asof_values[i] = entries[i].asof_value;
268
4.65k
            row_indexes[i] = entries[i].row_index;
269
4.65k
        }
270
271
559
        std::vector<Entry>().swap(entries);
272
559
    }
_ZN5doris14AsofIndexGroupIjE17sort_and_finalizeEv
Line
Count
Source
255
23
    void sort_and_finalize() {
256
23
        if (entries.empty()) {
257
4
            return;
258
4
        }
259
19
        if (entries.size() > 1) {
260
14
            pdqsort(entries.begin(), entries.end(),
261
14
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
262
14
        }
263
264
19
        asof_values.resize(entries.size());
265
19
        row_indexes.resize(entries.size());
266
1.07k
        for (size_t i = 0; i < entries.size(); ++i) {
267
1.06k
            asof_values[i] = entries[i].asof_value;
268
1.06k
            row_indexes[i] = entries[i].row_index;
269
1.06k
        }
270
271
19
        std::vector<Entry>().swap(entries);
272
19
    }
_ZN5doris14AsofIndexGroupImE17sort_and_finalizeEv
Line
Count
Source
255
540
    void sort_and_finalize() {
256
540
        if (entries.empty()) {
257
0
            return;
258
0
        }
259
540
        if (entries.size() > 1) {
260
466
            pdqsort(entries.begin(), entries.end(),
261
466
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
262
466
        }
263
264
540
        asof_values.resize(entries.size());
265
540
        row_indexes.resize(entries.size());
266
4.13k
        for (size_t i = 0; i < entries.size(); ++i) {
267
3.59k
            asof_values[i] = entries[i].asof_value;
268
3.59k
            row_indexes[i] = entries[i].row_index;
269
3.59k
        }
270
271
540
        std::vector<Entry>().swap(entries);
272
540
    }
273
274
817
    const IntType* values_data() const { return asof_values.data(); }
_ZNK5doris14AsofIndexGroupIjE11values_dataEv
Line
Count
Source
274
1
    const IntType* values_data() const { return asof_values.data(); }
_ZNK5doris14AsofIndexGroupImE11values_dataEv
Line
Count
Source
274
816
    const IntType* values_data() const { return asof_values.data(); }
275
276
    // Branchless lower_bound: first i where asof_values[i] >= target
277
454
    ALWAYS_INLINE size_t lower_bound(IntType target) const {
278
454
        size_t lo = 0, n = asof_values.size();
279
2.30k
        while (n > 1) {
280
1.85k
            size_t half = n / 2;
281
1.85k
            lo += half * (asof_values[lo + half] < target);
282
1.85k
            n -= half;
283
1.85k
        }
284
454
        if (lo < asof_values.size()) {
285
453
            lo += (asof_values[lo] < target);
286
453
        }
287
454
        return lo;
288
454
    }
_ZNK5doris14AsofIndexGroupIjE11lower_boundEj
Line
Count
Source
277
33
    ALWAYS_INLINE size_t lower_bound(IntType target) const {
278
33
        size_t lo = 0, n = asof_values.size();
279
163
        while (n > 1) {
280
130
            size_t half = n / 2;
281
130
            lo += half * (asof_values[lo + half] < target);
282
130
            n -= half;
283
130
        }
284
33
        if (lo < asof_values.size()) {
285
32
            lo += (asof_values[lo] < target);
286
32
        }
287
33
        return lo;
288
33
    }
_ZNK5doris14AsofIndexGroupImE11lower_boundEm
Line
Count
Source
277
421
    ALWAYS_INLINE size_t lower_bound(IntType target) const {
278
421
        size_t lo = 0, n = asof_values.size();
279
2.14k
        while (n > 1) {
280
1.72k
            size_t half = n / 2;
281
1.72k
            lo += half * (asof_values[lo + half] < target);
282
1.72k
            n -= half;
283
1.72k
        }
284
421
        if (lo < asof_values.size()) {
285
421
            lo += (asof_values[lo] < target);
286
421
        }
287
421
        return lo;
288
421
    }
289
290
    // Branchless upper_bound: first i where asof_values[i] > target
291
946
    ALWAYS_INLINE size_t upper_bound(IntType target) const {
292
946
        size_t lo = 0, n = asof_values.size();
293
4.06k
        while (n > 1) {
294
3.12k
            size_t half = n / 2;
295
3.12k
            lo += half * (asof_values[lo + half] <= target);
296
3.12k
            n -= half;
297
3.12k
        }
298
946
        if (lo < asof_values.size()) {
299
945
            lo += (asof_values[lo] <= target);
300
945
        }
301
946
        return lo;
302
946
    }
_ZNK5doris14AsofIndexGroupIjE11upper_boundEj
Line
Count
Source
291
41
    ALWAYS_INLINE size_t upper_bound(IntType target) const {
292
41
        size_t lo = 0, n = asof_values.size();
293
215
        while (n > 1) {
294
174
            size_t half = n / 2;
295
174
            lo += half * (asof_values[lo + half] <= target);
296
174
            n -= half;
297
174
        }
298
41
        if (lo < asof_values.size()) {
299
40
            lo += (asof_values[lo] <= target);
300
40
        }
301
41
        return lo;
302
41
    }
_ZNK5doris14AsofIndexGroupImE11upper_boundEm
Line
Count
Source
291
905
    ALWAYS_INLINE size_t upper_bound(IntType target) const {
292
905
        size_t lo = 0, n = asof_values.size();
293
3.85k
        while (n > 1) {
294
2.94k
            size_t half = n / 2;
295
2.94k
            lo += half * (asof_values[lo + half] <= target);
296
2.94k
            n -= half;
297
2.94k
        }
298
905
        if (lo < asof_values.size()) {
299
905
            lo += (asof_values[lo] <= target);
300
905
        }
301
905
        return lo;
302
905
    }
303
304
    // Semantics by (is_greater, is_strict):
305
    //   (true,  false): probe >= build  ->  find largest  build value <= probe
306
    //   (true,  true):  probe >  build  ->  find largest  build value <  probe
307
    //   (false, false): probe <= build  ->  find smallest build value >= probe
308
    //   (false, true):  probe <  build  ->  find smallest build value >  probe
309
    // Returns the build row index of the best match, or 0 if no match.
310
    template <bool IsGreater, bool IsStrict>
311
1.38k
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
312
1.38k
        if (asof_values.empty()) {
313
4
            return 0;
314
4
        }
315
1.38k
        if constexpr (IsGreater) {
316
943
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
317
943
            return pos > 0 ? row_indexes[pos - 1] : 0;
318
943
        } else {
319
439
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
320
439
            return pos < asof_values.size() ? row_indexes[pos] : 0;
321
439
        }
322
1.38k
    }
_ZNK5doris14AsofIndexGroupIjE15find_best_matchILb1ELb1EEEjj
Line
Count
Source
311
15
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
312
15
        if (asof_values.empty()) {
313
1
            return 0;
314
1
        }
315
14
        if constexpr (IsGreater) {
316
14
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
317
14
            return pos > 0 ? row_indexes[pos - 1] : 0;
318
        } else {
319
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
320
            return pos < asof_values.size() ? row_indexes[pos] : 0;
321
        }
322
14
    }
_ZNK5doris14AsofIndexGroupIjE15find_best_matchILb1ELb0EEEjj
Line
Count
Source
311
22
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
312
22
        if (asof_values.empty()) {
313
1
            return 0;
314
1
        }
315
21
        if constexpr (IsGreater) {
316
21
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
317
21
            return pos > 0 ? row_indexes[pos - 1] : 0;
318
        } else {
319
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
320
            return pos < asof_values.size() ? row_indexes[pos] : 0;
321
        }
322
21
    }
_ZNK5doris14AsofIndexGroupIjE15find_best_matchILb0ELb1EEEjj
Line
Count
Source
311
16
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
312
16
        if (asof_values.empty()) {
313
1
            return 0;
314
1
        }
315
        if constexpr (IsGreater) {
316
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
317
            return pos > 0 ? row_indexes[pos - 1] : 0;
318
15
        } else {
319
15
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
320
15
            return pos < asof_values.size() ? row_indexes[pos] : 0;
321
15
        }
322
15
    }
_ZNK5doris14AsofIndexGroupIjE15find_best_matchILb0ELb0EEEjj
Line
Count
Source
311
15
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
312
15
        if (asof_values.empty()) {
313
1
            return 0;
314
1
        }
315
        if constexpr (IsGreater) {
316
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
317
            return pos > 0 ? row_indexes[pos - 1] : 0;
318
14
        } else {
319
14
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
320
14
            return pos < asof_values.size() ? row_indexes[pos] : 0;
321
14
        }
322
14
    }
_ZNK5doris14AsofIndexGroupImE15find_best_matchILb1ELb1EEEjm
Line
Count
Source
311
205
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
312
205
        if (asof_values.empty()) {
313
0
            return 0;
314
0
        }
315
205
        if constexpr (IsGreater) {
316
205
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
317
205
            return pos > 0 ? row_indexes[pos - 1] : 0;
318
        } else {
319
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
320
            return pos < asof_values.size() ? row_indexes[pos] : 0;
321
        }
322
205
    }
_ZNK5doris14AsofIndexGroupImE15find_best_matchILb1ELb0EEEjm
Line
Count
Source
311
703
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
312
703
        if (asof_values.empty()) {
313
0
            return 0;
314
0
        }
315
703
        if constexpr (IsGreater) {
316
703
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
317
703
            return pos > 0 ? row_indexes[pos - 1] : 0;
318
        } else {
319
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
320
            return pos < asof_values.size() ? row_indexes[pos] : 0;
321
        }
322
703
    }
_ZNK5doris14AsofIndexGroupImE15find_best_matchILb0ELb1EEEjm
Line
Count
Source
311
198
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
312
198
        if (asof_values.empty()) {
313
0
            return 0;
314
0
        }
315
        if constexpr (IsGreater) {
316
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
317
            return pos > 0 ? row_indexes[pos - 1] : 0;
318
198
        } else {
319
198
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
320
198
            return pos < asof_values.size() ? row_indexes[pos] : 0;
321
198
        }
322
198
    }
_ZNK5doris14AsofIndexGroupImE15find_best_matchILb0ELb0EEEjm
Line
Count
Source
311
212
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
312
212
        if (asof_values.empty()) {
313
0
            return 0;
314
0
        }
315
        if constexpr (IsGreater) {
316
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
317
            return pos > 0 ? row_indexes[pos - 1] : 0;
318
212
        } else {
319
212
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
320
212
            return pos < asof_values.size() ? row_indexes[pos] : 0;
321
212
        }
322
212
    }
323
};
324
325
// Type-erased container for all ASOF index groups.
326
// DateV2 -> uint32_t, DateTimeV2/TimestampTZ -> uint64_t.
327
using AsofIndexVariant = std::variant<std::monostate, std::vector<AsofIndexGroup<uint32_t>>,
328
                                      std::vector<AsofIndexGroup<uint64_t>>>;
329
330
} // namespace doris