Coverage Report

Created: 2026-03-13 03:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/join_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <algorithm>
21
#include <variant>
22
23
#include "exec/common/hash_table/hash_key_type.h"
24
#include "exec/common/hash_table/hash_map_context.h"
25
#include "exec/common/hash_table/join_hash_table.h"
26
27
namespace doris {
28
29
// Devirtualize compare_at for ASOF JOIN supported column types.
30
// ASOF JOIN only supports DateV2, DateTimeV2, and TimestampTZ.
31
// Dispatches to the concrete ColumnVector<T> once so that all compare_at
32
// calls inside `func` are direct (non-virtual) calls.
33
// `func` receives a single argument: a const pointer to the concrete column
34
// (or const IColumn* as fallback for unexpected types).
35
template <typename Func>
36
102
decltype(auto) asof_column_dispatch(const IColumn* col, Func&& func) {
37
102
    if (const auto* c_dv2 = check_and_get_column<ColumnDateV2>(col)) {
38
1
        return std::forward<Func>(func)(c_dv2);
39
101
    } else if (const auto* c_dtv2 = check_and_get_column<ColumnDateTimeV2>(col)) {
40
100
        return std::forward<Func>(func)(c_dtv2);
41
100
    } else if (const auto* c_tstz = check_and_get_column<ColumnTimeStampTz>(col)) {
42
1
        return std::forward<Func>(func)(c_tstz);
43
1
    } else {
44
0
        return std::forward<Func>(func)(col);
45
0
    }
46
102
}
47
using JoinOpVariants =
48
        std::variant<std::integral_constant<TJoinOp::type, TJoinOp::INNER_JOIN>,
49
                     std::integral_constant<TJoinOp::type, TJoinOp::LEFT_SEMI_JOIN>,
50
                     std::integral_constant<TJoinOp::type, TJoinOp::LEFT_ANTI_JOIN>,
51
                     std::integral_constant<TJoinOp::type, TJoinOp::LEFT_OUTER_JOIN>,
52
                     std::integral_constant<TJoinOp::type, TJoinOp::FULL_OUTER_JOIN>,
53
                     std::integral_constant<TJoinOp::type, TJoinOp::RIGHT_OUTER_JOIN>,
54
                     std::integral_constant<TJoinOp::type, TJoinOp::CROSS_JOIN>,
55
                     std::integral_constant<TJoinOp::type, TJoinOp::RIGHT_SEMI_JOIN>,
56
                     std::integral_constant<TJoinOp::type, TJoinOp::RIGHT_ANTI_JOIN>,
57
                     std::integral_constant<TJoinOp::type, TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN>,
58
                     std::integral_constant<TJoinOp::type, TJoinOp::NULL_AWARE_LEFT_SEMI_JOIN>,
59
                     std::integral_constant<TJoinOp::type, TJoinOp::ASOF_LEFT_INNER_JOIN>,
60
                     std::integral_constant<TJoinOp::type, TJoinOp::ASOF_LEFT_OUTER_JOIN>>;
61
62
703k
// Returns true when `join_op` is one of the two ASOF join operators
// (ASOF_LEFT_INNER_JOIN or ASOF_LEFT_OUTER_JOIN), false for every other value.
inline bool is_asof_join(TJoinOp::type join_op) {
    switch (join_op) {
    case TJoinOp::ASOF_LEFT_INNER_JOIN:
    case TJoinOp::ASOF_LEFT_OUTER_JOIN:
        return true;
    default:
        return false;
    }
}
65
66
template <int JoinOpType>
67
inline constexpr bool is_asof_join_op_v =
68
        JoinOpType == TJoinOp::ASOF_LEFT_INNER_JOIN || JoinOpType == TJoinOp::ASOF_LEFT_OUTER_JOIN;
69
70
template <int JoinOpType>
71
inline constexpr bool is_asof_outer_join_op_v = JoinOpType == TJoinOp::ASOF_LEFT_OUTER_JOIN;
72
73
// Hash-table method for a single numeric key of type T: MethodOneNumber over a
// JoinHashMap hashed with HashCRC32<T>.
// NOTE(review): the trailing `false` is JoinHashMap's third template argument; it
// presumably disables direct mapping — confirm in join_hash_table.h.
template <class T>
74
using PrimaryTypeHashTableContext = MethodOneNumber<T, JoinHashMap<T, HashCRC32<T>, false>>;
75
76
// Direct-mapping counterpart of the single-numeric-key method: MethodOneNumberDirect
// over a JoinHashMap whose third template argument is `true`.
// primary_to_direct_mapping() constructs it from (max_key, min_key).
template <class T>
77
using DirectPrimaryTypeHashTableContext =
78
        MethodOneNumberDirect<T, JoinHashMap<T, HashCRC32<T>, true>>;
79
80
// Hash-table method for fixed-width keys of type Key: MethodKeysFixed over a
// JoinHashMap hashed with HashCRC32<Key>.
// JoinDataVariants::init() constructs it from get_key_sizes(data_types).
template <class Key>
81
using FixedKeyHashTableContext = MethodKeysFixed<JoinHashMap<Key, HashCRC32<Key>, false>>;
82
83
// Hash-table method selected for HashKeyType::serialized: MethodSerialized over a
// StringRef-keyed JoinHashMap hashed with DefaultHash<StringRef>.
using SerializedHashTableContext =
84
        MethodSerialized<JoinHashMap<StringRef, DefaultHash<StringRef>, false>>;
85
// Hash-table method selected for HashKeyType::string_key: MethodStringNoCache over
// the same StringRef-keyed JoinHashMap type used by SerializedHashTableContext.
using MethodOneString = MethodStringNoCache<JoinHashMap<StringRef, DefaultHash<StringRef>, false>>;
86
87
// Closed set of hash-table methods a join can hold.
// std::monostate is the first alternative, so a default-constructed variant holds no
// method. The Direct* alternatives stop at UInt128: there is no
// DirectPrimaryTypeHashTableContext<UInt256> entry.
// The order of alternatives fixes variant::index(); do not reorder casually.
using HashTableVariants = std::variant<
88
        std::monostate, SerializedHashTableContext, PrimaryTypeHashTableContext<UInt8>,
89
        PrimaryTypeHashTableContext<UInt16>, PrimaryTypeHashTableContext<UInt32>,
90
        PrimaryTypeHashTableContext<UInt64>, PrimaryTypeHashTableContext<UInt128>,
91
        PrimaryTypeHashTableContext<UInt256>, DirectPrimaryTypeHashTableContext<UInt8>,
92
        DirectPrimaryTypeHashTableContext<UInt16>, DirectPrimaryTypeHashTableContext<UInt32>,
93
        DirectPrimaryTypeHashTableContext<UInt64>, DirectPrimaryTypeHashTableContext<UInt128>,
94
        FixedKeyHashTableContext<UInt64>, FixedKeyHashTableContext<UInt72>,
95
        FixedKeyHashTableContext<UInt96>, FixedKeyHashTableContext<UInt104>,
96
        FixedKeyHashTableContext<UInt128>, FixedKeyHashTableContext<UInt136>,
97
        FixedKeyHashTableContext<UInt256>, MethodOneString>;
98
99
struct JoinDataVariants {
100
    HashTableVariants method_variant;
101
102
163k
    void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
103
163k
        switch (type) {
104
15.4k
        case HashKeyType::serialized:
105
15.4k
            method_variant.emplace<SerializedHashTableContext>();
106
15.4k
            break;
107
2.49k
        case HashKeyType::int8_key:
108
2.49k
            method_variant.emplace<PrimaryTypeHashTableContext<UInt8>>();
109
2.49k
            break;
110
1.53k
        case HashKeyType::int16_key:
111
1.53k
            method_variant.emplace<PrimaryTypeHashTableContext<UInt16>>();
112
1.53k
            break;
113
22.1k
        case HashKeyType::int32_key:
114
22.1k
            method_variant.emplace<PrimaryTypeHashTableContext<UInt32>>();
115
22.1k
            break;
116
80.1k
        case HashKeyType::int64_key:
117
80.1k
            method_variant.emplace<PrimaryTypeHashTableContext<UInt64>>();
118
80.1k
            break;
119
881
        case HashKeyType::int128_key:
120
881
            method_variant.emplace<PrimaryTypeHashTableContext<UInt128>>();
121
881
            break;
122
45
        case HashKeyType::int256_key:
123
45
            method_variant.emplace<PrimaryTypeHashTableContext<UInt256>>();
124
45
            break;
125
2.09k
        case HashKeyType::string_key:
126
2.09k
            method_variant.emplace<MethodOneString>();
127
2.09k
            break;
128
10.0k
        case HashKeyType::fixed64:
129
10.0k
            method_variant.emplace<FixedKeyHashTableContext<UInt64>>(get_key_sizes(data_types));
130
10.0k
            break;
131
3.40k
        case HashKeyType::fixed72:
132
3.40k
            method_variant.emplace<FixedKeyHashTableContext<UInt72>>(get_key_sizes(data_types));
133
3.40k
            break;
134
8.13k
        case HashKeyType::fixed96:
135
8.13k
            method_variant.emplace<FixedKeyHashTableContext<UInt96>>(get_key_sizes(data_types));
136
8.13k
            break;
137
121
        case HashKeyType::fixed104:
138
121
            method_variant.emplace<FixedKeyHashTableContext<UInt104>>(get_key_sizes(data_types));
139
121
            break;
140
8.11k
        case HashKeyType::fixed128:
141
8.11k
            method_variant.emplace<FixedKeyHashTableContext<UInt128>>(get_key_sizes(data_types));
142
8.11k
            break;
143
960
        case HashKeyType::fixed136:
144
960
            method_variant.emplace<FixedKeyHashTableContext<UInt136>>(get_key_sizes(data_types));
145
960
            break;
146
7.93k
        case HashKeyType::fixed256:
147
7.93k
            method_variant.emplace<FixedKeyHashTableContext<UInt256>>(get_key_sizes(data_types));
148
7.93k
            break;
149
0
        default:
150
0
            throw Exception(ErrorCode::INTERNAL_ERROR,
151
0
                            "JoinDataVariants meet invalid key type, type={}", type);
152
163k
        }
153
163k
    }
154
};
155
156
template <typename Method>
157
void primary_to_direct_mapping(Method* context, const ColumnRawPtrs& key_columns,
158
99.0k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
159
99.0k
    using FieldType = typename Method::Base::Key;
160
99.0k
    FieldType max_key = std::numeric_limits<FieldType>::min();
161
99.0k
    FieldType min_key = std::numeric_limits<FieldType>::max();
162
163
99.0k
    size_t num_rows = key_columns[0]->size();
164
99.0k
    if (key_columns[0]->is_nullable()) {
165
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
166
0
                                              ->get_nested_column_ptr()
167
0
                                              ->get_raw_data()
168
0
                                              .data;
169
0
        const NullMap& null_map =
170
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
171
        // skip first mocked row
172
0
        for (size_t i = 1; i < num_rows; i++) {
173
0
            if (null_map[i]) {
174
0
                continue;
175
0
            }
176
0
            max_key = std::max(max_key, input_keys[i]);
177
0
            min_key = std::min(min_key, input_keys[i]);
178
0
        }
179
99.0k
    } else {
180
99.0k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
181
        // skip first mocked row
182
26.4M
        for (size_t i = 1; i < num_rows; i++) {
183
26.3M
            max_key = std::max(max_key, input_keys[i]);
184
26.3M
            min_key = std::min(min_key, input_keys[i]);
185
26.3M
        }
186
99.0k
    }
187
188
99.0k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
189
99.0k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
190
99.0k
    if (allow_direct_mapping) {
191
30.7k
        for (const auto& variant_ptr : variant_ptrs) {
192
30.7k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
193
30.7k
                    max_key, min_key);
194
30.7k
        }
195
23.0k
    }
196
99.0k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberIhNS_13JoinHashTableIh9HashCRC32IhELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
158
2.36k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
159
2.36k
    using FieldType = typename Method::Base::Key;
160
2.36k
    FieldType max_key = std::numeric_limits<FieldType>::min();
161
2.36k
    FieldType min_key = std::numeric_limits<FieldType>::max();
162
163
2.36k
    size_t num_rows = key_columns[0]->size();
164
2.36k
    if (key_columns[0]->is_nullable()) {
165
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
166
0
                                              ->get_nested_column_ptr()
167
0
                                              ->get_raw_data()
168
0
                                              .data;
169
0
        const NullMap& null_map =
170
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
171
        // skip first mocked row
172
0
        for (size_t i = 1; i < num_rows; i++) {
173
0
            if (null_map[i]) {
174
0
                continue;
175
0
            }
176
0
            max_key = std::max(max_key, input_keys[i]);
177
0
            min_key = std::min(min_key, input_keys[i]);
178
0
        }
179
2.36k
    } else {
180
2.36k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
181
        // skip first mocked row
182
6.46k
        for (size_t i = 1; i < num_rows; i++) {
183
4.10k
            max_key = std::max(max_key, input_keys[i]);
184
4.10k
            min_key = std::min(min_key, input_keys[i]);
185
4.10k
        }
186
2.36k
    }
187
188
2.36k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
189
2.36k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
190
2.36k
    if (allow_direct_mapping) {
191
1.58k
        for (const auto& variant_ptr : variant_ptrs) {
192
1.58k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
193
1.58k
                    max_key, min_key);
194
1.58k
        }
195
1.46k
    }
196
2.36k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberItNS_13JoinHashTableIt9HashCRC32ItELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
158
1.38k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
159
1.38k
    using FieldType = typename Method::Base::Key;
160
1.38k
    FieldType max_key = std::numeric_limits<FieldType>::min();
161
1.38k
    FieldType min_key = std::numeric_limits<FieldType>::max();
162
163
1.38k
    size_t num_rows = key_columns[0]->size();
164
1.38k
    if (key_columns[0]->is_nullable()) {
165
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
166
0
                                              ->get_nested_column_ptr()
167
0
                                              ->get_raw_data()
168
0
                                              .data;
169
0
        const NullMap& null_map =
170
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
171
        // skip first mocked row
172
0
        for (size_t i = 1; i < num_rows; i++) {
173
0
            if (null_map[i]) {
174
0
                continue;
175
0
            }
176
0
            max_key = std::max(max_key, input_keys[i]);
177
0
            min_key = std::min(min_key, input_keys[i]);
178
0
        }
179
1.38k
    } else {
180
1.38k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
181
        // skip first mocked row
182
10.1k
        for (size_t i = 1; i < num_rows; i++) {
183
8.71k
            max_key = std::max(max_key, input_keys[i]);
184
8.71k
            min_key = std::min(min_key, input_keys[i]);
185
8.71k
        }
186
1.38k
    }
187
188
1.38k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
189
1.38k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
190
1.38k
    if (allow_direct_mapping) {
191
1.29k
        for (const auto& variant_ptr : variant_ptrs) {
192
1.29k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
193
1.29k
                    max_key, min_key);
194
1.29k
        }
195
1.14k
    }
196
1.38k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberIjNS_13JoinHashTableIj9HashCRC32IjELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
158
16.9k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
159
16.9k
    using FieldType = typename Method::Base::Key;
160
16.9k
    FieldType max_key = std::numeric_limits<FieldType>::min();
161
16.9k
    FieldType min_key = std::numeric_limits<FieldType>::max();
162
163
16.9k
    size_t num_rows = key_columns[0]->size();
164
16.9k
    if (key_columns[0]->is_nullable()) {
165
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
166
0
                                              ->get_nested_column_ptr()
167
0
                                              ->get_raw_data()
168
0
                                              .data;
169
0
        const NullMap& null_map =
170
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
171
        // skip first mocked row
172
0
        for (size_t i = 1; i < num_rows; i++) {
173
0
            if (null_map[i]) {
174
0
                continue;
175
0
            }
176
0
            max_key = std::max(max_key, input_keys[i]);
177
0
            min_key = std::min(min_key, input_keys[i]);
178
0
        }
179
16.9k
    } else {
180
16.9k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
181
        // skip first mocked row
182
26.1M
        for (size_t i = 1; i < num_rows; i++) {
183
26.1M
            max_key = std::max(max_key, input_keys[i]);
184
26.1M
            min_key = std::min(min_key, input_keys[i]);
185
26.1M
        }
186
16.9k
    }
187
188
16.9k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
189
16.9k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
190
16.9k
    if (allow_direct_mapping) {
191
19.0k
        for (const auto& variant_ptr : variant_ptrs) {
192
19.0k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
193
19.0k
                    max_key, min_key);
194
19.0k
        }
195
14.1k
    }
196
16.9k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberImNS_13JoinHashTableIm9HashCRC32ImELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
158
77.7k
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
159
77.7k
    using FieldType = typename Method::Base::Key;
160
77.7k
    FieldType max_key = std::numeric_limits<FieldType>::min();
161
77.7k
    FieldType min_key = std::numeric_limits<FieldType>::max();
162
163
77.7k
    size_t num_rows = key_columns[0]->size();
164
77.7k
    if (key_columns[0]->is_nullable()) {
165
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
166
0
                                              ->get_nested_column_ptr()
167
0
                                              ->get_raw_data()
168
0
                                              .data;
169
0
        const NullMap& null_map =
170
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
171
        // skip first mocked row
172
0
        for (size_t i = 1; i < num_rows; i++) {
173
0
            if (null_map[i]) {
174
0
                continue;
175
0
            }
176
0
            max_key = std::max(max_key, input_keys[i]);
177
0
            min_key = std::min(min_key, input_keys[i]);
178
0
        }
179
77.7k
    } else {
180
77.7k
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
181
        // skip first mocked row
182
322k
        for (size_t i = 1; i < num_rows; i++) {
183
245k
            max_key = std::max(max_key, input_keys[i]);
184
245k
            min_key = std::min(min_key, input_keys[i]);
185
245k
        }
186
77.7k
    }
187
188
77.7k
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
189
77.7k
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
190
77.7k
    if (allow_direct_mapping) {
191
8.01k
        for (const auto& variant_ptr : variant_ptrs) {
192
8.01k
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
193
8.01k
                    max_key, min_key);
194
8.01k
        }
195
5.76k
    }
196
77.7k
}
_ZN5doris25primary_to_direct_mappingINS_15MethodOneNumberIN4wide7integerILm128EjEENS_13JoinHashTableIS4_9HashCRC32IS4_ELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
Line
Count
Source
158
572
                               const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
159
572
    using FieldType = typename Method::Base::Key;
160
572
    FieldType max_key = std::numeric_limits<FieldType>::min();
161
572
    FieldType min_key = std::numeric_limits<FieldType>::max();
162
163
572
    size_t num_rows = key_columns[0]->size();
164
572
    if (key_columns[0]->is_nullable()) {
165
0
        const FieldType* input_keys = (FieldType*)assert_cast<const ColumnNullable*>(key_columns[0])
166
0
                                              ->get_nested_column_ptr()
167
0
                                              ->get_raw_data()
168
0
                                              .data;
169
0
        const NullMap& null_map =
170
0
                assert_cast<const ColumnNullable*>(key_columns[0])->get_null_map_data();
171
        // skip first mocked row
172
0
        for (size_t i = 1; i < num_rows; i++) {
173
0
            if (null_map[i]) {
174
0
                continue;
175
0
            }
176
0
            max_key = std::max(max_key, input_keys[i]);
177
0
            min_key = std::min(min_key, input_keys[i]);
178
0
        }
179
572
    } else {
180
572
        const FieldType* input_keys = (FieldType*)key_columns[0]->get_raw_data().data;
181
        // skip first mocked row
182
1.84k
        for (size_t i = 1; i < num_rows; i++) {
183
1.26k
            max_key = std::max(max_key, input_keys[i]);
184
1.26k
            min_key = std::min(min_key, input_keys[i]);
185
1.26k
        }
186
572
    }
187
188
572
    constexpr auto MAX_MAPPING_RANGE = 1 << 23;
189
572
    bool allow_direct_mapping = (max_key >= min_key && max_key - min_key < MAX_MAPPING_RANGE - 1);
190
572
    if (allow_direct_mapping) {
191
726
        for (const auto& variant_ptr : variant_ptrs) {
192
726
            variant_ptr->method_variant.emplace<DirectPrimaryTypeHashTableContext<FieldType>>(
193
726
                    max_key, min_key);
194
726
        }
195
470
    }
196
572
}
197
198
// Generic fallback: intentionally does nothing.
// It is the overload chosen for every hash-table method that has no dedicated
// non-template overload of try_convert_to_direct_mapping (the coverage listing shows it
// instantiated for the serialized, string, fixed-key, UInt256 and already-direct methods).
// None of the three parameters is read.
template <typename Method>
199
void try_convert_to_direct_mapping(
200
        Method* method, const ColumnRawPtrs& key_columns,
201
52.5k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingISt9monostateEEvPT_RKSt6vectorIPKNS_7IColumnESaIS7_EERKS4_ISt10shared_ptrINS_16JoinDataVariantsEESaISE_EE
_ZN5doris29try_convert_to_direct_mappingINS_16MethodSerializedINS_13JoinHashTableINS_9StringRefE11DefaultHashIS3_vELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
201
14.8k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodOneNumberIN4wide7integerILm256EjEENS_13JoinHashTableIS4_9HashCRC32IS4_ELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
Line
Count
Source
201
45
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectIhNS_13JoinHashTableIh9HashCRC32IhELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectItNS_13JoinHashTableIt9HashCRC32ItELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectIjNS_13JoinHashTableIj9HashCRC32IjELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectImNS_13JoinHashTableIm9HashCRC32ImELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Unexecuted instantiation: _ZN5doris29try_convert_to_direct_mappingINS_21MethodOneNumberDirectIN4wide7integerILm128EjEENS_13JoinHashTableIS4_9HashCRC32IS4_ELb1EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableIm9HashCRC32ImELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISC_EERKS9_ISt10shared_ptrINS_16JoinDataVariantsEESaISJ_EE
Line
Count
Source
201
7.36k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableINS_6UInt72E9HashCRC32IS3_ELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
201
3.40k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableINS_6UInt96E9HashCRC32IS3_ELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
201
8.10k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableINS_7UInt104E9HashCRC32IS3_ELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
201
121
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableIN4wide7integerILm128EjEE9HashCRC32IS5_ELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
Line
Count
Source
201
8.10k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableINS_7UInt136E9HashCRC32IS3_ELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
201
960
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_15MethodKeysFixedINS_13JoinHashTableIN4wide7integerILm256EjEE9HashCRC32IS5_ELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISF_EERKSC_ISt10shared_ptrINS_16JoinDataVariantsEESaISM_EE
Line
Count
Source
201
7.93k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
_ZN5doris29try_convert_to_direct_mappingINS_19MethodStringNoCacheINS_13JoinHashTableINS_9StringRefE11DefaultHashIS3_vELb0EEEEEEEvPT_RKSt6vectorIPKNS_7IColumnESaISD_EERKSA_ISt10shared_ptrINS_16JoinDataVariantsEESaISK_EE
Line
Count
Source
201
1.65k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {}
202
203
inline void try_convert_to_direct_mapping(
204
        PrimaryTypeHashTableContext<UInt8>* context, const ColumnRawPtrs& key_columns,
205
2.36k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
206
2.36k
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
207
2.36k
}
208
209
inline void try_convert_to_direct_mapping(
210
        PrimaryTypeHashTableContext<UInt16>* context, const ColumnRawPtrs& key_columns,
211
1.38k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
212
1.38k
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
213
1.38k
}
214
215
inline void try_convert_to_direct_mapping(
216
        PrimaryTypeHashTableContext<UInt32>* context, const ColumnRawPtrs& key_columns,
217
16.9k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
218
16.9k
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
219
16.9k
}
220
221
inline void try_convert_to_direct_mapping(
222
        PrimaryTypeHashTableContext<UInt64>* context, const ColumnRawPtrs& key_columns,
223
77.7k
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
224
77.7k
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
225
77.7k
}
226
227
inline void try_convert_to_direct_mapping(
228
        PrimaryTypeHashTableContext<UInt128>* context, const ColumnRawPtrs& key_columns,
229
572
        const std::vector<std::shared_ptr<JoinDataVariants>>& variant_ptrs) {
230
572
    primary_to_direct_mapping(context, key_columns, variant_ptrs);
231
572
}
232
233
// ASOF JOIN index with inline values for cache-friendly branchless binary search.
234
// IntType is the integer representation of the ASOF column value:
235
//   uint32_t for DateV2, uint64_t for DateTimeV2 and TimestampTZ.
236
// Rows are sorted by asof_value during build, then materialized into SoA arrays
237
// so probe-side binary search only touches the ASOF values hot path.
238
template <typename IntType>
239
struct AsofIndexGroup {
240
    using int_type = IntType;
241
242
    struct Entry {
243
        IntType asof_value;
244
        uint32_t row_index; // 1-based, 0 = invalid/padding
245
    };
246
247
    std::vector<Entry> entries;
248
    std::vector<IntType> asof_values;
249
    std::vector<uint32_t> row_indexes;
250
251
2.30k
    void add_row(IntType value, uint32_t row_idx) { entries.push_back({value, row_idx}); }
_ZN5doris14AsofIndexGroupIjE7add_rowEjj
Line
Count
Source
251
1.05k
    void add_row(IntType value, uint32_t row_idx) { entries.push_back({value, row_idx}); }
_ZN5doris14AsofIndexGroupImE7add_rowEmj
Line
Count
Source
251
1.24k
    void add_row(IntType value, uint32_t row_idx) { entries.push_back({value, row_idx}); }
252
253
244
    void sort_and_finalize() {
254
244
        if (entries.empty()) {
255
4
            return;
256
4
        }
257
240
        if (entries.size() > 1) {
258
201
            pdqsort(entries.begin(), entries.end(),
259
6.70k
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
_ZZN5doris14AsofIndexGroupIjE17sort_and_finalizeEvENKUlRKNS1_5EntryES4_E_clES4_S4_
Line
Count
Source
259
2.05k
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
_ZZN5doris14AsofIndexGroupImE17sort_and_finalizeEvENKUlRKNS1_5EntryES4_E_clES4_S4_
Line
Count
Source
259
4.64k
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
260
201
        }
261
262
240
        asof_values.resize(entries.size());
263
240
        row_indexes.resize(entries.size());
264
2.54k
        for (size_t i = 0; i < entries.size(); ++i) {
265
2.30k
            asof_values[i] = entries[i].asof_value;
266
2.30k
            row_indexes[i] = entries[i].row_index;
267
2.30k
        }
268
269
240
        std::vector<Entry>().swap(entries);
270
240
    }
_ZN5doris14AsofIndexGroupIjE17sort_and_finalizeEv
Line
Count
Source
253
21
    void sort_and_finalize() {
254
21
        if (entries.empty()) {
255
4
            return;
256
4
        }
257
17
        if (entries.size() > 1) {
258
13
            pdqsort(entries.begin(), entries.end(),
259
13
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
260
13
        }
261
262
17
        asof_values.resize(entries.size());
263
17
        row_indexes.resize(entries.size());
264
1.07k
        for (size_t i = 0; i < entries.size(); ++i) {
265
1.05k
            asof_values[i] = entries[i].asof_value;
266
1.05k
            row_indexes[i] = entries[i].row_index;
267
1.05k
        }
268
269
17
        std::vector<Entry>().swap(entries);
270
17
    }
_ZN5doris14AsofIndexGroupImE17sort_and_finalizeEv
Line
Count
Source
253
223
    void sort_and_finalize() {
254
223
        if (entries.empty()) {
255
0
            return;
256
0
        }
257
223
        if (entries.size() > 1) {
258
188
            pdqsort(entries.begin(), entries.end(),
259
188
                    [](const Entry& a, const Entry& b) { return a.asof_value < b.asof_value; });
260
188
        }
261
262
223
        asof_values.resize(entries.size());
263
223
        row_indexes.resize(entries.size());
264
1.47k
        for (size_t i = 0; i < entries.size(); ++i) {
265
1.24k
            asof_values[i] = entries[i].asof_value;
266
1.24k
            row_indexes[i] = entries[i].row_index;
267
1.24k
        }
268
269
223
        std::vector<Entry>().swap(entries);
270
223
    }
271
272
803
    const IntType* values_data() const { return asof_values.data(); }
_ZNK5doris14AsofIndexGroupIjE11values_dataEv
Line
Count
Source
272
1
    const IntType* values_data() const { return asof_values.data(); }
_ZNK5doris14AsofIndexGroupImE11values_dataEv
Line
Count
Source
272
802
    const IntType* values_data() const { return asof_values.data(); }
273
274
    // Branchless lower_bound: first i where asof_values[i] >= target
275
427
    ALWAYS_INLINE size_t lower_bound(IntType target) const {
276
427
        size_t lo = 0, n = asof_values.size();
277
2.21k
        while (n > 1) {
278
1.78k
            size_t half = n / 2;
279
1.78k
            lo += half * (asof_values[lo + half] < target);
280
1.78k
            n -= half;
281
1.78k
        }
282
427
        if (lo < asof_values.size()) {
283
426
            lo += (asof_values[lo] < target);
284
426
        }
285
427
        return lo;
286
427
    }
_ZNK5doris14AsofIndexGroupIjE11lower_boundEj
Line
Count
Source
275
33
    ALWAYS_INLINE size_t lower_bound(IntType target) const {
276
33
        size_t lo = 0, n = asof_values.size();
277
163
        while (n > 1) {
278
130
            size_t half = n / 2;
279
130
            lo += half * (asof_values[lo + half] < target);
280
130
            n -= half;
281
130
        }
282
33
        if (lo < asof_values.size()) {
283
32
            lo += (asof_values[lo] < target);
284
32
        }
285
33
        return lo;
286
33
    }
_ZNK5doris14AsofIndexGroupImE11lower_boundEm
Line
Count
Source
275
394
    ALWAYS_INLINE size_t lower_bound(IntType target) const {
276
394
        size_t lo = 0, n = asof_values.size();
277
2.05k
        while (n > 1) {
278
1.65k
            size_t half = n / 2;
279
1.65k
            lo += half * (asof_values[lo + half] < target);
280
1.65k
            n -= half;
281
1.65k
        }
282
394
        if (lo < asof_values.size()) {
283
394
            lo += (asof_values[lo] < target);
284
394
        }
285
394
        return lo;
286
394
    }
287
288
    // Branchless upper_bound: first i where asof_values[i] > target
289
869
    ALWAYS_INLINE size_t upper_bound(IntType target) const {
290
869
        size_t lo = 0, n = asof_values.size();
291
3.87k
        while (n > 1) {
292
3.00k
            size_t half = n / 2;
293
3.00k
            lo += half * (asof_values[lo + half] <= target);
294
3.00k
            n -= half;
295
3.00k
        }
296
869
        if (lo < asof_values.size()) {
297
868
            lo += (asof_values[lo] <= target);
298
868
        }
299
869
        return lo;
300
869
    }
_ZNK5doris14AsofIndexGroupIjE11upper_boundEj
Line
Count
Source
289
41
    ALWAYS_INLINE size_t upper_bound(IntType target) const {
290
41
        size_t lo = 0, n = asof_values.size();
291
215
        while (n > 1) {
292
174
            size_t half = n / 2;
293
174
            lo += half * (asof_values[lo + half] <= target);
294
174
            n -= half;
295
174
        }
296
41
        if (lo < asof_values.size()) {
297
40
            lo += (asof_values[lo] <= target);
298
40
        }
299
41
        return lo;
300
41
    }
_ZNK5doris14AsofIndexGroupImE11upper_boundEm
Line
Count
Source
289
828
    ALWAYS_INLINE size_t upper_bound(IntType target) const {
290
828
        size_t lo = 0, n = asof_values.size();
291
3.66k
        while (n > 1) {
292
2.83k
            size_t half = n / 2;
293
2.83k
            lo += half * (asof_values[lo + half] <= target);
294
2.83k
            n -= half;
295
2.83k
        }
296
828
        if (lo < asof_values.size()) {
297
828
            lo += (asof_values[lo] <= target);
298
828
        }
299
828
        return lo;
300
828
    }
301
302
    // Semantics by (is_greater, is_strict):
303
    //   (true,  false): probe >= build  ->  find largest  build value <= probe
304
    //   (true,  true):  probe >  build  ->  find largest  build value <  probe
305
    //   (false, false): probe <= build  ->  find smallest build value >= probe
306
    //   (false, true):  probe <  build  ->  find smallest build value >  probe
307
    // Returns the build row index of the best match, or 0 if no match.
308
    template <bool IsGreater, bool IsStrict>
309
1.28k
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
310
1.28k
        if (asof_values.empty()) {
311
4
            return 0;
312
4
        }
313
1.27k
        if constexpr (IsGreater) {
314
866
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
315
866
            return pos > 0 ? row_indexes[pos - 1] : 0;
316
866
        } else {
317
412
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
318
412
            return pos < asof_values.size() ? row_indexes[pos] : 0;
319
412
        }
320
1.27k
    }
_ZNK5doris14AsofIndexGroupIjE15find_best_matchILb1ELb1EEEjj
Line
Count
Source
309
15
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
310
15
        if (asof_values.empty()) {
311
1
            return 0;
312
1
        }
313
14
        if constexpr (IsGreater) {
314
14
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
315
14
            return pos > 0 ? row_indexes[pos - 1] : 0;
316
        } else {
317
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
318
            return pos < asof_values.size() ? row_indexes[pos] : 0;
319
        }
320
14
    }
_ZNK5doris14AsofIndexGroupIjE15find_best_matchILb1ELb0EEEjj
Line
Count
Source
309
22
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
310
22
        if (asof_values.empty()) {
311
1
            return 0;
312
1
        }
313
21
        if constexpr (IsGreater) {
314
21
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
315
21
            return pos > 0 ? row_indexes[pos - 1] : 0;
316
        } else {
317
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
318
            return pos < asof_values.size() ? row_indexes[pos] : 0;
319
        }
320
21
    }
_ZNK5doris14AsofIndexGroupIjE15find_best_matchILb0ELb1EEEjj
Line
Count
Source
309
16
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
310
16
        if (asof_values.empty()) {
311
1
            return 0;
312
1
        }
313
        if constexpr (IsGreater) {
314
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
315
            return pos > 0 ? row_indexes[pos - 1] : 0;
316
15
        } else {
317
15
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
318
15
            return pos < asof_values.size() ? row_indexes[pos] : 0;
319
15
        }
320
15
    }
_ZNK5doris14AsofIndexGroupIjE15find_best_matchILb0ELb0EEEjj
Line
Count
Source
309
15
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
310
15
        if (asof_values.empty()) {
311
1
            return 0;
312
1
        }
313
        if constexpr (IsGreater) {
314
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
315
            return pos > 0 ? row_indexes[pos - 1] : 0;
316
14
        } else {
317
14
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
318
14
            return pos < asof_values.size() ? row_indexes[pos] : 0;
319
14
        }
320
14
    }
_ZNK5doris14AsofIndexGroupImE15find_best_matchILb1ELb1EEEjm
Line
Count
Source
309
197
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
310
197
        if (asof_values.empty()) {
311
0
            return 0;
312
0
        }
313
197
        if constexpr (IsGreater) {
314
197
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
315
197
            return pos > 0 ? row_indexes[pos - 1] : 0;
316
        } else {
317
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
318
            return pos < asof_values.size() ? row_indexes[pos] : 0;
319
        }
320
197
    }
_ZNK5doris14AsofIndexGroupImE15find_best_matchILb1ELb0EEEjm
Line
Count
Source
309
634
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
310
634
        if (asof_values.empty()) {
311
0
            return 0;
312
0
        }
313
634
        if constexpr (IsGreater) {
314
634
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
315
634
            return pos > 0 ? row_indexes[pos - 1] : 0;
316
        } else {
317
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
318
            return pos < asof_values.size() ? row_indexes[pos] : 0;
319
        }
320
634
    }
_ZNK5doris14AsofIndexGroupImE15find_best_matchILb0ELb1EEEjm
Line
Count
Source
309
190
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
310
190
        if (asof_values.empty()) {
311
0
            return 0;
312
0
        }
313
        if constexpr (IsGreater) {
314
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
315
            return pos > 0 ? row_indexes[pos - 1] : 0;
316
190
        } else {
317
190
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
318
190
            return pos < asof_values.size() ? row_indexes[pos] : 0;
319
190
        }
320
190
    }
_ZNK5doris14AsofIndexGroupImE15find_best_matchILb0ELb0EEEjm
Line
Count
Source
309
193
    ALWAYS_INLINE uint32_t find_best_match(IntType probe_value) const {
310
193
        if (asof_values.empty()) {
311
0
            return 0;
312
0
        }
313
        if constexpr (IsGreater) {
314
            size_t pos = IsStrict ? lower_bound(probe_value) : upper_bound(probe_value);
315
            return pos > 0 ? row_indexes[pos - 1] : 0;
316
193
        } else {
317
193
            size_t pos = IsStrict ? upper_bound(probe_value) : lower_bound(probe_value);
318
193
            return pos < asof_values.size() ? row_indexes[pos] : 0;
319
193
        }
320
193
    }
321
};
322
323
// Type-erased container for all ASOF index groups.
324
// DateV2 -> uint32_t, DateTimeV2/TimestampTZ -> uint64_t.
325
using AsofIndexVariant = std::variant<std::monostate, std::vector<AsofIndexGroup<uint32_t>>,
326
                                      std::vector<AsofIndexGroup<uint64_t>>>;
327
328
} // namespace doris