/root/doris/be/src/vec/functions/dictionary.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <memory> |
21 | | #include <type_traits> |
22 | | #include <unordered_map> |
23 | | #include <utility> |
24 | | #include <vector> |
25 | | |
26 | | #include "vec/columns/column.h" |
27 | | #include "vec/columns/column_string.h" |
28 | | #include "vec/common/assert_cast.h" |
29 | | #include "vec/core/types.h" |
30 | | #include "vec/data_types/data_type.h" |
31 | | #include "vec/data_types/data_type_date_or_datetime_v2.h" |
32 | | #include "vec/data_types/data_type_date_time.h" |
33 | | #include "vec/data_types/data_type_ipv4.h" |
34 | | #include "vec/data_types/data_type_ipv6.h" |
35 | | #include "vec/data_types/data_type_number.h" |
36 | | #include "vec/data_types/data_type_string.h" |
37 | | #include "vec/functions/cast_type_to_either.h" |
38 | | |
39 | | namespace doris { |
40 | | class MemTrackerLimiter; |
41 | | } |
42 | | class DictionaryFactory; |
43 | | namespace doris::vectorized { |
44 | | /* |
45 | | * Dictionary implementation in Doris that provides key-value mapping functionality |
46 | | * Currently only supports in-memory dictionary storage |
47 | | */ |
48 | | |
// Tag string used to mark dictionary data errors.
// NOTE(review): presumably embedded in error messages so that invalid dictionary data
// can be recognized by callers — confirm at the use sites.
// Declared `inline` so that every translation unit including this header shares a single
// object instead of constructing its own internal-linkage copy.
inline const std::string DICT_DATA_ERROR_TAG = "[INVALID_DICT_MARK]";
50 | | |
51 | | struct DictionaryAttribute { |
52 | | const std::string name; // value name |
53 | | const DataTypePtr type; // value type |
54 | | }; |
55 | | |
56 | | // Abstract base class IDictionary that only stores values. Keys are maintained by specific derived classes |
57 | | // IDictionary serves as the foundation for dictionary implementations where: |
58 | | // - Only values are stored at the base level |
59 | | // - Key management is delegated to derived classes |
60 | | // - Provides interface for dictionary operations |
61 | | class IDictionary { |
62 | | public: |
63 | | IDictionary(std::string name, std::vector<DictionaryAttribute> values); |
64 | | virtual ~IDictionary(); |
65 | 5 | std::string dict_name() const { return _dict_name; } |
66 | | |
67 | | // Returns the result column, throws an exception if there is an issue |
68 | | // attribute_type , key_type must be no nullable type |
69 | | virtual ColumnPtr get_column(const std::string& attribute_name, |
70 | | const DataTypePtr& attribute_type, const ColumnPtr& key_column, |
71 | | const DataTypePtr& key_type) const = 0; |
72 | | |
73 | | // Returns multiple result columns, throws an exception if there is an issue |
74 | | // The default implementation calls get_column. If a more performant implementation is needed, this method can be overridden |
75 | | virtual ColumnPtrs get_columns(const std::vector<std::string>& attribute_names, |
76 | | const DataTypes& attribute_types, const ColumnPtr& key_column, |
77 | 0 | const DataTypePtr& key_type) const { |
78 | 0 | ColumnPtrs columns; |
79 | 0 | for (size_t i = 0; i < attribute_names.size(); ++i) { |
80 | 0 | columns.push_back( |
81 | 0 | get_column(attribute_names[i], attribute_types[i], key_column, key_type)); |
82 | 0 | } |
83 | 0 | return columns; |
84 | 0 | } |
85 | | |
86 | | // Compared to get_column and get_columns, supports multiple key columns and multiple value columns |
87 | | // The default implementation only supports one key column, such as IPAddressDictionary, HashMapDictionary |
88 | | // If support for multiple key columns is needed, this method can be overridden |
89 | | virtual ColumnPtrs get_tuple_columns(const std::vector<std::string>& attribute_names, |
90 | | const DataTypes& attribute_types, |
91 | | const ColumnPtrs& key_columns, |
92 | 0 | const DataTypes& key_types) const { |
93 | 0 | if (key_types.size() != 1) { |
94 | 0 | throw doris::Exception(ErrorCode::INTERNAL_ERROR, |
95 | 0 | "Dictionary {} does not support multiple key columns", |
96 | 0 | dict_name()); |
97 | 0 | } |
98 | 0 | return get_columns(attribute_names, attribute_types, key_columns[0], key_types[0]); |
99 | 0 | } |
100 | | |
101 | | bool has_attribute(const std::string& name) const; |
102 | | |
103 | | // will return a non-nullable type |
104 | | DataTypePtr get_attribute_type(const std::string& name) const; |
105 | | size_t attribute_index(const std::string& name) const; |
106 | | |
107 | | bool attribute_is_nullable(size_t idx) const; |
108 | | |
109 | | std::variant<std::false_type, std::true_type> attribute_nullable_variant(size_t idx) const; |
110 | | |
111 | | template <typename F> |
112 | 700 | static bool cast_type(const IDataType* type, F&& f) { |
113 | | // The data types supported by cast_type must be consistent with the AttributeData below. |
114 | 700 | return cast_type_to_either<DataTypeUInt8, DataTypeInt8, DataTypeInt16, DataTypeInt32, |
115 | 700 | DataTypeInt64, DataTypeInt128, DataTypeFloat32, DataTypeFloat64, |
116 | 700 | DataTypeIPv4, DataTypeIPv6, DataTypeString, DataTypeDateV2, |
117 | 700 | DataTypeDateTimeV2, DataTypeDecimal<Decimal32>, |
118 | 700 | DataTypeDecimal<Decimal64>, DataTypeDecimal<Decimal128V3>, |
119 | 700 | DataTypeDecimal<Decimal256>>(type, std::forward<F>(f)); |
120 | 700 | } |
121 | | |
122 | | virtual size_t allocated_bytes() const; |
123 | | |
124 | | protected: |
125 | | friend class DictionaryFactory; |
126 | | |
127 | | // Only used to distinguish from DataTypeString, used for ColumnWithType |
128 | | struct DictDataTypeString64 { |
129 | | using ColumnType = ColumnString; |
130 | | }; |
131 | | |
132 | | template <typename Type> |
133 | | struct ColumnWithType { |
134 | | // OutputColumnType is used as the result column type |
135 | | using OutputColumnType = Type::ColumnType; |
136 | | ColumnPtr column; |
137 | | ColumnPtr null_map; |
138 | | // RealColumnType is the real type of the column, as there may be ColumnString64, but the result column will not be ColumnString64 |
139 | | using RealColumnType = std::conditional_t<std::is_same_v<DictDataTypeString64, Type>, |
140 | | ColumnString64, OutputColumnType>; |
141 | 1.04k | const RealColumnType* get() const { |
142 | 1.04k | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); |
143 | 1.04k | } _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIhEEE3getEv Line | Count | Source | 141 | 78 | const RealColumnType* get() const { | 142 | 78 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIaEEE3getEv Line | Count | Source | 141 | 78 | const RealColumnType* get() const { | 142 | 78 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIsEEE3getEv Line | Count | Source | 141 | 78 | const RealColumnType* get() const { | 142 | 78 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIiEEE3getEv Line | Count | Source | 141 | 97 | const RealColumnType* get() const { | 142 | 97 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 97 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIlEEE3getEv Line | Count | Source | 141 | 84 | const RealColumnType* get() const { | 142 | 84 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 84 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberInEEE3getEv Line | Count | Source | 141 | 78 | const RealColumnType* get() const { | 142 | 78 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIfEEE3getEv Line | Count | Source | 141 | 79 | const RealColumnType* get() const { | 142 | 79 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 79 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIdEEE3getEv Line | Count | Source | 141 | 78 | const RealColumnType* get() const { | 142 | 78 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv4EE3getEv Line | Count | Source | 141 | 78 | const RealColumnType* get() const { | 142 | 78 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv6EE3getEv Line | Count | Source | 141 | 78 | const RealColumnType* get() const { | 142 | 78 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeStringEE3getEv Line | Count | Source | 141 | 55 | const RealColumnType* get() const { | 142 | 55 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 55 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS1_20DictDataTypeString64EE3getEv Line | Count | Source | 141 | 27 | const RealColumnType* get() const { | 142 | 27 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 27 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeDateV2EE3getEv Line | Count | Source | 141 | 78 | const RealColumnType* get() const { | 142 | 78 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_18DataTypeDateTimeV2EE3getEv Line | Count | Source | 141 | 78 | const RealColumnType* get() const { | 142 | 78 | return assert_cast<const RealColumnType*, TypeCheckOnRelease::DISABLE>(column.get()); | 143 | 78 | } |
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIiEEEEE3getEv Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIlEEEEE3getEv Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_12Decimal128V3EEEE3getEv Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEEE3getEv |
144 | | |
145 | 1.04k | const ColumnUInt8* get_null_map() const { |
146 | 1.04k | if (!null_map) { |
147 | 1.03k | return nullptr; |
148 | 1.03k | } |
149 | 8 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); |
150 | 1.04k | } _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIhEEE12get_null_mapEv Line | Count | Source | 145 | 78 | const ColumnUInt8* get_null_map() const { | 146 | 78 | if (!null_map) { | 147 | 78 | return nullptr; | 148 | 78 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIaEEE12get_null_mapEv Line | Count | Source | 145 | 78 | const ColumnUInt8* get_null_map() const { | 146 | 78 | if (!null_map) { | 147 | 78 | return nullptr; | 148 | 78 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIsEEE12get_null_mapEv Line | Count | Source | 145 | 78 | const ColumnUInt8* get_null_map() const { | 146 | 78 | if (!null_map) { | 147 | 78 | return nullptr; | 148 | 78 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIiEEE12get_null_mapEv Line | Count | Source | 145 | 97 | const ColumnUInt8* get_null_map() const { | 146 | 97 | if (!null_map) { | 147 | 89 | return nullptr; | 148 | 89 | } | 149 | 8 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 97 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIlEEE12get_null_mapEv Line | Count | Source | 145 | 82 | const ColumnUInt8* get_null_map() const { | 146 | 82 | if (!null_map) { | 147 | 82 | return nullptr; | 148 | 82 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 82 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberInEEE12get_null_mapEv Line | Count | Source | 145 | 78 | const ColumnUInt8* get_null_map() const { | 146 | 78 | if (!null_map) { | 147 | 78 | return nullptr; | 148 | 78 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIfEEE12get_null_mapEv Line | Count | Source | 145 | 79 | const ColumnUInt8* get_null_map() const { | 146 | 79 | if (!null_map) { | 147 | 79 | return nullptr; | 148 | 79 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 79 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeNumberIdEEE12get_null_mapEv Line | Count | Source | 145 | 78 | const ColumnUInt8* get_null_map() const { | 146 | 78 | if (!null_map) { | 147 | 78 | return nullptr; | 148 | 78 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv4EE12get_null_mapEv Line | Count | Source | 145 | 78 | const ColumnUInt8* get_null_map() const { | 146 | 78 | if (!null_map) { | 147 | 78 | return nullptr; | 148 | 78 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_12DataTypeIPv6EE12get_null_mapEv Line | Count | Source | 145 | 78 | const ColumnUInt8* get_null_map() const { | 146 | 78 | if (!null_map) { | 147 | 78 | return nullptr; | 148 | 78 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeStringEE12get_null_mapEv Line | Count | Source | 145 | 55 | const ColumnUInt8* get_null_map() const { | 146 | 55 | if (!null_map) { | 147 | 55 | return nullptr; | 148 | 55 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 55 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS1_20DictDataTypeString64EE12get_null_mapEv Line | Count | Source | 145 | 27 | const ColumnUInt8* get_null_map() const { | 146 | 27 | if (!null_map) { | 147 | 27 | return nullptr; | 148 | 27 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 27 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_14DataTypeDateV2EE12get_null_mapEv Line | Count | Source | 145 | 78 | const ColumnUInt8* get_null_map() const { | 146 | 78 | if (!null_map) { | 147 | 78 | return nullptr; | 148 | 78 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 78 | } |
_ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_18DataTypeDateTimeV2EE12get_null_mapEv Line | Count | Source | 145 | 78 | const ColumnUInt8* get_null_map() const { | 146 | 78 | if (!null_map) { | 147 | 78 | return nullptr; | 148 | 78 | } | 149 | 0 | return assert_cast<const ColumnUInt8*, TypeCheckOnRelease::DISABLE>(null_map.get()); | 150 | 78 | } |
Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIiEEEEE12get_null_mapEv Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIlEEEEE12get_null_mapEv Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_12Decimal128V3EEEE12get_null_mapEv Unexecuted instantiation: _ZNK5doris10vectorized11IDictionary14ColumnWithTypeINS0_15DataTypeDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEEE12get_null_mapEv |
151 | | }; |
152 | | |
153 | | // res_real_column : result column (get_column result) |
154 | | // res_null : if value is null, will set res_null to true |
155 | | // value_column : corresponding value column, non-nullable |
156 | | // value_null_column : corresponding value null map, if the original value is non-nullable, it will be nullptr |
157 | | // value_idx : index in the value column |
158 | | template <bool value_is_nullable, typename ResultColumnType> |
159 | | ALWAYS_INLINE static void set_value_data(ResultColumnType* res_real_column, UInt8& res_null, |
160 | | const auto* value_column, |
161 | | const ColumnUInt8* value_null_column, |
162 | 55.8k | const size_t& value_idx) { |
163 | 55.8k | if constexpr (value_is_nullable) { |
164 | | // if the value is null, set the result column to null |
165 | 19 | if (value_null_column->get_element(value_idx)) { |
166 | 9 | res_null = true; |
167 | 9 | res_real_column->insert_default(); |
168 | 9 | return; |
169 | 9 | } |
170 | 19 | } |
171 | 55.8k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { |
172 | | // If it is a string column, use get_data_at to avoid copying |
173 | 4.31k | StringRef str_ref = value_column->get_data_at(value_idx); |
174 | 4.31k | res_real_column->insert_data(str_ref.data, str_ref.size); |
175 | 51.5k | } else { |
176 | 51.5k | res_real_column->insert_value(value_column->get_element(value_idx)); |
177 | 51.5k | } |
178 | 10 | } _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIhEES4_EEvPT0_RhPKT1_PKS4_RKm Line | Count | Source | 162 | 4.29k | const size_t& value_idx) { | 163 | 4.29k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.29k | if (value_null_column->get_element(value_idx)) { | 166 | 4.29k | res_null = true; | 167 | 4.29k | res_real_column->insert_default(); | 168 | 4.29k | return; | 169 | 4.29k | } | 170 | 4.29k | } | 171 | 4.29k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.29k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.29k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.29k | } else { | 176 | 4.29k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.29k | } | 178 | 4.29k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIhEES4_EEvPT0_RhPKT1_PKS4_RKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIaEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 4.29k | const size_t& value_idx) { | 163 | 4.29k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.29k | if (value_null_column->get_element(value_idx)) { | 166 | 4.29k | res_null = true; | 167 | 4.29k | res_real_column->insert_default(); | 168 | 4.29k | return; | 169 | 4.29k | } | 170 | 4.29k | } | 171 | 4.29k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.29k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.29k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.29k | } else { | 176 | 4.29k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.29k | } | 178 | 4.29k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIaEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIsEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 4.29k | const size_t& value_idx) { | 163 | 4.29k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.29k | if (value_null_column->get_element(value_idx)) { | 166 | 4.29k | res_null = true; | 167 | 4.29k | res_real_column->insert_default(); | 168 | 4.29k | return; | 169 | 4.29k | } | 170 | 4.29k | } | 171 | 4.29k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.29k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.29k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.29k | } else { | 176 | 4.29k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.29k | } | 178 | 4.29k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIsEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIiEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 4.30k | const size_t& value_idx) { | 163 | 4.30k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.30k | if (value_null_column->get_element(value_idx)) { | 166 | 4.30k | res_null = true; | 167 | 4.30k | res_real_column->insert_default(); | 168 | 4.30k | return; | 169 | 4.30k | } | 170 | 4.30k | } | 171 | 4.30k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.30k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.30k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.30k | } else { | 176 | 4.30k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.30k | } | 178 | 4.30k | } |
_ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIiEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 19 | const size_t& value_idx) { | 163 | 19 | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 19 | if (value_null_column->get_element(value_idx)) { | 166 | 9 | res_null = true; | 167 | 9 | res_real_column->insert_default(); | 168 | 9 | return; | 169 | 9 | } | 170 | 19 | } | 171 | 10 | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 10 | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 10 | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 10 | } else { | 176 | 10 | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 10 | } | 178 | 10 | } |
_ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIlEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 4.32k | const size_t& value_idx) { | 163 | 4.32k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.32k | if (value_null_column->get_element(value_idx)) { | 166 | 4.32k | res_null = true; | 167 | 4.32k | res_real_column->insert_default(); | 168 | 4.32k | return; | 169 | 4.32k | } | 170 | 4.32k | } | 171 | 4.32k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.32k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.32k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.32k | } else { | 176 | 4.32k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.32k | } | 178 | 4.32k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIlEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorInEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 4.29k | const size_t& value_idx) { | 163 | 4.29k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.29k | if (value_null_column->get_element(value_idx)) { | 166 | 4.29k | res_null = true; | 167 | 4.29k | res_real_column->insert_default(); | 168 | 4.29k | return; | 169 | 4.29k | } | 170 | 4.29k | } | 171 | 4.29k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.29k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.29k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.29k | } else { | 176 | 4.29k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.29k | } | 178 | 4.29k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorInEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIfEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 4.29k | const size_t& value_idx) { | 163 | 4.29k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.29k | if (value_null_column->get_element(value_idx)) { | 166 | 4.29k | res_null = true; | 167 | 4.29k | res_real_column->insert_default(); | 168 | 4.29k | return; | 169 | 4.29k | } | 170 | 4.29k | } | 171 | 4.29k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.29k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.29k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.29k | } else { | 176 | 4.29k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.29k | } | 178 | 4.29k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIfEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIdEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 4.29k | const size_t& value_idx) { | 163 | 4.29k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.29k | if (value_null_column->get_element(value_idx)) { | 166 | 4.29k | res_null = true; | 167 | 4.29k | res_real_column->insert_default(); | 168 | 4.29k | return; | 169 | 4.29k | } | 170 | 4.29k | } | 171 | 4.29k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.29k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.29k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.29k | } else { | 176 | 4.29k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.29k | } | 178 | 4.29k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIdEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIjEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 8.58k | const size_t& value_idx) { | 163 | 8.58k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 8.58k | if (value_null_column->get_element(value_idx)) { | 166 | 8.58k | res_null = true; | 167 | 8.58k | res_real_column->insert_default(); | 168 | 8.58k | return; | 169 | 8.58k | } | 170 | 8.58k | } | 171 | 8.58k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 8.58k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 8.58k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 8.58k | } else { | 176 | 8.58k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 8.58k | } | 178 | 8.58k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIjEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorIoEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 4.29k | const size_t& value_idx) { | 163 | 4.29k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.29k | if (value_null_column->get_element(value_idx)) { | 166 | 4.29k | res_null = true; | 167 | 4.29k | res_real_column->insert_default(); | 168 | 4.29k | return; | 169 | 4.29k | } | 170 | 4.29k | } | 171 | 4.29k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.29k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.29k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.29k | } else { | 176 | 4.29k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.29k | } | 178 | 4.29k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorIoEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_9ColumnStrIjEES4_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm Line | Count | Source | 162 | 2.86k | const size_t& value_idx) { | 163 | 2.86k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 2.86k | if (value_null_column->get_element(value_idx)) { | 166 | 2.86k | res_null = true; | 167 | 2.86k | res_real_column->insert_default(); | 168 | 2.86k | return; | 169 | 2.86k | } | 170 | 2.86k | } | 171 | 2.86k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 2.86k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 2.86k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 2.86k | } else { | 176 | 2.86k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 2.86k | } | 178 | 2.86k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_9ColumnStrIjEES4_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_9ColumnStrIjEENS3_ImEEEEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm Line | Count | Source | 162 | 1.44k | const size_t& value_idx) { | 163 | 1.44k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 1.44k | if (value_null_column->get_element(value_idx)) { | 166 | 1.44k | res_null = true; | 167 | 1.44k | res_real_column->insert_default(); | 168 | 1.44k | return; | 169 | 1.44k | } | 170 | 1.44k | } | 171 | 1.44k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 1.44k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 1.44k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 1.44k | } else { | 176 | 1.44k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 1.44k | } | 178 | 1.44k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_9ColumnStrIjEENS3_ImEEEEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_12ColumnVectorImEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Line | Count | Source | 162 | 4.29k | const size_t& value_idx) { | 163 | 4.29k | if constexpr (value_is_nullable) { | 164 | | // if the value is null, set the result column to null | 165 | 4.29k | if (value_null_column->get_element(value_idx)) { | 166 | 4.29k | res_null = true; | 167 | 4.29k | res_real_column->insert_default(); | 168 | 4.29k | return; | 169 | 4.29k | } | 170 | 4.29k | } | 171 | 4.29k | if constexpr (std::is_same_v<ResultColumnType, ColumnString>) { | 172 | | // If it is a string column, use get_data_at to avoid copying | 173 | 4.29k | StringRef str_ref = value_column->get_data_at(value_idx); | 174 | 4.29k | res_real_column->insert_data(str_ref.data, str_ref.size); | 175 | 4.29k | } else { | 176 | 4.29k | res_real_column->insert_value(value_column->get_element(value_idx)); | 177 | 4.29k | } | 178 | 4.29k | } |
Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_12ColumnVectorImEES4_EEvPT0_RhPKT1_PKNS3_IhEERKm Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_7DecimalIiEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_7DecimalIiEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_7DecimalIlEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_7DecimalIlEEEES6_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_12Decimal128V3EEES5_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_12Decimal128V3EEES5_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb0ENS0_13ColumnDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEES9_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm Unexecuted instantiation: _ZN5doris10vectorized11IDictionary14set_value_dataILb1ENS0_13ColumnDecimalINS0_7DecimalIN4wide7integerILm256EiEEEEEES9_EEvPT0_RhPKT1_PKNS0_12ColumnVectorIhEERKm |
179 | | |
180 | | /// Variant over the typed value-column holders kept in _values_data. TODO: Add support for more data types, such as Array, Map, etc.
181 | | using ValueData =
182 | | std::variant<ColumnWithType<DataTypeUInt8>, ColumnWithType<DataTypeInt8>,
183 | | ColumnWithType<DataTypeInt16>, ColumnWithType<DataTypeInt32>,
184 | | ColumnWithType<DataTypeInt64>, ColumnWithType<DataTypeInt128>,
185 | | 
186 | | ColumnWithType<DataTypeFloat32>, ColumnWithType<DataTypeFloat64>,
187 | | 
188 | | ColumnWithType<DataTypeIPv4>, ColumnWithType<DataTypeIPv6>,
189 | | 
190 | | ColumnWithType<DataTypeString>, ColumnWithType<DictDataTypeString64>,
191 | | 
192 | | ColumnWithType<DataTypeDateV2>, ColumnWithType<DataTypeDateTimeV2>,
193 | | 
194 | | ColumnWithType<DataTypeDecimal<Decimal32>>,
195 | | ColumnWithType<DataTypeDecimal<Decimal64>>,
196 | | ColumnWithType<DataTypeDecimal<Decimal128V3>>,
197 | | ColumnWithType<DataTypeDecimal<Decimal256>>>;
198 | | |
199 | | void load_values(const std::vector<ColumnPtr>& values_column); // NOTE(review): presumably fills _values_data from values_column; the definition is not visible here, confirm.
200 | | 
201 | | // _values_data is used to store the data of value columns.
202 | | std::vector<ValueData> _values_data;
203 | | std::string _dict_name;
204 | | std::vector<DictionaryAttribute> _attributes;
205 | | // A mapping from attribute names to their corresponding indices.
206 | | std::unordered_map<std::string, size_t> _name_to_attributes_index;
207 | | 
208 | | // _mem_tracker comes from DictionaryFactory. If _mem_tracker is nullptr, the dictionary is running in a unit test (UT).
209 | | std::shared_ptr<MemTrackerLimiter> _mem_tracker;
210 | | }; |
211 | | |
212 | | using DictionaryPtr = std::shared_ptr<IDictionary>; // Shared-ownership pointer alias for IDictionary instances.
213 | | |
214 | | } // namespace doris::vectorized |