Coverage Report

Created: 2026-05-09 12:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/parquet/parquet_predicate.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/parquet_types.h>
21
22
#include <cmath>
23
#include <cstring>
24
#include <vector>
25
26
#include "cctz/time_zone.h"
27
#include "core/data_type/data_type_decimal.h"
28
#include "core/data_type/primitive_type.h"
29
#include "exec/common/endian.h"
30
#include "format/format_common.h"
31
#include "format/parquet/parquet_block_split_bloom_filter.h"
32
#include "format/parquet/parquet_column_convert.h"
33
#include "format/parquet/parquet_common.h"
34
#include "format/parquet/schema_desc.h"
35
#include "storage/olap_scan_common.h"
36
#include "storage/segment/row_ranges.h"
37
#include "util/timezone_utils.h"
38
39
namespace doris {
40
class ParquetPredicate {
41
private:
42
9.81k
    // Returns true when `byte` is a 7-bit ASCII code unit, i.e. its high bit is clear.
    static inline bool _is_ascii(uint8_t byte) { return (byte & 0x80U) == 0; }
43
44
849
    // Returns the number of leading bytes that `encoding_min` and `encoding_max` share.
    // The scan stops at the first differing byte or at the end of the shorter string.
    static int _common_prefix(const std::string& encoding_min, const std::string& encoding_max) {
        const size_t limit = std::min(encoding_min.size(), encoding_max.size());
        // Count with size_t so the index and the bound have the same signedness; the result
        // is narrowed once on return to keep the int-returning interface.
        size_t matched = 0;
        while (matched < limit && encoding_min[matched] == encoding_max[matched]) {
            ++matched;
        }
        return static_cast<int>(matched);
    }
53
54
1.23k
    static bool _try_read_old_utf8_stats(std::string& encoding_min, std::string& encoding_max) {
55
1.23k
        if (encoding_min == encoding_max) {
56
            // If min = max, then there is a single value only
57
            // No need to modify, just use min
58
382
            encoding_max = encoding_min;
59
382
            return true;
60
850
        } else {
61
850
            int common_prefix_length = _common_prefix(encoding_min, encoding_max);
62
63
            // For min we can retain all-ASCII, because this produces a strictly lower value.
64
850
            int min_good_length = common_prefix_length;
65
8.30k
            while (min_good_length < encoding_min.size() &&
66
8.30k
                   _is_ascii(static_cast<uint8_t>(encoding_min[min_good_length]))) {
67
7.45k
                min_good_length++;
68
7.45k
            }
69
70
            // For max we can be sure only of the part matching the min. When they differ, we can consider only one next, and only if both are ASCII
71
850
            int max_good_length = common_prefix_length;
72
850
            if (max_good_length < encoding_max.size() && max_good_length < encoding_min.size() &&
73
850
                _is_ascii(static_cast<uint8_t>(encoding_min[max_good_length])) &&
74
850
                _is_ascii(static_cast<uint8_t>(encoding_max[max_good_length]))) {
75
761
                max_good_length++;
76
761
            }
77
            // Incrementing 127 would overflow. Incrementing within non-ASCII can have side-effects.
78
855
            while (max_good_length > 0 &&
79
855
                   (static_cast<uint8_t>(encoding_max[max_good_length - 1]) == 127 ||
80
808
                    !_is_ascii(static_cast<uint8_t>(encoding_max[max_good_length - 1])))) {
81
5
                max_good_length--;
82
5
            }
83
850
            if (max_good_length == 0) {
84
                // We can return just min bound, but code downstream likely expects both are present or both are absent.
85
46
                return false;
86
46
            }
87
88
804
            encoding_min.resize(min_good_length);
89
804
            encoding_max.resize(max_good_length);
90
804
            if (max_good_length > 0) {
91
801
                encoding_max[max_good_length - 1]++;
92
801
            }
93
804
            return true;
94
850
        }
95
1.23k
    }
96
97
134
    // Maps a Parquet schema element to the sort order assumed for its statistics.
    // The logical type annotation is consulted first (checks are evaluated in a fixed order and
    // the first one that is set wins); without any annotation the physical type decides.
    static SortOrder _determine_sort_order(const tparquet::SchemaElement& parquet_schema) {
        const tparquet::Type::type physical = parquet_schema.type;
        const tparquet::LogicalType& logical = parquet_schema.logicalType;
        const auto& annotated = logical.__isset;

        // Assume string type is SortOrder::SIGNED, use ParquetPredicate::_try_read_old_utf8_stats() to handle it.
        const bool is_binary = physical == tparquet::Type::BYTE_ARRAY ||
                               physical == tparquet::Type::FIXED_LEN_BYTE_ARRAY;
        if (annotated.STRING && is_binary) {
            return SortOrder::SIGNED;
        }

        if (annotated.INTEGER) {
            return logical.INTEGER.isSigned ? SortOrder::SIGNED : SortOrder::UNSIGNED;
        }
        if (annotated.DATE) {
            return SortOrder::SIGNED;
        }
        if (annotated.ENUM || annotated.BSON || annotated.JSON || annotated.STRING) {
            return SortOrder::UNSIGNED;
        }
        if (annotated.DECIMAL || annotated.MAP || annotated.LIST) {
            return SortOrder::UNKNOWN;
        }
        if (annotated.TIME || annotated.TIMESTAMP) {
            return SortOrder::SIGNED;
        }
        if (annotated.UNKNOWN) {
            return SortOrder::UNKNOWN;
        }

        // No logical annotation: fall back to the physical type.
        switch (physical) {
        case tparquet::Type::BOOLEAN:
        case tparquet::Type::INT32:
        case tparquet::Type::INT64:
        case tparquet::Type::FLOAT:
        case tparquet::Type::DOUBLE:
            return SortOrder::SIGNED;
        case tparquet::Type::BYTE_ARRAY:
        case tparquet::Type::FIXED_LEN_BYTE_ARRAY:
            return SortOrder::UNSIGNED;
        default:
            // INT96 and any other physical type have no known order.
            return SortOrder::UNKNOWN;
        }
    }
154
155
public:
156
    // Number of bytes read for the bloom filter header when the column metadata carries no
    // positive bloom_filter_length (see read_bloom_filter()).
    static constexpr int BLOOM_FILTER_MAX_HEADER_LENGTH = 64;
157
    struct ColumnStat {
158
        std::string encoded_min_value;
159
        std::string encoded_max_value;
160
        bool has_null;
161
        bool is_all_null;
162
        const FieldSchema* col_schema;
163
        const cctz::time_zone* ctz;
164
        std::unique_ptr<ParquetBlockSplitBloomFilter> bloom_filter;
165
        std::function<bool(ParquetPredicate::ColumnStat*, const int)>* get_stat_func = nullptr;
166
        std::function<bool(ParquetPredicate::ColumnStat*, const int)>* get_bloom_filter_func =
167
                nullptr;
168
    };
169
170
14
    // Tells whether a Parquet bloom filter can be probed with values of primitive type `type`.
    //
    // Only types whose physical representation equals the logical one are accepted (no
    // conversion needed). For types like DATEV2, DATETIMEV2 and DECIMAL, Parquet stores the
    // physical format (INT32, INT64, etc.) while Doris uses a different internal
    // representation. The bloom filter works on physical bytes, but only logical values are
    // available and there is no reverse (logical -> physical) conversion.
    // TINYINT/SMALLINT are excluded as well: they need conversion via LittleIntPhysicalConverter.
    static bool bloom_filter_supported(PrimitiveType type) {
        return type == TYPE_BOOLEAN || type == TYPE_INT || type == TYPE_BIGINT ||
               type == TYPE_FLOAT || type == TYPE_DOUBLE || type == TYPE_CHAR ||
               type == TYPE_VARCHAR || type == TYPE_STRING;
    }
191
192
    struct PageIndexStat {
193
        // Indicates whether the page index information in this column can be used.
194
        bool available = false;
195
        int64_t num_of_pages;
196
        std::vector<std::string> encoded_min_value;
197
        std::vector<std::string> encoded_max_value;
198
        std::vector<bool> has_null;
199
        std::vector<bool> is_all_null;
200
        const FieldSchema* col_schema;
201
202
        // Record the row range corresponding to each page.
203
        std::vector<segment_v2::RowRange> ranges;
204
    };
205
206
    struct CachedPageIndexStat {
207
        const cctz::time_zone* ctz;
208
        std::map<int, PageIndexStat> stats;
209
        std::function<bool(PageIndexStat**, int)> get_stat_func;
210
        RowRange row_group_range;
211
    };
212
213
    // The encoded Parquet min-max value is parsed into `fields`;
214
    // Can be used in row groups and page index statistics.
215
    static Status parse_min_max_value(const FieldSchema* col_schema, const std::string& encoded_min,
216
                                      const std::string& encoded_max, const cctz::time_zone& ctz,
217
23.2k
                                      Field* min_field, Field* max_field) {
218
23.2k
        auto logical_data_type = remove_nullable(col_schema->data_type);
219
23.2k
        auto converter = parquet::PhysicalToLogicalConverter::get_converter(
220
23.2k
                col_schema, logical_data_type, logical_data_type, &ctz);
221
23.2k
        ColumnPtr physical_column;
222
23.2k
        switch (col_schema->parquet_schema.type) {
223
344
        case tparquet::Type::type::BOOLEAN: {
224
344
            auto physical_col = ColumnUInt8::create();
225
344
            physical_col->get_data().data();
226
344
            physical_col->resize(2);
227
344
            physical_col->get_data()[0] = *reinterpret_cast<const bool*>(encoded_min.data());
228
344
            physical_col->get_data()[1] = *reinterpret_cast<const bool*>(encoded_max.data());
229
344
            physical_column = std::move(physical_col);
230
344
            break;
231
0
        }
232
17.3k
        case tparquet::Type::type::INT32: {
233
17.3k
            auto physical_col = ColumnInt32::create();
234
17.3k
            physical_col->resize(2);
235
236
17.3k
            physical_col->get_data()[0] = *reinterpret_cast<const int32_t*>(encoded_min.data());
237
17.3k
            physical_col->get_data()[1] = *reinterpret_cast<const int32_t*>(encoded_max.data());
238
239
17.3k
            physical_column = std::move(physical_col);
240
17.3k
            break;
241
0
        }
242
2.37k
        case tparquet::Type::type::INT64: {
243
2.37k
            auto physical_col = ColumnInt64::create();
244
2.37k
            physical_col->resize(2);
245
2.37k
            physical_col->get_data()[0] = *reinterpret_cast<const int64_t*>(encoded_min.data());
246
2.37k
            physical_col->get_data()[1] = *reinterpret_cast<const int64_t*>(encoded_max.data());
247
2.37k
            physical_column = std::move(physical_col);
248
2.37k
            break;
249
0
        }
250
212
        case tparquet::Type::type::FLOAT: {
251
212
            auto physical_col = ColumnFloat32::create();
252
212
            physical_col->resize(2);
253
212
            physical_col->get_data()[0] = *reinterpret_cast<const float*>(encoded_min.data());
254
212
            physical_col->get_data()[1] = *reinterpret_cast<const float*>(encoded_max.data());
255
212
            physical_column = std::move(physical_col);
256
212
            break;
257
0
        }
258
203
        case tparquet::Type::type::DOUBLE: {
259
203
            auto physical_col = ColumnFloat64 ::create();
260
203
            physical_col->resize(2);
261
203
            physical_col->get_data()[0] = *reinterpret_cast<const double*>(encoded_min.data());
262
203
            physical_col->get_data()[1] = *reinterpret_cast<const double*>(encoded_max.data());
263
203
            physical_column = std::move(physical_col);
264
203
            break;
265
0
        }
266
2.59k
        case tparquet::Type::type::BYTE_ARRAY: {
267
2.59k
            auto physical_col = ColumnString::create();
268
2.59k
            physical_col->insert_data(encoded_min.data(), encoded_min.size());
269
2.59k
            physical_col->insert_data(encoded_max.data(), encoded_max.size());
270
2.59k
            physical_column = std::move(physical_col);
271
2.59k
            break;
272
0
        }
273
160
        case tparquet::Type::type::FIXED_LEN_BYTE_ARRAY: {
274
160
            auto physical_col = ColumnUInt8::create();
275
160
            physical_col->resize(2 * col_schema->parquet_schema.type_length);
276
160
            DCHECK(col_schema->parquet_schema.type_length == encoded_min.length());
277
160
            DCHECK(col_schema->parquet_schema.type_length == encoded_max.length());
278
279
160
            auto ptr = physical_col->get_data().data();
280
160
            memcpy(ptr, encoded_min.data(), encoded_min.length());
281
160
            memcpy(ptr + encoded_min.length(), encoded_max.data(), encoded_max.length());
282
160
            physical_column = std::move(physical_col);
283
160
            break;
284
0
        }
285
0
        case tparquet::Type::type::INT96: {
286
0
            auto physical_col = ColumnInt8::create();
287
0
            physical_col->resize(2 * sizeof(ParquetInt96));
288
0
            DCHECK(sizeof(ParquetInt96) == encoded_min.length());
289
0
            DCHECK(sizeof(ParquetInt96) == encoded_max.length());
290
291
0
            auto ptr = physical_col->get_data().data();
292
0
            memcpy(ptr, encoded_min.data(), encoded_min.length());
293
0
            memcpy(ptr + encoded_min.length(), encoded_max.data(), encoded_max.length());
294
0
            physical_column = std::move(physical_col);
295
0
            break;
296
0
        }
297
23.2k
        }
298
299
23.2k
        ColumnPtr logical_column;
300
23.2k
        if (converter->is_consistent()) {
301
20.1k
            logical_column = physical_column;
302
20.1k
        } else {
303
3.05k
            logical_column = logical_data_type->create_column();
304
3.05k
            RETURN_IF_ERROR(converter->physical_convert(physical_column, logical_column));
305
3.05k
        }
306
307
23.2k
        DCHECK(logical_column->size() == 2);
308
23.2k
        *min_field = logical_column->operator[](0);
309
23.2k
        *max_field = logical_column->operator[](1);
310
311
23.2k
        auto logical_prim_type = logical_data_type->get_primitive_type();
312
313
23.2k
        if (logical_prim_type == TYPE_FLOAT) {
314
210
            auto& min_value = min_field->get<TYPE_FLOAT>();
315
210
            auto& max_value = max_field->get<TYPE_FLOAT>();
316
317
210
            if (std::isnan(min_value) || std::isnan(max_value)) {
318
1
                return Status::DataQualityError("Can not use this parquet min/max value.");
319
1
            }
320
            // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 values would be skipped
321
209
            if (std::signbit(min_value) == 0 && min_value == 0.0F) {
322
0
                min_value = -0.0F;
323
0
            }
324
209
            if (std::signbit(max_value) != 0 && max_value == -0.0F) {
325
0
                max_value = 0.0F;
326
0
            }
327
23.0k
        } else if (logical_prim_type == TYPE_DOUBLE) {
328
203
            auto& min_value = min_field->get<TYPE_DOUBLE>();
329
203
            auto& max_value = max_field->get<TYPE_DOUBLE>();
330
331
203
            if (std::isnan(min_value) || std::isnan(max_value)) {
332
0
                return Status::DataQualityError("Can not use this parquet min/max value.");
333
0
            }
334
            // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 values would be skipped
335
203
            if (std::signbit(min_value) == 0 && min_value == 0.0F) {
336
0
                min_value = -0.0F;
337
0
            }
338
203
            if (std::signbit(max_value) != 0 && max_value == -0.0F) {
339
0
                max_value = 0.0F;
340
0
            }
341
22.8k
        } else if (col_schema->parquet_schema.type == tparquet::Type::type::INT96 ||
342
22.8k
                   logical_prim_type == TYPE_DATETIMEV2) {
343
818
            auto min_value = min_field->get<TYPE_DATETIMEV2>();
344
818
            auto max_value = min_field->get<TYPE_DATETIMEV2>();
345
346
            // From Trino: Parquet INT96 timestamp values were compared incorrectly
347
            // for the purposes of producing statistics by older parquet writers,
348
            // so PARQUET-1065 deprecated them. The result is that any writer that produced stats
349
            // was producing unusable incorrect values, except the special case where min == max
350
            // and an incorrect ordering would not be material to the result.
351
            // PARQUET-1026 made binary stats available and valid in that special case.
352
818
            if (min_value != max_value) {
353
0
                return Status::DataQualityError("invalid min/max value");
354
0
            }
355
818
        }
356
357
23.2k
        return Status::OK();
358
23.2k
    }
359
360
    // Extracts the null information and the plain-encoded min/max bounds of a column from its
    // Parquet metadata into `ans_stat`.
    //
    // The `min_value`/`max_value` statistics are preferred; the deprecated `min`/`max` pair is
    // used as a fallback when its sort order can be trusted and the writer is not known to
    // produce corrupt statistics. String bounds from either source are passed through
    // _try_read_old_utf8_stats() before they are returned.
    //
    // @param col_schema        schema of the column.
    // @param column_meta_data  Parquet metadata holding the statistics.
    // @param ignored_stats     optional cache, keyed by physical type, of whether the
    //                          deprecated statistics written by `file_created_by` must be
    //                          ignored; may be nullptr.
    // @param file_created_by   writer identification string of the file.
    // @param ans_stat          out: receives has_null, is_all_null and the encoded bounds.
    // @return OK when `ans_stat` is usable (for an all-null column no bounds are set);
    //         DataQualityError / NotSupported when the statistics are missing or unusable.
    static Status read_column_stats(const FieldSchema* col_schema,
                                    const tparquet::ColumnMetaData& column_meta_data,
                                    std::unordered_map<tparquet::Type::type, bool>* ignored_stats,
                                    const std::string& file_created_by, ColumnStat* ans_stat) {
        const auto& statistic = column_meta_data.statistics;

        if (!statistic.__isset.null_count) [[unlikely]] {
            return Status::DataQualityError("This parquet Column meta no set null_count.");
        }
        ans_stat->has_null = statistic.null_count > 0;
        ans_stat->is_all_null = statistic.null_count == column_meta_data.num_values;
        if (ans_stat->is_all_null) {
            return Status::OK();
        }

        // Min-max of statistic is plain-encoded value
        if (statistic.__isset.min_value && statistic.__isset.max_value) {
            // INT96 and UNKNOWN-annotated columns have no defined order, so their bounds are
            // only meaningful when they describe a single value.
            const bool order_undefined =
                    col_schema->physical_type == tparquet::Type::INT96 ||
                    col_schema->parquet_schema.logicalType.__isset.UNKNOWN;
            if (order_undefined && statistic.min_value != statistic.max_value) {
                return Status::DataQualityError("Can not use this parquet min/max value.");
            }
            ans_stat->encoded_min_value = statistic.min_value;
            ans_stat->encoded_max_value = statistic.max_value;
        } else if (statistic.__isset.min && statistic.__isset.max) {
            const bool max_equals_min = statistic.min == statistic.max;
            const bool sort_orders_match =
                    SortOrder::SIGNED == _determine_sort_order(col_schema->parquet_schema);
            if (!sort_orders_match && !max_equals_min) {
                return Status::NotSupported("Can not use this parquet min/max value.");
            }

            // Ask CorruptStatistics once per physical type and remember the answer when the
            // caller provided a cache.
            bool should_ignore_corrupted_stats = false;
            if (ignored_stats != nullptr) {
                auto cached = ignored_stats->find(col_schema->physical_type);
                if (cached == ignored_stats->end()) {
                    const bool ignore = CorruptStatistics::should_ignore_statistics(
                            file_created_by, col_schema->physical_type);
                    cached = ignored_stats->emplace(col_schema->physical_type, ignore).first;
                }
                should_ignore_corrupted_stats = cached->second;
            } else {
                should_ignore_corrupted_stats = CorruptStatistics::should_ignore_statistics(
                        file_created_by, col_schema->physical_type);
            }
            if (should_ignore_corrupted_stats) {
                return Status::DataQualityError("Error statistics, should ignore.");
            }

            ans_stat->encoded_min_value = statistic.min;
            ans_stat->encoded_max_value = statistic.max;
        } else {
            return Status::DataQualityError("This parquet file not set min/max value");
        }

        // String bounds are reduced to their trustworthy part. _determine_sort_order() accepts
        // string columns on the deprecated min/max path on the assumption that this step
        // handles them, so it has to run for both statistics sources. The helper leaves its
        // arguments untouched when it fails.
        const auto prim_type = remove_nullable(col_schema->data_type)->get_primitive_type();
        if (prim_type == TYPE_VARCHAR || prim_type == TYPE_CHAR || prim_type == TYPE_STRING) {
            if (!_try_read_old_utf8_stats(ans_stat->encoded_min_value,
                                          ans_stat->encoded_max_value)) {
                return Status::DataQualityError("Can not use this parquet min/max value.");
            }
        }

        return Status::OK();
    }
439
440
    // Loads the bloom filter of a column from the file into `ans_stat->bloom_filter`.
    //
    // Reads the thrift BloomFilterHeader at `bloom_filter_offset`, accepts only the
    // BLOCK / UNCOMPRESSED / XXHASH combination, then initializes the filter from the
    // `numBytes` bytes that follow the header.
    //
    // @param column_meta_data  Parquet metadata holding the bloom filter offset / length.
    // @param file_reader       reader of the Parquet file.
    // @param io_ctx            IO context forwarded to the reader.
    // @param ans_stat          out: `bloom_filter` is assigned only when loading succeeded.
    // @return OK on success; NotSupported when the column has no bloom filter or the filter
    //         cannot be used; any error reported by the reader, the thrift deserializer or
    //         the filter initialization.
    static Status read_bloom_filter(const tparquet::ColumnMetaData& column_meta_data,
                                    io::FileReaderSPtr file_reader, io::IOContext* io_ctx,
                                    ColumnStat* ans_stat) {
        if (!column_meta_data.__isset.bloom_filter_offset) {
            return Status::NotSupported("Can not use this parquet bloom filter.");
        }

        // With a known length the first read covers the whole filter; otherwise only enough
        // bytes for the header are fetched.
        const bool has_length = column_meta_data.__isset.bloom_filter_length &&
                                column_meta_data.bloom_filter_length > 0;
        const size_t first_read_size =
                has_length ? static_cast<size_t>(column_meta_data.bloom_filter_length)
                           : static_cast<size_t>(BLOOM_FILTER_MAX_HEADER_LENGTH);

        size_t first_bytes_read = 0;
        std::vector<uint8_t> header_buffer(first_read_size);
        RETURN_IF_ERROR(file_reader->read_at(column_meta_data.bloom_filter_offset,
                                             Slice(header_buffer.data(), first_read_size),
                                             &first_bytes_read, io_ctx));

        tparquet::BloomFilterHeader t_bloom_filter_header;
        // In: number of bytes available; out: number of bytes the header actually occupies
        // (it is used below as the offset of the bitset).
        uint32_t t_bloom_filter_header_size = static_cast<uint32_t>(first_bytes_read);
        RETURN_IF_ERROR(deserialize_thrift_msg(header_buffer.data(), &t_bloom_filter_header_size,
                                               true, &t_bloom_filter_header));

        // TODO the bloom filter could be encrypted, too, so need to double check that this is NOT the case
        if (!t_bloom_filter_header.algorithm.__isset.BLOCK ||
            !t_bloom_filter_header.compression.__isset.UNCOMPRESSED ||
            !t_bloom_filter_header.hash.__isset.XXHASH) {
            return Status::NotSupported("Can not use this parquet bloom filter.");
        }

        // numBytes comes straight from the file; a non-positive value must not be used as a
        // buffer size.
        if (t_bloom_filter_header.numBytes <= 0) {
            return Status::NotSupported("Can not use this parquet bloom filter.");
        }
        const auto bitset_size = static_cast<size_t>(t_bloom_filter_header.numBytes);

        const uint8_t* bitset = nullptr;
        std::vector<uint8_t> data_buffer;
        if (first_bytes_read >= t_bloom_filter_header_size + bitset_size) {
            // The first read already contains the complete bitset; no second read needed.
            bitset = header_buffer.data() + t_bloom_filter_header_size;
        } else {
            data_buffer.resize(bitset_size);
            size_t bitset_bytes_read = 0;
            RETURN_IF_ERROR(file_reader->read_at(
                    column_meta_data.bloom_filter_offset + t_bloom_filter_header_size,
                    Slice(data_buffer.data(), bitset_size), &bitset_bytes_read, io_ctx));
            if (bitset_bytes_read != bitset_size) {
                // A truncated bitset would be padded with zero bytes and could wrongly report
                // values as absent.
                return Status::NotSupported("Can not use this parquet bloom filter.");
            }
            bitset = data_buffer.data();
        }

        // Build the filter locally and publish it only after init() succeeded, so a failure
        // never leaves a half-initialized filter in `ans_stat`.
        auto bloom_filter = std::make_unique<ParquetBlockSplitBloomFilter>();
        RETURN_IF_ERROR(bloom_filter->init(reinterpret_cast<const char*>(bitset),
                                           t_bloom_filter_header.numBytes,
                                           segment_v2::HashStrategyPB::XX_HASH_64));
        ans_stat->bloom_filter = std::move(bloom_filter);

        return Status::OK();
    }
485
};
486
487
} // namespace doris