be/src/format/table/paimon_reader.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <memory> |
21 | | #include <utility> |
22 | | #include <vector> |
23 | | |
24 | | #include "format/orc/vorc_reader.h" |
25 | | #include "format/parquet/vparquet_reader.h" |
26 | | #include "format/table/table_schema_change_helper.h" |
27 | | |
28 | | namespace doris { |
29 | | class ShardedKVCache; |
30 | | |
31 | | // PaimonOrcReader: directly inherits OrcReader (no composition wrapping) and also derives from TableSchemaChangeHelper. Both constructors forward their arguments to the OrcReader base, store kv_cache in _kv_cache and then call _init_paimon_profile(); they differ only in how the IOContext is supplied (raw io::IOContext* vs. a std::shared_ptr<io::IOContext> holder that is moved into the base). |
32 | | // Schema mapping in on_before_init_reader, deletion vector reading in on_after_init_reader. _delete_rows starts as nullptr and _paimon_profile holds three counters (num_delete_rows, delete_files_read_time, parse_deletion_vector_time) that also start as nullptr. NOTE(review): presumably _init_paimon_profile() registers those counters and _init_deletion_vector() fills _delete_rows using _kv_cache -- confirm in the .cpp, which is not visible here. |
33 | | class PaimonOrcReader final : public OrcReader, public TableSchemaChangeHelper { |
34 | | public: |
35 | | ENABLE_FACTORY_CREATOR(PaimonOrcReader); |
36 | | PaimonOrcReader(RuntimeProfile* profile, RuntimeState* state, |
37 | | const TFileScanRangeParams& params, const TFileRangeDesc& range, |
38 | | size_t batch_size, const std::string& ctz, ShardedKVCache* kv_cache, |
39 | | io::IOContext* io_ctx, FileMetaCache* meta_cache = nullptr, |
40 | | bool enable_lazy_mat = true) |
41 | | : OrcReader(profile, state, params, range, batch_size, ctz, io_ctx, meta_cache, |
42 | | enable_lazy_mat), |
43 | 0 | _kv_cache(kv_cache) { |
44 | 0 | _init_paimon_profile(); |
45 | 0 | } |
46 | | PaimonOrcReader(RuntimeProfile* profile, RuntimeState* state, |
47 | | const TFileScanRangeParams& params, const TFileRangeDesc& range, |
48 | | size_t batch_size, const std::string& ctz, ShardedKVCache* kv_cache, |
49 | | std::shared_ptr<io::IOContext> io_ctx_holder, |
50 | | FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true) |
51 | 2.08k | : OrcReader(profile, state, params, range, batch_size, ctz, std::move(io_ctx_holder), |
52 | 2.08k | meta_cache, enable_lazy_mat), |
53 | 2.08k | _kv_cache(kv_cache) { |
54 | 2.08k | _init_paimon_profile(); |
55 | 2.08k | } |
56 | 2.08k | ~PaimonOrcReader() final = default; |
57 | | |
58 | | protected: |
59 | | Status on_before_init_reader(ReaderInitContext* ctx) override; |
60 | | |
61 | | Status on_after_init_reader(ReaderInitContext* /*ctx*/) override; |
62 | | |
63 | | private: |
64 | | void _init_paimon_profile(); |
65 | | Status _init_deletion_vector(); |
66 | | |
67 | | struct PaimonProfile { |
68 | | RuntimeProfile::Counter* num_delete_rows = nullptr; |
69 | | RuntimeProfile::Counter* delete_files_read_time = nullptr; |
70 | | RuntimeProfile::Counter* parse_deletion_vector_time = nullptr; |
71 | | }; |
72 | | |
73 | | const std::vector<int64_t>* _delete_rows = nullptr; |
74 | | ShardedKVCache* _kv_cache; |
75 | | PaimonProfile _paimon_profile; |
76 | | }; |
77 | | |
78 | | // PaimonParquetReader: directly inherits ParquetReader (no composition wrapping) and also derives from TableSchemaChangeHelper. Both constructors forward their arguments to the ParquetReader base, store kv_cache in _kv_cache and then call _init_paimon_profile(); they differ only in how the IOContext is supplied (raw io::IOContext* vs. a std::shared_ptr<io::IOContext> holder that is moved into the base). Unlike PaimonOrcReader, the time zone is passed as a const cctz::time_zone* and the RuntimeState* comes after the IOContext argument. _delete_rows and the three PaimonProfile counters start as nullptr. NOTE(review): on_before_init_reader / on_after_init_reader presumably perform schema mapping and deletion-vector reading as described for PaimonOrcReader -- confirm in the .cpp, which is not visible here. |
79 | | class PaimonParquetReader final : public ParquetReader, public TableSchemaChangeHelper { |
80 | | public: |
81 | | ENABLE_FACTORY_CREATOR(PaimonParquetReader); |
82 | | PaimonParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params, |
83 | | const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz, |
84 | | ShardedKVCache* kv_cache, io::IOContext* io_ctx, RuntimeState* state, |
85 | | FileMetaCache* meta_cache = nullptr, bool enable_lazy_mat = true) |
86 | | : ParquetReader(profile, params, range, batch_size, ctz, io_ctx, state, meta_cache, |
87 | | enable_lazy_mat), |
88 | 0 | _kv_cache(kv_cache) { |
89 | 0 | _init_paimon_profile(); |
90 | 0 | } |
91 | | PaimonParquetReader(RuntimeProfile* profile, const TFileScanRangeParams& params, |
92 | | const TFileRangeDesc& range, size_t batch_size, const cctz::time_zone* ctz, |
93 | | ShardedKVCache* kv_cache, std::shared_ptr<io::IOContext> io_ctx_holder, |
94 | | RuntimeState* state, FileMetaCache* meta_cache = nullptr, |
95 | | bool enable_lazy_mat = true) |
96 | 2.68k | : ParquetReader(profile, params, range, batch_size, ctz, std::move(io_ctx_holder), |
97 | 2.68k | state, meta_cache, enable_lazy_mat), |
98 | 2.68k | _kv_cache(kv_cache) { |
99 | 2.68k | _init_paimon_profile(); |
100 | 2.68k | } |
101 | 2.68k | ~PaimonParquetReader() final = default; |
102 | | |
103 | | protected: |
104 | | Status on_before_init_reader(ReaderInitContext* ctx) override; |
105 | | |
106 | | Status on_after_init_reader(ReaderInitContext* /*ctx*/) override; |
107 | | |
108 | | private: |
109 | | void _init_paimon_profile(); |
110 | | Status _init_deletion_vector(); |
111 | | |
112 | | struct PaimonProfile { |
113 | | RuntimeProfile::Counter* num_delete_rows = nullptr; |
114 | | RuntimeProfile::Counter* delete_files_read_time = nullptr; |
115 | | RuntimeProfile::Counter* parse_deletion_vector_time = nullptr; |
116 | | }; |
117 | | |
118 | | const std::vector<int64_t>* _delete_rows = nullptr; |
119 | | ShardedKVCache* _kv_cache; |
120 | | PaimonProfile _paimon_profile; |
121 | | }; |
122 | | |
123 | | } // namespace doris |