Coverage Report

Created: 2026-06-02 16:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format/jni/jni_reader.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <map>
22
#include <memory>
23
#include <string>
24
#include <unordered_map>
25
#include <unordered_set>
26
#include <vector>
27
28
#include "common/status.h"
29
#include "format/generic_reader.h"
30
#include "format/jni/jni_data_bridge.h"
31
#include "runtime/runtime_profile.h"
32
#include "util/jni-util.h"
33
#include "util/profile_collector.h"
34
#include "util/string_util.h"
35
36
namespace doris {
37
class RuntimeProfile;
38
class RuntimeState;
39
class SlotDescriptor;
40
class Block;
41
} // namespace doris
42
43
namespace doris {
44
45
/**
46
 * JniReader is the base class for all JNI-based readers. It directly manages
47
 * the JNI lifecycle (open/read/close) for Java scanners that extend
48
 * org.apache.doris.common.jni.JniScanner.
49
 *
50
 * Subclasses only need to:
51
 * 1. Build scanner_params/column_names in their constructor
52
 * 2. Pass them to JniReader's constructor
53
 * 3. Call open() in their init_reader()
54
 *
55
 * This class replaces the old JniConnector intermediary.
56
 */
57
class JniReader : public GenericReader {
58
public:
59
    /**
60
     * Constructor for scan mode.
61
     * @param file_slot_descs  Slot descriptors for the output columns
62
     * @param state            Runtime state
63
     * @param profile          Runtime profile for metrics
64
     * @param connector_class  Java scanner class path (e.g. "org/apache/doris/paimon/PaimonJniScanner")
65
     * @param scanner_params   Configuration map passed to Java scanner constructor
66
     * @param column_names     Fields to read (also the required_fields in scanner_params)
67
     * @param self_split_weight  Weight for this split (for profile condition counter)
68
     */
69
    JniReader(const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* state,
70
              RuntimeProfile* profile, std::string connector_class,
71
              std::map<std::string, std::string> scanner_params,
72
              std::vector<std::string> column_names, int64_t self_split_weight = -1);
73
74
    /**
75
     * Constructor for table-schema-only mode (no data reading).
76
     * @param connector_class  Java scanner class path
77
     * @param scanner_params   Configuration map passed to Java scanner constructor
78
     */
79
    JniReader(std::string connector_class, std::map<std::string, std::string> scanner_params);
80
81
5.53k
    ~JniReader() override = default;
82
83
    /**
84
     * Open the java scanner: set up profile counters, create Java object,
85
     * get method IDs, and call JniScanner#open.
86
     */
87
    Status open(RuntimeState* state, RuntimeProfile* profile);
88
89
5.15k
    // Fills *name_to_type with one (column name -> data type) entry for each
    // slot descriptor in _file_slot_descs and always returns Status::OK().
    // name_to_type is dereferenced without a null check.
    Status _get_columns_impl(std::unordered_map<std::string, DataTypePtr>* name_to_type) override {
90
27.0k
        for (const auto& desc : _file_slot_descs) {
91
27.0k
            // emplace() leaves an entry untouched if the column name is already present.
            name_to_type->emplace(desc->col_name(), desc->type());
92
27.0k
        }
93
5.15k
        return Status::OK();
94
5.15k
    }
95
96
    void set_batch_size(size_t batch_size) override;
97
0
    // Returns the current value of _batch_size (declared with an initial value of 0).
    size_t get_batch_size() const override { return _batch_size; }
98
99
    /**
100
     * Read next batch from Java scanner and fill the block.
101
     */
102
    Status _do_get_next_block(Block* block, size_t* read_rows, bool* eof) override;
103
104
    /**
105
     * Get table schema from Java scanner (used by Avro schema discovery).
106
     */
107
    Status get_table_schema(std::string& table_schema_str);
108
109
    /**
110
     * Close the scanner and release JNI resources.
111
     */
112
    Status close() override;
113
114
    /**
115
     * Set column name to block index map from FileScanner to avoid repeated map creation.
116
     */
117
    void set_col_name_to_block_idx(
118
5.21k
            const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx) {
119
5.21k
        // Only the pointer is stored; the map itself is not copied.
        // NOTE(review): presumably the caller must keep the map alive for as long as
        // this reader uses it -- confirm against the usage in the .cpp.
        _col_name_to_block_idx = col_name_to_block_idx;
120
5.21k
    }
121
122
protected:
123
    Status on_before_init_reader(ReaderInitContext* ctx) override;
124
    Status on_after_read_block(Block* block, size_t* read_rows) override;
125
    void _collect_profile_before_close() override;
126
127
    /**
128
     * Update scanner params and column names after construction.
129
     * Used by Avro which builds params in init_reader/init_schema_reader
130
     * rather than in the constructor.
131
     */
132
    // Both arguments are taken by value and moved into the members, so the
    // previous contents of _scanner_params and _column_names are replaced.
    void _update_scanner_params(std::map<std::string, std::string> params,
133
0
                                std::vector<std::string> column_names) {
134
0
        _scanner_params = std::move(params);
135
0
        _column_names = std::move(column_names);
136
0
    }
137
138
    const std::vector<SlotDescriptor*>& _file_slot_descs;
139
    RuntimeState* _state = nullptr;
140
    RuntimeProfile* _profile = nullptr;
141
142
private:
143
    static const std::vector<SlotDescriptor*> _s_empty_slot_descs;
144
145
    Status _fill_partition_columns(Block* block, size_t num_rows);
146
    Status _init_jni_scanner(JNIEnv* env, int batch_size);
147
    Status _fill_block(Block* block, size_t num_rows);
148
    Status _get_statistics(JNIEnv* env, std::map<std::string, std::string>* result);
149
150
    std::string _connector_name;
151
    std::string _connector_class;
152
    std::map<std::string, std::string> _scanner_params;
153
    std::vector<std::string> _column_names;
154
    int32_t _self_split_weight = -1;
155
    bool _is_table_schema = false;
156
157
    RuntimeProfile::Counter* _open_scanner_time = nullptr;
158
    RuntimeProfile::Counter* _java_scan_time = nullptr;
159
    RuntimeProfile::Counter* _java_append_data_time = nullptr;
160
    RuntimeProfile::Counter* _java_create_vector_table_time = nullptr;
161
    RuntimeProfile::Counter* _fill_block_time = nullptr;
162
    RuntimeProfile::ConditionCounter* _max_time_split_weight_counter = nullptr;
163
164
    int64_t _jni_scanner_open_watcher = 0;
165
    int64_t _java_scan_watcher = 0;
166
    int64_t _fill_block_watcher = 0;
167
168
    size_t _has_read = 0;
169
170
    bool _closed = false;
171
    bool _scanner_opened = false;
172
173
    Jni::GlobalClass _jni_scanner_cls;
174
    Jni::GlobalObject _jni_scanner_obj;
175
    Jni::MethodId _jni_scanner_open;
176
    Jni::MethodId _jni_scanner_get_append_data_time;
177
    Jni::MethodId _jni_scanner_get_create_vector_table_time;
178
    Jni::MethodId _jni_scanner_get_next_batch;
179
    Jni::MethodId _jni_scanner_get_table_schema;
180
    Jni::MethodId _jni_scanner_close;
181
    Jni::MethodId _jni_scanner_release_column;
182
    Jni::MethodId _jni_scanner_release_table;
183
    Jni::MethodId _jni_scanner_get_statistics;
184
    Jni::MethodId _jni_scanner_set_batch_size;
185
186
    JniDataBridge::TableMetaAddress _table_meta;
187
    size_t _batch_size = 0;
188
189
    // Column name to block index map, passed from FileScanner to avoid repeated map creation
190
    const std::unordered_map<std::string, uint32_t>* _col_name_to_block_idx = nullptr;
191
    std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
192
            _partition_values;
193
    std::unordered_map<std::string, bool> _partition_value_is_null;
194
195
6.34k
    // Forwards meta_addr to _table_meta.set_meta().
    // NOTE(review): meta_addr is presumably a native address returned by the Java
    // scanner; `long` is only 32 bits wide on LLP64 platforms -- confirm this is
    // acceptable for the supported targets.
    void _set_meta(long meta_addr) { _table_meta.set_meta(meta_addr); }
196
};
197
198
/**
199
 * The demo usage of JniReader, showing how to read data from java scanner.
200
 * The java side is also a mock reader that provides values for each type.
201
 * This class will only be retained during the functional testing phase to verify that
202
 * the communication and data exchange with the jvm are correct.
203
 */
204
// JniReader subclass that declares only a scan-style constructor, a defaulted
// destructor and init_reader(); the member definitions are not in this header.
class MockJniReader : public JniReader {
205
public:
206
    // Takes the same first three arguments as JniReader's scan-mode constructor
    // (output slot descriptors, runtime state, runtime profile).
    // NOTE(review): the connector class and scanner params are presumably
    // chosen inside the out-of-line definition -- confirm in the .cpp.
    MockJniReader(const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* state,
207
                  RuntimeProfile* profile);
208
209
    ~MockJniReader() override = default;
210
211
    // Defined out of line. Per the JniReader class comment, subclasses are
    // expected to call open() from their init_reader().
    Status init_reader();
212
};
213
214
} // namespace doris