Coverage Report

Created: 2026-06-09 14:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/format_v2/table/hive_reader.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "format_v2/table/hive_reader.h"
19
20
#include <utility>
21
22
#include "format_v2/column_mapper.h"
23
#include "format_v2/file_reader.h"
24
#include "runtime/runtime_state.h"
25
26
namespace doris::hive {
27
28
0
// Initializes the Hive table reader.
//
// Runs the base format::TableReader::init() first and then applies the Hive-specific column
// mapping configuration. The base init path does not accept file-format-specific mapper
// configuration, so `_mode` and `_mapper_options` are overwritten here once it has succeeded.
//
// `options` is moved into the base initializer; the fields needed afterwards are copied out
// before the move. Any error reported by the base initializer is returned to the caller via
// RETURN_IF_ERROR, otherwise Status::OK() is returned.
Status HiveReader::init(format::TableReadOptions&& options) {
    // Snapshot what we still need: `options` must not be read after it has been moved from.
    const format::FileFormat source_format = options.format;
    const bool tolerate_missing_columns = options.allow_missing_columns;

    RETURN_IF_ERROR(format::TableReader::init(std::move(options)));

    DORIS_CHECK(_runtime_state != nullptr);
    const auto& session_options = _runtime_state->query_options();

    // Decide how file columns are matched against table columns:
    //   - ORC / Parquet follow their session variable
    //     (hive_orc_use_column_names / hive_parquet_use_column_names):
    //       true  => match by column name (modern Hive default)
    //       false => match by top-level position (mainly for Hive1 ORC `_col0` / `_col1`;
    //                Parquet exposes the same switch for consistency)
    //   - CSV has no real "name vs position" choice; it is inherently positional, so
    //     positional matching is the closest fit to the original behavior.
    const bool match_by_name = [&]() -> bool {
        switch (source_format) {
        case format::FileFormat::ORC:
            return session_options.hive_orc_use_column_names;
        case format::FileFormat::PARQUET:
            return session_options.hive_parquet_use_column_names;
        case format::FileFormat::CSV:
            return false;
        }
        // Any format not listed above keeps the name-based default.
        return true;
    }();

    if (match_by_name) {
        _mode = format::TableColumnMappingMode::BY_NAME;
    } else {
        _mode = format::TableColumnMappingMode::BY_INDEX;
    }
    _mapper_options.allow_missing_columns = tolerate_missing_columns;

    return Status::OK();
}
64
65
} // namespace doris::hive