be/src/format_v2/table/hive_reader.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "format_v2/table/hive_reader.h" |
19 | | |
20 | | #include <utility> |
21 | | |
22 | | #include "format_v2/column_mapper.h" |
23 | | #include "format_v2/file_reader.h" |
24 | | #include "runtime/runtime_state.h" |
25 | | |
26 | | namespace doris::hive { |
27 | | |
28 | 0 | Status HiveReader::init(format::TableReadOptions&& options) { |
29 | 0 | const bool allow_missing_columns = options.allow_missing_columns; |
30 | 0 | const format::FileFormat file_format = options.format; |
31 | 0 | RETURN_IF_ERROR(format::TableReader::init(std::move(options))); |
32 | | |
33 | | // Hive-specific behavior: choose the column matching mode based on file format and the |
34 | | // matching session variable. |
35 | | // - hive_orc_use_column_names / hive_parquet_use_column_names == true |
36 | | // => BY_NAME (modern Hive default, match by column name) |
37 | | // - those options == false |
38 | | // => BY_INDEX (mainly for Hive1 ORC `_col0` / `_col1`, match by top-level position; |
39 | | // Parquet exposes the same switch for consistency) |
40 | | // The base init path does not accept file-format-specific mapper configuration, so the mapper |
41 | | // must be replaced here after the base initialization completes. |
42 | 0 | DORIS_CHECK(_runtime_state != nullptr); |
43 | 0 | const auto& query_options = _runtime_state->query_options(); |
44 | 0 | bool use_column_names = true; |
45 | 0 | switch (file_format) { |
46 | 0 | case format::FileFormat::ORC: |
47 | 0 | use_column_names = query_options.hive_orc_use_column_names; |
48 | 0 | break; |
49 | 0 | case format::FileFormat::PARQUET: |
50 | 0 | use_column_names = query_options.hive_parquet_use_column_names; |
51 | 0 | break; |
52 | 0 | case format::FileFormat::CSV: |
53 | | // CSV does not really have a "column name vs position" choice. The format is inherently |
54 | | // positional, so BY_INDEX is the closest match to the original behavior. |
55 | 0 | use_column_names = false; |
56 | 0 | break; |
57 | 0 | } |
58 | | |
59 | 0 | _mode = use_column_names ? format::TableColumnMappingMode::BY_NAME |
60 | 0 | : format::TableColumnMappingMode::BY_INDEX; |
61 | 0 | _mapper_options.allow_missing_columns = allow_missing_columns; |
62 | 0 | return Status::OK(); |
63 | 0 | } |
64 | | |
65 | | } // namespace doris::hive |