Coverage Report

Created: 2026-06-23 16:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/util/load_util.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/load_util.h"
19
20
#include <string>
21
22
#include "util/string_util.h"
23
24
namespace doris {
25
void LoadUtil::parse_format(const std::string& format_str, const std::string& compress_type_str,
26
                            TFileFormatType::type* format_type,
27
5.87k
                            TFileCompressType::type* compress_type) {
28
5.87k
    if (format_str.empty()) {
29
1.39k
        parse_format("CSV", compress_type_str, format_type, compress_type);
30
1.39k
        return;
31
1.39k
    }
32
33
    // Assign compress_type
34
4.48k
    if (iequal(compress_type_str, "GZ")) {
35
155
        *compress_type = TFileCompressType::GZ;
36
4.32k
    } else if (iequal(compress_type_str, "LZO")) {
37
2
        *compress_type = TFileCompressType::LZO;
38
4.32k
    } else if (iequal(compress_type_str, "BZ2")) {
39
8
        *compress_type = TFileCompressType::BZ2;
40
4.31k
    } else if (iequal(compress_type_str, "ZSTD")) {
41
6
        *compress_type = TFileCompressType::ZSTD;
42
4.30k
    } else if (iequal(compress_type_str, "LZ4") || iequal(compress_type_str, "LZ4FRAME")) {
43
10
        *compress_type = TFileCompressType::LZ4FRAME;
44
4.29k
    } else if (iequal(compress_type_str, "LZ4_BLOCK")) {
45
1
        *compress_type = TFileCompressType::LZ4BLOCK;
46
4.29k
    } else if (iequal(compress_type_str, "LZOP")) {
47
5
        *compress_type = TFileCompressType::LZO;
48
4.29k
    } else if (iequal(compress_type_str, "SNAPPY_BLOCK")) {
49
1
        *compress_type = TFileCompressType::SNAPPYBLOCK;
50
4.29k
    } else if (iequal(compress_type_str, "DEFLATE")) {
51
3
        *compress_type = TFileCompressType::DEFLATE;
52
4.28k
    } else {
53
4.28k
        *compress_type = TFileCompressType::PLAIN;
54
4.28k
    }
55
56
    // Assign format_type
57
4.48k
    *format_type = TFileFormatType::FORMAT_UNKNOWN;
58
4.48k
    if (iequal(format_str, "CSV")) {
59
1.77k
        if (compress_type_str.empty()) {
60
1.59k
            *format_type = TFileFormatType::FORMAT_CSV_PLAIN;
61
1.59k
        } else if (iequal(compress_type_str, "GZ")) {
62
153
            *format_type = TFileFormatType::FORMAT_CSV_GZ;
63
153
        } else if (iequal(compress_type_str, "LZO")) {
64
2
            *format_type = TFileFormatType::FORMAT_CSV_LZO;
65
21
        } else if (iequal(compress_type_str, "BZ2")) {
66
6
            *format_type = TFileFormatType::FORMAT_CSV_BZ2;
67
15
        } else if (iequal(compress_type_str, "ZSTD")) {
68
4
            *format_type = TFileFormatType::FORMAT_CSV_PLAIN;
69
11
        } else if (iequal(compress_type_str, "LZ4") || iequal(compress_type_str, "LZ4FRAME")) {
70
8
            *format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
71
8
        } else if (iequal(compress_type_str, "LZ4_BLOCK")) {
72
0
            *format_type = TFileFormatType::FORMAT_CSV_LZ4BLOCK;
73
3
        } else if (iequal(compress_type_str, "LZOP")) {
74
2
            *format_type = TFileFormatType::FORMAT_CSV_LZOP;
75
2
        } else if (iequal(compress_type_str, "SNAPPY_BLOCK")) {
76
0
            *format_type = TFileFormatType::FORMAT_CSV_SNAPPYBLOCK;
77
1
        } else if (iequal(compress_type_str, "DEFLATE")) {
78
1
            *format_type = TFileFormatType::FORMAT_CSV_DEFLATE;
79
1
        }
80
2.70k
    } else if (iequal(format_str, "HIVE_TEXT")) {
81
3
        *format_type = TFileFormatType::FORMAT_TEXT;
82
2.70k
    } else if (iequal(format_str, "JSON")) {
83
2.67k
        *format_type = TFileFormatType::FORMAT_JSON;
84
2.67k
    } else if (iequal(format_str, "PARQUET")) {
85
14
        *format_type = TFileFormatType::FORMAT_PARQUET;
86
20
    } else if (iequal(format_str, "ORC")) {
87
10
        *format_type = TFileFormatType::FORMAT_ORC;
88
10
    } else if (iequal(format_str, "WAL")) {
89
0
        *format_type = TFileFormatType::FORMAT_WAL;
90
10
    } else if (iequal(format_str, "ARROW")) {
91
10
        *format_type = TFileFormatType::FORMAT_ARROW;
92
10
    }
93
4.48k
}
94
95
4.44k
// Returns true when `format` is one of the formats enumerated below and false
// for every other value.
// NOTE(review): FORMAT_CSV_SNAPPYBLOCK is not in this list although
// parse_format() can produce it -- confirm that this is intentional.
bool LoadUtil::is_format_support_streaming(TFileFormatType::type format) {
    bool streamable = false;
    switch (format) {
    // CSV, plain or with one of the listed codecs.
    case TFileFormatType::FORMAT_CSV_PLAIN:
    case TFileFormatType::FORMAT_CSV_BZ2:
    case TFileFormatType::FORMAT_CSV_DEFLATE:
    case TFileFormatType::FORMAT_CSV_GZ:
    case TFileFormatType::FORMAT_CSV_LZ4FRAME:
    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
    case TFileFormatType::FORMAT_CSV_LZO:
    case TFileFormatType::FORMAT_CSV_LZOP:
    // Non-CSV formats.
    case TFileFormatType::FORMAT_JSON:
    case TFileFormatType::FORMAT_TEXT:
    case TFileFormatType::FORMAT_WAL:
    case TFileFormatType::FORMAT_ARROW:
        streamable = true;
        break;
    default:
        break;
    }
    return streamable;
}
115
116
bool LoadUtil::is_compressed_load(TFileCompressType::type compress_type,
117
37
                                  TFileFormatType::type format_type) {
118
37
    if (compress_type != TFileCompressType::UNKNOWN && compress_type != TFileCompressType::PLAIN) {
119
12
        return true;
120
12
    }
121
122
25
    switch (format_type) {
123
0
    case TFileFormatType::FORMAT_CSV_BZ2:
124
1
    case TFileFormatType::FORMAT_CSV_DEFLATE:
125
2
    case TFileFormatType::FORMAT_CSV_GZ:
126
2
    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
127
2
    case TFileFormatType::FORMAT_CSV_LZ4FRAME:
128
2
    case TFileFormatType::FORMAT_CSV_LZO:
129
2
    case TFileFormatType::FORMAT_CSV_LZOP:
130
2
    case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
131
2
        return true;
132
23
    default:
133
23
        return false;
134
25
    }
135
25
}
136
} // namespace  doris