Coverage Report

Created: 2025-09-14 17:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/util/url_coding.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/url_coding.h"
19
20
#include <curl/curl.h>
21
#include <libbase64.h>
22
23
#include <cmath>
24
#include <sstream>
25
26
namespace doris {
27
28
244
// Maps a nibble value (0-15) to its uppercase hexadecimal digit character.
// The input is not range-checked: values above 15 simply continue past 'F'.
inline unsigned char to_hex(unsigned char x) {
    if (x > 9) {
        // 10..15 -> 'A'..'F'
        return x + ('A' - 10);
    }
    // 0..9 -> '0'..'9'
    return x + '0';
}
31
32
// Adapted from http://dlib.net/dlib/server/server_http.cpp.html
33
6
// URL-encodes `in` and stores the result in `*out` (any previous content is replaced).
//
// Follows the rules of java.net.URLEncoder
// (https://docs.oracle.com/javase/8/docs/api/java/net/URLEncoder.html):
//   - ASCII letters, digits and the characters '.', '-', '*', '_' are copied unchanged;
//   - a space becomes '+';
//   - every other byte becomes "%XY", where XY is the byte value as two uppercase hex digits.
//
// The result is assembled in a temporary string and only handed to `*out` at the end,
// so `in` may safely be a view over `*out`.
void url_encode(const std::string_view& in, std::string* out) {
    static constexpr char kHexDigits[] = "0123456789ABCDEF";

    std::string encoded;
    // Every input byte yields at least one output byte, so this is a lower bound.
    encoded.reserve(in.size());

    for (const char c : in) {
        const bool is_unreserved = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
                                   (c >= '0' && c <= '9') || c == '.' || c == '-' ||
                                   c == '*' || c == '_';
        if (is_unreserved) {
            encoded += c;
        } else if (c == ' ') {
            encoded += '+';
        } else {
            // `char` may be signed: convert to unsigned char before splitting into nibbles,
            // otherwise bytes >= 0x80 (e.g. UTF-8 continuation bytes) give wrong digits.
            const auto byte = static_cast<unsigned char>(c);
            encoded += '%';
            encoded += kHexDigits[byte >> 4];
            encoded += kHexDigits[byte & 0x0F];
        }
    }

    out->swap(encoded);
}
51
52
// Adapted from
53
// http://www.boost.org/doc/libs/1_40_0/doc/html/boost_asio/
54
//   example/http/server3/request_handler.cpp
55
// See http://www.boost.org/LICENSE_1_0.txt for license for this method.
56
0
// Decodes a URL-encoded string `in` into `*out`.
//
//   - "%XY" (X and Y both hexadecimal digits, either case) becomes the byte 0xXY;
//   - '+' becomes a space;
//   - every other character is copied unchanged.
//
// Returns true on success. Returns false as soon as a '%' is found that is not followed by
// exactly two hexadecimal digits (including a truncated escape at the end of the input);
// in that case `*out` holds whatever was decoded up to that point.
//
// `*out` is cleared before decoding starts, so it must not be the same object as `in`.
bool url_decode(const std::string& in, std::string* out) {
    // Value of a single hex digit, or -1 if `ch` is not a hex digit.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    out->clear();
    // The decoded text is never longer than the encoded text.
    out->reserve(in.size());

    for (size_t i = 0; i < in.size(); ++i) {
        const char c = in[i];
        if (c == '%') {
            // Need two more characters after the '%'. i < in.size(), so no underflow.
            if (in.size() - i < 3) {
                return false;
            }
            const int high = hex_value(in[i + 1]);
            const int low = hex_value(in[i + 2]);
            // Both characters must be hex digits; inputs such as "%4G", "%-1" or "% 1"
            // are malformed and rejected rather than partially parsed.
            if (high < 0 || low < 0) {
                return false;
            }
            out->push_back(static_cast<char>((high << 4) | low));
            i += 2; // skip the two digits just consumed
        } else if (c == '+') {
            out->push_back(' ');
        } else {
            out->push_back(c);
        }
    }

    return true;
}
84
85
5
void base64_encode(const std::string& in, std::string* out) {
86
5
    out->resize((size_t)(4.0 * std::ceil(in.length() / 3.0)));
87
5
    auto len = base64_encode(reinterpret_cast<const unsigned char*>(in.data()), in.length(),
88
5
                             (unsigned char*)out->data());
89
5
    out->resize(len);
90
5
}
91
92
48
size_t base64_encode(const unsigned char* data, size_t length, unsigned char* encoded_data) {
93
48
    size_t encode_len = 0;
94
#if defined(__aarch64__) || defined(_M_ARM64)
95
    do_base64_encode(reinterpret_cast<const char*>(data), length,
96
                     reinterpret_cast<char*>(encoded_data), &encode_len, BASE64_FORCE_NEON64);
97
#else
98
48
    do_base64_encode(reinterpret_cast<const char*>(data), length,
99
48
                     reinterpret_cast<char*>(encoded_data), &encode_len, 0);
100
48
#endif
101
48
    return encode_len;
102
48
}
103
104
72
int64_t base64_decode(const char* data, size_t length, char* decoded_data) {
105
72
    size_t decode_len = 0;
106
#if defined(__aarch64__) || defined(_M_ARM64)
107
    auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length, decoded_data,
108
                                &decode_len, BASE64_FORCE_NEON64);
109
#else
110
72
    auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length, decoded_data,
111
72
                                &decode_len, 0);
112
72
#endif
113
72
    return ret > 0 ? decode_len : -1;
114
72
}
115
116
24
bool base64_decode(const std::string& in, std::string* out) {
117
24
    out->resize(in.length());
118
119
24
    int64_t len = base64_decode(in.data(), in.length(), out->data());
120
24
    if (len < 0) {
121
1
        return false;
122
1
    }
123
23
    out->resize(len);
124
23
    return true;
125
24
}
126
127
0
// Appends `in` to the stream `*out`, replacing the characters '<', '>' and '&' with the
// entities "&lt;", "&gt;" and "&amp;". All other characters (quotes included) are written
// through unchanged.
void escape_for_html(const std::string& in, std::stringstream* out) {
    std::stringstream& sink = *out;
    for (const char ch : in) {
        if (ch == '<') {
            sink << "&lt;";
        } else if (ch == '>') {
            sink << "&gt;";
        } else if (ch == '&') {
            sink << "&amp;";
        } else {
            sink << ch;
        }
    }
}
147
148
0
std::string escape_for_html_to_string(const std::string& in) {
149
0
    std::stringstream str;
150
0
    escape_for_html(in, &str);
151
0
    return str.str();
152
0
}
153
} // namespace doris