Coverage Report

Created: 2025-05-10 16:39

/root/doris/be/src/util/url_coding.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/url_coding.h"
19
20
#include <curl/curl.h>
21
#include <libbase64.h>
22
23
#include <sstream>
24
25
namespace doris {
26
27
244
// Converts a nibble value to its uppercase hexadecimal digit character:
// 0-9 map to '0'-'9' and 10-15 map to 'A'-'F'.
// The argument is not range-checked; values above 15 simply continue past 'F'.
inline unsigned char to_hex(unsigned char x) {
    if (x > 9) {
        // Letters: offset so that 10 lands on 'A'.
        return x + ('A' - 10);
    }
    return x + '0';
}
30
31
// Adapted from http://dlib.net/dlib/server/server_http.cpp.html
32
6
// URL-encodes `in` and stores the result in `*out`, following the rules of
// https://docs.oracle.com/javase/8/docs/api/java/net/URLEncoder.html :
//   - ASCII letters, digits and the characters '.', '-', '*', '_' are copied as-is;
//   - a space becomes '+';
//   - every other byte becomes "%XY", where XY is its uppercase hex value.
//
// The encoded text is built in a local buffer and handed to `*out` only at the
// end, so `in` may safely be a view of `*out`.
void url_encode(const std::string_view& in, std::string* out) {
    static constexpr char kHexDigits[] = "0123456789ABCDEF";

    std::string encoded;
    // Lower bound only: each escaped byte expands to three characters.
    encoded.reserve(in.size());

    for (const char c : in) {
        const bool is_allowed = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
                                (c >= '0' && c <= '9') || c == '.' || c == '-' || c == '*' ||
                                c == '_';
        if (is_allowed) {
            encoded.push_back(c);
        } else if (c == ' ') {
            encoded.push_back('+');
        } else {
            // `char` may be signed: go through unsigned char before splitting into
            // nibbles, otherwise bytes >= 0x80 (e.g. UTF-8 sequences) would yield
            // negative indices / wrong digits.
            const auto byte = static_cast<unsigned char>(c);
            encoded.push_back('%');
            encoded.push_back(kHexDigits[byte >> 4]);
            encoded.push_back(kHexDigits[byte & 0x0F]);
        }
    }

    out->swap(encoded);
}
50
51
// Adapted from
52
// http://www.boost.org/doc/libs/1_40_0/doc/html/boost_asio/
53
//   example/http/server3/request_handler.cpp
54
// See http://www.boost.org/LICENSE_1_0.txt for license for this method.
55
0
// Decodes a URL-encoded string `in` into `*out`.
//   - "%XY" (X and Y hexadecimal digits, either case) becomes the byte 0xXY;
//   - '+' becomes a space;
//   - every other character is copied unchanged.
//
// Returns true on success. Returns false if a '%' is not followed by exactly two
// hexadecimal digits (truncated or malformed escape); in that case `*out` holds
// whatever was decoded before the bad escape.
//
// Both escape characters are validated explicitly. Stream-based hex parsing is
// deliberately avoided because it skips whitespace, accepts signs and stops at the
// first non-hex character, which would let inputs such as "%1g", "% 1" or "%-1"
// decode "successfully".
bool url_decode(const std::string& in, std::string* out) {
    // Returns the numeric value (0-15) of a hex digit, or -1 if `ch` is not one.
    const auto hex_value = [](char ch) -> int {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    };

    out->clear();
    out->reserve(in.size());

    for (size_t i = 0; i < in.size(); ++i) {
        const char c = in[i];
        if (c == '%') {
            // Need two more characters after the '%'.
            if (i + 3 > in.size()) {
                return false;
            }
            const int high = hex_value(in[i + 1]);
            const int low = hex_value(in[i + 2]);
            if (high < 0 || low < 0) {
                return false;
            }
            out->push_back(static_cast<char>((high << 4) | low));
            i += 2; // skip the two digits just consumed
        } else if (c == '+') {
            out->push_back(' ');
        } else {
            out->push_back(c);
        }
    }

    return true;
}
83
84
5
void base64_encode(const std::string& in, std::string* out) {
85
5
    out->resize(size_t(in.length() * (4.0 / 3) + 1));
86
5
    auto len = base64_encode(reinterpret_cast<const unsigned char*>(in.c_str()), in.length(),
87
5
                             (unsigned char*)out->c_str());
88
5
    out->resize(len);
89
5
}
90
91
48
size_t base64_encode(const unsigned char* data, size_t length, unsigned char* encoded_data) {
92
48
    size_t encode_len = 0;
93
#if defined(__aarch64__) || defined(_M_ARM64)
94
    do_base64_encode(reinterpret_cast<const char*>(data), length,
95
                     reinterpret_cast<char*>(encoded_data), &encode_len, BASE64_FORCE_NEON64);
96
#else
97
48
    do_base64_encode(reinterpret_cast<const char*>(data), length,
98
48
                     reinterpret_cast<char*>(encoded_data), &encode_len, 0);
99
48
#endif
100
48
    return encode_len;
101
48
}
102
103
72
int64_t base64_decode(const char* data, size_t length, char* decoded_data) {
104
72
    size_t decode_len = 0;
105
#if defined(__aarch64__) || defined(_M_ARM64)
106
    auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length, decoded_data,
107
                                &decode_len, BASE64_FORCE_NEON64);
108
#else
109
72
    auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length, decoded_data,
110
72
                                &decode_len, 0);
111
72
#endif
112
72
    return ret > 0 ? decode_len : -1;
113
72
}
114
115
24
bool base64_decode(const std::string& in, std::string* out) {
116
24
    out->resize(in.length());
117
118
24
    int64_t len = base64_decode(in.c_str(), in.length(), out->data());
119
24
    if (len < 0) {
120
2
        return false;
121
2
    }
122
22
    out->resize(len);
123
22
    return true;
124
24
}
125
126
0
// Appends `in` to the stream `*out`, replacing the HTML-significant characters
// '<', '>' and '&' with "&lt;", "&gt;" and "&amp;" respectively.
// All other characters (including quotes) are written through unchanged.
void escape_for_html(const std::string& in, std::stringstream* out) {
    std::stringstream& sink = *out;
    for (const char ch : in) {
        if (ch == '<') {
            sink << "&lt;";
        } else if (ch == '>') {
            sink << "&gt;";
        } else if (ch == '&') {
            sink << "&amp;";
        } else {
            sink << ch;
        }
    }
}
146
147
0
std::string escape_for_html_to_string(const std::string& in) {
148
0
    std::stringstream str;
149
0
    escape_for_html(in, &str);
150
0
    return str.str();
151
0
}
152
} // namespace doris