Coverage Report

Created: 2025-06-07 21:54

/root/doris/be/src/util/url_coding.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "util/url_coding.h"
19
20
#include <libbase64.h>
21
22
#include <sstream>
23
24
namespace doris {
25
26
122
// Converts a nibble value to its upper-case hexadecimal ASCII digit:
// 0-9 map to '0'-'9' and 10-15 map to 'A'-'F'.
// The argument is not range-checked; values above 15 simply continue past 'F'.
inline unsigned char to_hex(unsigned char x) {
    if (x > 9) {
        // 'A' - 10 is the offset that places the value 10 on 'A'.
        return static_cast<unsigned char>(x + ('A' - 10));
    }
    return static_cast<unsigned char>(x + '0');
}
29
30
// Adapted from http://dlib.net/dlib/server/server_http.cpp.html
//
// URL-encodes `in` and stores the result in `*out`, replacing its previous
// contents. The character classes follow java.net.URLEncoder
// (https://docs.oracle.com/javase/8/docs/api/java/net/URLEncoder.html):
//   - ASCII letters, digits and '.', '-', '*', '_' are copied unchanged;
//   - ' ' is written as '+';
//   - every other byte is written as "%XX" with two upper-case hex digits.
//
// The result is built in a local string and only handed to `*out` after `in`
// has been fully read, so `in` may refer to the contents of `*out`.
void url_encode(const std::string_view& in, std::string* out) {
    static constexpr char kHexDigits[] = "0123456789ABCDEF";

    std::string encoded;
    // Lower bound on the output size; escaped bytes grow the string further.
    encoded.reserve(in.size());

    for (const char c : in) {
        const bool is_allowed = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
                                (c >= '0' && c <= '9') || c == '.' || c == '-' || c == '*' ||
                                c == '_';
        if (is_allowed) {
            encoded += c;
        } else if (c == ' ') {
            encoded += '+';
        } else {
            // Plain char may be signed. An earlier version applied the modulus
            // to the signed value, which produced wrong digits for bytes >= 0x80
            // (UTF-8 data), so the byte is converted to unsigned before splitting
            // it into nibbles.
            // TODO: consider delegating to a third-party URL library instead.
            const auto byte = static_cast<unsigned char>(c);
            encoded += '%';
            encoded += kHexDigits[byte >> 4];
            encoded += kHexDigits[byte & 0x0F];
        }
    }

    out->swap(encoded);
}
49
50
// Adapted from
// http://www.boost.org/doc/libs/1_40_0/doc/html/boost_asio/
//   example/http/server3/request_handler.cpp
// See http://www.boost.org/LICENSE_1_0.txt for license for this method.
//
// Decodes the URL-encoded string `in` into `*out`:
//   - "%XX" (exactly two hex digits, either case) becomes the byte 0xXX;
//   - '+' becomes ' ';
//   - every other character is copied unchanged.
//
// Returns true on success. Returns false when a '%' is not followed by two
// hexadecimal digits; in that case `*out` holds whatever was decoded before
// the malformed escape.
//
// `*out` is cleared before `in` is read, so the two must not be the same string.
bool url_decode(const std::string& in, std::string* out) {
    // Value of a single hex digit, or -1 when `c` is not a hex digit.
    // Parsing the digits by hand (instead of `istringstream >> std::hex`)
    // rejects inputs the stream extraction tolerated, such as a sign
    // ("%-1"), leading whitespace ("% 1") or a lone valid digit ("%1g").
    const auto hex_value = [](char c) -> int {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    };

    out->clear();
    out->reserve(in.size());

    for (size_t i = 0; i < in.size(); ++i) {
        const char c = in[i];
        if (c == '%') {
            // An escape needs the '%' plus two more characters.
            if (in.size() - i < 3) {
                return false;
            }
            const int high = hex_value(in[i + 1]);
            const int low = hex_value(in[i + 2]);
            if (high < 0 || low < 0) {
                return false;
            }
            (*out) += static_cast<char>((high << 4) | low);
            i += 2; // skip the two digits just consumed
        } else if (c == '+') {
            (*out) += ' ';
        } else {
            (*out) += c;
        }
    }

    return true;
}
82
83
3
void base64_encode(const std::string& in, std::string* out) {
84
3
    out->resize(size_t(in.length() * (4.0 / 3) + 1));
85
3
    auto len = base64_encode(reinterpret_cast<const unsigned char*>(in.c_str()), in.length(),
86
3
                             (unsigned char*)out->c_str());
87
3
    out->resize(len);
88
3
}
89
90
27
size_t base64_encode(const unsigned char* data, size_t length, unsigned char* encoded_data) {
91
27
    size_t encode_len = 0;
92
#if defined(__aarch64__) || defined(_M_ARM64)
93
    do_base64_encode(reinterpret_cast<const char*>(data), length,
94
                     reinterpret_cast<char*>(encoded_data), &encode_len, BASE64_FORCE_NEON64);
95
#else
96
27
    do_base64_encode(reinterpret_cast<const char*>(data), length,
97
27
                     reinterpret_cast<char*>(encoded_data), &encode_len, 0);
98
27
#endif
99
27
    return encode_len;
100
27
}
101
102
47
int64_t base64_decode(const char* data, size_t length, char* decoded_data) {
103
47
    size_t decode_len = 0;
104
#if defined(__aarch64__) || defined(_M_ARM64)
105
    auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length, decoded_data,
106
                                &decode_len, BASE64_FORCE_NEON64);
107
#else
108
47
    auto ret = do_base64_decode(reinterpret_cast<const char*>(data), length, decoded_data,
109
47
                                &decode_len, 0);
110
47
#endif
111
47
    return ret > 0 ? decode_len : -1;
112
47
}
113
114
11
bool base64_decode(const std::string& in, std::string* out) {
115
11
    out->resize(in.length());
116
117
11
    int64_t len = base64_decode(in.c_str(), in.length(), out->data());
118
11
    if (len < 0) {
119
2
        return false;
120
2
    }
121
9
    out->resize(len);
122
9
    return true;
123
11
}
124
125
0
// Appends `in` to `*out`, replacing '<', '>' and '&' with the HTML entities
// "&lt;", "&gt;" and "&amp;". All other characters are written unchanged.
// Quote characters are not escaped by this function.
void escape_for_html(const std::string& in, std::stringstream* out) {
    std::stringstream& sink = *out;
    for (const char ch : in) {
        if (ch == '<') {
            sink << "&lt;";
        } else if (ch == '>') {
            sink << "&gt;";
        } else if (ch == '&') {
            sink << "&amp;";
        } else {
            sink << ch;
        }
    }
}
145
146
0
std::string escape_for_html_to_string(const std::string& in) {
147
0
    std::stringstream str;
148
0
    escape_for_html(in, &str);
149
0
    return str.str();
150
0
}
151
} // namespace doris