Coverage Report

Created: 2026-04-14 17:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/cast/cast_to_jsonb.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "core/assert_cast.h"
19
#include "core/data_type/data_type_jsonb.h"
20
#include "core/data_type/data_type_nullable.h"
21
#include "core/data_type/primitive_type.h"
22
#include "core/data_type_serde/data_type_serde.h"
23
#include "core/string_ref.h"
24
#include "core/value/jsonb_value.h"
25
#include "exprs/function/cast/cast_base.h"
26
#include "exprs/function/cast/cast_to_string.h"
27
#include "util/io_helper.h"
28
#include "util/jsonb_utils.h"
29
#include "util/jsonb_writer.h"
30
31
namespace doris::CastWrapper {
32
33
struct ConvertImplGenericFromJsonb {
34
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
35
                          uint32_t result, size_t input_rows_count,
36
1
                          const NullMap::value_type* null_map = nullptr) {
37
1
        auto data_type_to = block.get_by_position(result).type;
38
1
        auto data_type_serde_to = data_type_to->get_serde();
39
40
1
        DataTypeSerDe::FormatOptions options;
41
1
        options.converted_from_string = true;
42
1
        options.escape_char = '\\';
43
1
        options.timezone = &context->state()->timezone_obj();
44
45
1
        const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
46
1
        const IColumn& col_from = *col_with_type_and_name.column;
47
1
        if (const ColumnString* col_from_string = check_and_get_column<ColumnString>(&col_from)) {
48
1
            auto col_to = data_type_to->create_column();
49
50
1
            size_t size = col_from.size();
51
1
            col_to->reserve(size);
52
53
1
            ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 0);
54
1
            ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data();
55
1
            const bool is_complex = is_complex_type(data_type_to->get_primitive_type());
56
1
            const bool is_dst_string = is_string_type(data_type_to->get_primitive_type());
57
2
            for (size_t i = 0; i < size; ++i) {
58
1
                const auto& val = col_from_string->get_data_at(i);
59
1
                const JsonbDocument* doc = nullptr;
60
1
                auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc);
61
1
                if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
62
1
                    (*vec_null_map_to)[i] = 1;
63
1
                    col_to->insert_default();
64
1
                    continue;
65
1
                }
66
67
                // value is NOT necessary to be deleted since JsonbValue will not allocate memory
68
0
                const JsonbValue* value = doc->getValue();
69
0
                if (UNLIKELY(!value)) {
70
0
                    (*vec_null_map_to)[i] = 1;
71
0
                    col_to->insert_default();
72
0
                    continue;
73
0
                }
74
                // Note: here we should handle the null element
75
0
                if (val.size == 0) {
76
0
                    col_to->insert_default();
77
                    // empty string('') is an invalid format for complex type, set null_map to 1
78
0
                    if (is_complex) {
79
0
                        (*vec_null_map_to)[i] = 1;
80
0
                    }
81
0
                    continue;
82
0
                }
83
                // add string to string column
84
0
                if (context->jsonb_string_as_string() && is_dst_string && value->isString()) {
85
0
                    const auto* blob = value->unpack<JsonbBinaryVal>();
86
0
                    assert_cast<ColumnString&, TypeCheckOnRelease::DISABLE>(*col_to).insert_data(
87
0
                            blob->getBlob(), blob->getBlobLen());
88
0
                    (*vec_null_map_to)[i] = 0;
89
0
                    continue;
90
0
                }
91
0
                std::string input_str;
92
0
                if (context->jsonb_string_as_string() && value->isString()) {
93
0
                    const auto* blob = value->unpack<JsonbBinaryVal>();
94
0
                    input_str = std::string(blob->getBlob(), blob->getBlobLen());
95
0
                } else {
96
0
                    input_str = JsonbToJson::jsonb_to_json_string(val.data, val.size);
97
0
                }
98
0
                if (input_str.empty()) {
99
0
                    col_to->insert_default();
100
0
                    (*vec_null_map_to)[i] = 1;
101
0
                    continue;
102
0
                }
103
0
                StringRef read_buffer((char*)(input_str.data()), input_str.size());
104
0
                st = data_type_serde_to->from_string(read_buffer, *col_to, options);
105
                // if parsing failed, will return null
106
0
                (*vec_null_map_to)[i] = !st.ok();
107
0
                if (!st.ok()) {
108
0
                    col_to->insert_default();
109
0
                }
110
0
            }
111
1
            block.get_by_position(result).column =
112
1
                    ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
113
1
        } else {
114
0
            return Status::RuntimeError(
115
0
                    "Illegal column {} of first argument of conversion function from string",
116
0
                    col_from.get_name());
117
0
        }
118
1
        return Status::OK();
119
1
    }
120
};
121
122
0
// Returns true when `pt` is one of the primitive types accepted here: integer/bool,
// float/double, string, decimal, ARRAY or STRUCT.
inline bool can_cast_json_type(PrimitiveType pt) {
    const bool is_scalar_like = is_int_or_bool(pt) || is_float_or_double(pt) ||
                                is_string_type(pt) || is_decimal(pt);
    if (is_scalar_like) {
        return true;
    }
    return pt == TYPE_ARRAY || pt == TYPE_STRUCT;
}
126
127
// check jsonb value type and get to_type value
128
WrapperType create_cast_from_jsonb_wrapper(const DataTypeJsonb& from_type,
129
                                           const DataTypePtr& to_type,
130
113
                                           bool jsonb_string_as_string) {
131
113
    if (is_string_type(to_type->get_primitive_type()) && jsonb_string_as_string) {
132
1
        return ConvertImplGenericFromJsonb::execute;
133
1
    }
134
135
112
    return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
136
112
              uint32_t result, size_t input_rows_count, const NullMap::value_type*) {
137
112
        CastParameters params;
138
112
        params.is_strict = context->enable_strict_mode();
139
140
112
        auto data_type_to = remove_nullable(block.get_by_position(result).type);
141
112
        auto serde_to = data_type_to->get_serde();
142
143
112
        const auto& col_from_json =
144
112
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
145
146
112
        auto column_to = make_nullable(data_type_to)->create_column();
147
112
        auto& column_to_nullable = assert_cast<ColumnNullable&>(*column_to);
148
149
112
        RETURN_IF_ERROR(serde_to->deserialize_column_from_jsonb_vector(column_to_nullable,
150
112
                                                                       col_from_json, params));
151
152
112
        block.get_by_position(result).column = std::move(column_to);
153
112
        return Status::OK();
154
112
    };
155
113
}
156
157
struct ParseJsonbFromString {
158
4
    static Status parse_json(const StringRef& str, ColumnString& column_string) {
159
4
        if (str.empty()) {
160
0
            return Status::InvalidArgument("Empty string cannot be parsed as jsonb");
161
0
        }
162
4
        JsonBinaryValue value;
163
4
        auto st = (value.from_json_string(str.data, str.size));
164
4
        if (!st.ok()) {
165
2
            return Status::InvalidArgument("Failed to parse json string: {}, error: {}",
166
2
                                           str.to_string(), st.msg());
167
2
        }
168
2
        column_string.insert_data(value.value(), value.size());
169
2
        return Status::OK();
170
4
    }
171
172
    static Status execute_non_strict(const ColumnString& col_from, size_t size,
173
4
                                     ColumnPtr& column_result) {
174
4
        auto col_to = ColumnString::create();
175
4
        auto col_null = ColumnBool::create(size, 0);
176
4
        auto& vec_null_map_to = col_null->get_data();
177
178
8
        for (size_t i = 0; i < size; ++i) {
179
4
            Status st = parse_json(col_from.get_data_at(i), *col_to);
180
4
            vec_null_map_to[i] = !st.ok();
181
4
            if (!st.ok()) [[unlikely]] {
182
2
                col_to->insert_default();
183
2
            }
184
4
        }
185
4
        column_result = ColumnNullable::create(std::move(col_to), std::move(col_null));
186
4
        return Status::OK();
187
4
    }
188
189
    // in both strict or non-strict mode, the return type is nullable column
190
    static Status execute_strict(const ColumnString& col_from, const NullMap::value_type* null_map,
191
0
                                 size_t size, ColumnPtr& column_result) {
192
0
        auto col_to = ColumnString::create();
193
0
        for (size_t i = 0; i < size; ++i) {
194
0
            if (null_map && null_map[i]) {
195
0
                col_to->insert_default();
196
0
                continue;
197
0
            }
198
0
            RETURN_IF_ERROR(parse_json(col_from.get_data_at(i), *col_to));
199
0
        }
200
0
        column_result = ColumnNullable::create(std::move(col_to), ColumnBool::create(size, 0));
201
0
        return Status::OK();
202
0
    }
203
204
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
205
                          uint32_t result, size_t input_rows_count,
206
4
                          const NullMap::value_type* null_map) {
207
4
        const auto& col_from =
208
4
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
209
4
        const auto size = col_from.size();
210
211
4
        ColumnPtr column_result;
212
4
        if (context->enable_strict_mode()) {
213
0
            RETURN_IF_ERROR(execute_strict(col_from, null_map, size, column_result));
214
215
4
        } else {
216
4
            RETURN_IF_ERROR(execute_non_strict(col_from, size, column_result));
217
4
        }
218
4
        block.get_by_position(result).column = std::move(column_result);
219
220
4
        return Status::OK();
221
4
    }
222
};
223
224
// create corresponding jsonb value with type to_type
225
// use jsonb writer to create jsonb value
226
WrapperType create_cast_to_jsonb_wrapper(const DataTypePtr& from_type, const DataTypeJsonb& to_type,
227
48.1k
                                         bool string_as_jsonb_string) {
228
    // parse string as jsonb
229
48.1k
    if (is_string_type(from_type->get_primitive_type()) && !string_as_jsonb_string) {
230
4
        return ParseJsonbFromString::execute;
231
4
    }
232
233
48.1k
    return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
234
48.1k
              uint32_t result, size_t input_rows_count, const NullMap::value_type*) {
235
        // same as to_json function
236
48.1k
        auto to_column = ColumnString::create();
237
48.1k
        auto from_type_serde = block.get_by_position(arguments[0]).type->get_serde();
238
48.1k
        auto from_column = block.get_by_position(arguments[0]).column;
239
48.1k
        RETURN_IF_ERROR(
240
48.1k
                from_type_serde->serialize_column_to_jsonb_vector(*from_column, *to_column));
241
48.1k
        block.get_by_position(result).column = std::move(to_column);
242
48.1k
        return Status::OK();
243
48.1k
    };
244
48.1k
}
245
} // namespace doris::CastWrapper