Coverage Report

Created: 2026-04-16 15:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/cast/cast_to_jsonb.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "core/assert_cast.h"
19
#include "core/data_type/data_type_jsonb.h"
20
#include "core/data_type/data_type_nullable.h"
21
#include "core/data_type/primitive_type.h"
22
#include "core/data_type_serde/data_type_serde.h"
23
#include "core/string_ref.h"
24
#include "core/value/jsonb_value.h"
25
#include "exprs/function/cast/cast_base.h"
26
#include "exprs/function/cast/cast_to_string.h"
27
#include "util/jsonb_utils.h"
28
#include "util/jsonb_writer.h"
29
30
namespace doris::CastWrapper {
31
32
struct ConvertImplGenericFromJsonb {
33
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
34
                          uint32_t result, size_t input_rows_count,
35
1
                          const NullMap::value_type* null_map = nullptr) {
36
1
        auto data_type_to = block.get_by_position(result).type;
37
1
        auto data_type_serde_to = data_type_to->get_serde();
38
39
1
        DataTypeSerDe::FormatOptions options;
40
1
        options.converted_from_string = true;
41
1
        options.escape_char = '\\';
42
1
        options.timezone = &context->state()->timezone_obj();
43
44
1
        const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
45
1
        const IColumn& col_from = *col_with_type_and_name.column;
46
1
        if (const ColumnString* col_from_string = check_and_get_column<ColumnString>(&col_from)) {
47
1
            auto col_to = data_type_to->create_column();
48
49
1
            size_t size = col_from.size();
50
1
            col_to->reserve(size);
51
52
1
            ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 0);
53
1
            ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data();
54
1
            const bool is_complex = is_complex_type(data_type_to->get_primitive_type());
55
1
            const bool is_dst_string = is_string_type(data_type_to->get_primitive_type());
56
2
            for (size_t i = 0; i < size; ++i) {
57
1
                const auto& val = col_from_string->get_data_at(i);
58
1
                const JsonbDocument* doc = nullptr;
59
1
                auto st = JsonbDocument::checkAndCreateDocument(val.data, val.size, &doc);
60
1
                if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
61
1
                    (*vec_null_map_to)[i] = 1;
62
1
                    col_to->insert_default();
63
1
                    continue;
64
1
                }
65
66
                // value is NOT necessary to be deleted since JsonbValue will not allocate memory
67
0
                const JsonbValue* value = doc->getValue();
68
0
                if (UNLIKELY(!value)) {
69
0
                    (*vec_null_map_to)[i] = 1;
70
0
                    col_to->insert_default();
71
0
                    continue;
72
0
                }
73
                // Note: here we should handle the null element
74
0
                if (val.size == 0) {
75
0
                    col_to->insert_default();
76
                    // empty string('') is an invalid format for complex type, set null_map to 1
77
0
                    if (is_complex) {
78
0
                        (*vec_null_map_to)[i] = 1;
79
0
                    }
80
0
                    continue;
81
0
                }
82
                // add string to string column
83
0
                if (context->jsonb_string_as_string() && is_dst_string && value->isString()) {
84
0
                    const auto* blob = value->unpack<JsonbBinaryVal>();
85
0
                    assert_cast<ColumnString&, TypeCheckOnRelease::DISABLE>(*col_to).insert_data(
86
0
                            blob->getBlob(), blob->getBlobLen());
87
0
                    (*vec_null_map_to)[i] = 0;
88
0
                    continue;
89
0
                }
90
0
                std::string input_str;
91
0
                if (context->jsonb_string_as_string() && value->isString()) {
92
0
                    const auto* blob = value->unpack<JsonbBinaryVal>();
93
0
                    input_str = std::string(blob->getBlob(), blob->getBlobLen());
94
0
                } else {
95
0
                    input_str = JsonbToJson::jsonb_to_json_string(val.data, val.size);
96
0
                }
97
0
                if (input_str.empty()) {
98
0
                    col_to->insert_default();
99
0
                    (*vec_null_map_to)[i] = 1;
100
0
                    continue;
101
0
                }
102
0
                StringRef read_buffer((char*)(input_str.data()), input_str.size());
103
0
                st = data_type_serde_to->from_string(read_buffer, *col_to, options);
104
                // if parsing failed, will return null
105
0
                (*vec_null_map_to)[i] = !st.ok();
106
0
                if (!st.ok()) {
107
0
                    col_to->insert_default();
108
0
                }
109
0
            }
110
1
            block.get_by_position(result).column =
111
1
                    ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
112
1
        } else {
113
0
            return Status::RuntimeError(
114
0
                    "Illegal column {} of first argument of conversion function from string",
115
0
                    col_from.get_name());
116
0
        }
117
1
        return Status::OK();
118
1
    }
119
};
120
121
0
/// Tells whether `pt` belongs to the set of primitive types accepted here: anything
/// matched by `is_int_or_bool`, `is_float_or_double`, `is_string_type` or `is_decimal`,
/// plus `TYPE_ARRAY` and `TYPE_STRUCT`.
inline bool can_cast_json_type(PrimitiveType pt) {
    if (is_int_or_bool(pt)) {
        return true;
    }
    if (is_float_or_double(pt)) {
        return true;
    }
    if (is_string_type(pt)) {
        return true;
    }
    if (is_decimal(pt)) {
        return true;
    }
    return pt == TYPE_ARRAY || pt == TYPE_STRUCT;
}
125
126
// check jsonb value type and get to_type value
127
WrapperType create_cast_from_jsonb_wrapper(const DataTypeJsonb& from_type,
128
                                           const DataTypePtr& to_type,
129
113
                                           bool jsonb_string_as_string) {
130
113
    if (is_string_type(to_type->get_primitive_type()) && jsonb_string_as_string) {
131
1
        return ConvertImplGenericFromJsonb::execute;
132
1
    }
133
134
112
    return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
135
112
              uint32_t result, size_t input_rows_count, const NullMap::value_type*) {
136
112
        CastParameters params;
137
112
        params.is_strict = context->enable_strict_mode();
138
139
112
        auto data_type_to = remove_nullable(block.get_by_position(result).type);
140
112
        auto serde_to = data_type_to->get_serde();
141
142
112
        const auto& col_from_json =
143
112
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
144
145
112
        auto column_to = make_nullable(data_type_to)->create_column();
146
112
        auto& column_to_nullable = assert_cast<ColumnNullable&>(*column_to);
147
148
112
        RETURN_IF_ERROR(serde_to->deserialize_column_from_jsonb_vector(column_to_nullable,
149
112
                                                                       col_from_json, params));
150
151
112
        block.get_by_position(result).column = std::move(column_to);
152
112
        return Status::OK();
153
112
    };
154
113
}
155
156
struct ParseJsonbFromString {
157
4
    static Status parse_json(const StringRef& str, ColumnString& column_string) {
158
4
        if (str.empty()) {
159
0
            return Status::InvalidArgument("Empty string cannot be parsed as jsonb");
160
0
        }
161
4
        JsonBinaryValue value;
162
4
        auto st = (value.from_json_string(str.data, str.size));
163
4
        if (!st.ok()) {
164
2
            return Status::InvalidArgument("Failed to parse json string: {}, error: {}",
165
2
                                           str.to_string(), st.msg());
166
2
        }
167
2
        column_string.insert_data(value.value(), value.size());
168
2
        return Status::OK();
169
4
    }
170
171
    static Status execute_non_strict(const ColumnString& col_from, size_t size,
172
4
                                     ColumnPtr& column_result) {
173
4
        auto col_to = ColumnString::create();
174
4
        auto col_null = ColumnBool::create(size, 0);
175
4
        auto& vec_null_map_to = col_null->get_data();
176
177
8
        for (size_t i = 0; i < size; ++i) {
178
4
            Status st = parse_json(col_from.get_data_at(i), *col_to);
179
4
            vec_null_map_to[i] = !st.ok();
180
4
            if (!st.ok()) [[unlikely]] {
181
2
                col_to->insert_default();
182
2
            }
183
4
        }
184
4
        column_result = ColumnNullable::create(std::move(col_to), std::move(col_null));
185
4
        return Status::OK();
186
4
    }
187
188
    // in both strict or non-strict mode, the return type is nullable column
189
    static Status execute_strict(const ColumnString& col_from, const NullMap::value_type* null_map,
190
0
                                 size_t size, ColumnPtr& column_result) {
191
0
        auto col_to = ColumnString::create();
192
0
        for (size_t i = 0; i < size; ++i) {
193
0
            if (null_map && null_map[i]) {
194
0
                col_to->insert_default();
195
0
                continue;
196
0
            }
197
0
            RETURN_IF_ERROR(parse_json(col_from.get_data_at(i), *col_to));
198
0
        }
199
0
        column_result = ColumnNullable::create(std::move(col_to), ColumnBool::create(size, 0));
200
0
        return Status::OK();
201
0
    }
202
203
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
204
                          uint32_t result, size_t input_rows_count,
205
4
                          const NullMap::value_type* null_map) {
206
4
        const auto& col_from =
207
4
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
208
4
        const auto size = col_from.size();
209
210
4
        ColumnPtr column_result;
211
4
        if (context->enable_strict_mode()) {
212
0
            RETURN_IF_ERROR(execute_strict(col_from, null_map, size, column_result));
213
214
4
        } else {
215
4
            RETURN_IF_ERROR(execute_non_strict(col_from, size, column_result));
216
4
        }
217
4
        block.get_by_position(result).column = std::move(column_result);
218
219
4
        return Status::OK();
220
4
    }
221
};
222
223
// create corresponding jsonb value with type to_type
224
// use jsonb writer to create jsonb value
225
WrapperType create_cast_to_jsonb_wrapper(const DataTypePtr& from_type, const DataTypeJsonb& to_type,
226
48.1k
                                         bool string_as_jsonb_string) {
227
    // parse string as jsonb
228
48.1k
    if (is_string_type(from_type->get_primitive_type()) && !string_as_jsonb_string) {
229
4
        return ParseJsonbFromString::execute;
230
4
    }
231
232
48.1k
    return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
233
48.1k
              uint32_t result, size_t input_rows_count, const NullMap::value_type*) {
234
        // same as to_json function
235
48.1k
        auto to_column = ColumnString::create();
236
48.1k
        auto from_type_serde = block.get_by_position(arguments[0]).type->get_serde();
237
48.1k
        auto from_column = block.get_by_position(arguments[0]).column;
238
48.1k
        RETURN_IF_ERROR(
239
48.1k
                from_type_serde->serialize_column_to_jsonb_vector(*from_column, *to_column));
240
48.1k
        block.get_by_position(result).column = std::move(to_column);
241
48.1k
        return Status::OK();
242
48.1k
    };
243
48.1k
}
244
} // namespace doris::CastWrapper