Coverage Report

Created: 2026-04-11 13:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/es/es_scroll_parser.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exec/es/es_scroll_parser.h"
19
20
#include <absl/strings/substitute.h>
21
#include <cctz/time_zone.h>
22
#include <glog/logging.h>
23
#include <rapidjson/allocators.h>
24
#include <rapidjson/encodings.h>
25
#include <stdint.h>
26
#include <string.h>
27
28
// IWYU pragma: no_include <bits/chrono.h>
29
#include <chrono> // IWYU pragma: keep
30
#include <cstdlib>
31
#include <ostream>
32
#include <string>
33
34
#include "common/status.h"
35
#include "core/binary_cast.hpp"
36
#include "core/column/column.h"
37
#include "core/column/column_nullable.h"
38
#include "core/data_type/data_type_array.h"
39
#include "core/data_type/data_type_nullable.h"
40
#include "core/data_type/define_primitive_type.h"
41
#include "core/data_type/primitive_type.h"
42
#include "core/field.h"
43
#include "core/value/decimalv2_value.h"
44
#include "core/value/jsonb_value.h"
45
#include "core/value/vdatetime_value.h"
46
#include "exprs/function/cast/cast_to_date_or_datetime_impl.hpp"
47
#include "exprs/function/cast/cast_to_datetimev2_impl.hpp"
48
#include "exprs/function/cast/cast_to_datev2_impl.hpp"
49
#include "rapidjson/document.h"
50
#include "rapidjson/rapidjson.h"
51
#include "rapidjson/stringbuffer.h"
52
#include "rapidjson/writer.h"
53
#include "runtime/descriptors.h"
54
#include "util/string_parser.hpp"
55
56
namespace doris {
57
58
static const char* FIELD_SCROLL_ID = "_scroll_id";
59
static const char* FIELD_HITS = "hits";
60
static const char* FIELD_INNER_HITS = "hits";
61
static const char* FIELD_SOURCE = "_source";
62
static const char* FIELD_ID = "_id";
63
64
// get the original json data type
65
0
std::string json_type_to_string(rapidjson::Type type) {
66
0
    switch (type) {
67
0
    case rapidjson::kNumberType:
68
0
        return "Number";
69
0
    case rapidjson::kStringType:
70
0
        return "Varchar/Char";
71
0
    case rapidjson::kArrayType:
72
0
        return "Array";
73
0
    case rapidjson::kObjectType:
74
0
        return "Object";
75
0
    case rapidjson::kNullType:
76
0
        return "Null Type";
77
0
    case rapidjson::kFalseType:
78
0
    case rapidjson::kTrueType:
79
0
        return "True/False";
80
0
    default:
81
0
        return "Unknown Type";
82
0
    }
83
0
}
84
85
// transfer rapidjson::Value to string representation
86
1.50k
std::string json_value_to_string(const rapidjson::Value& value) {
87
1.50k
    rapidjson::StringBuffer scratch_buffer;
88
1.50k
    rapidjson::Writer<rapidjson::StringBuffer> temp_writer(scratch_buffer);
89
1.50k
    value.Accept(temp_writer);
90
1.50k
    return scratch_buffer.GetString();
91
1.50k
}
92
93
static const std::string ERROR_INVALID_COL_DATA =
94
        "Data source returned inconsistent column data. "
95
        "Expected value of type {} based on column metadata. This likely indicates a "
96
        "problem with the data source library.";
97
static const std::string ERROR_MEM_LIMIT_EXCEEDED =
98
        "DataSourceScanNode::$0() failed to allocate "
99
        "$1 bytes for $2.";
100
static const std::string ERROR_COL_DATA_IS_ARRAY =
101
        "Data source returned an array for the type $0"
102
        "based on column metadata.";
103
static const std::string INVALID_NULL_VALUE =
104
        "Invalid null value occurs: Non-null column `$0` contains NULL";
105
106
#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type, is_array)                    \
107
4.02k
    do {                                                                     \
108
4.02k
        if (col.IsArray() == is_array) {                                     \
109
0
            std::stringstream ss;                                            \
110
0
            ss << "Expected value of type: " << type_to_string(type)         \
111
0
               << "; but found type: " << json_type_to_string(col.GetType()) \
112
0
               << "; Document slice is : " << json_value_to_string(col);     \
113
0
            return Status::RuntimeError(ss.str());                           \
114
0
        }                                                                    \
115
4.02k
    } while (false)
116
117
#define RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type)                            \
118
4.02k
    do {                                                                        \
119
4.02k
        if (!col.IsString()) {                                                  \
120
0
            std::stringstream ss;                                               \
121
0
            ss << "Expected value of type: " << type_to_string(type)            \
122
0
               << "; but found type: " << json_type_to_string(col.GetType())    \
123
0
               << "; Document source slice is : " << json_value_to_string(col); \
124
0
            return Status::RuntimeError(ss.str());                              \
125
0
        }                                                                       \
126
4.02k
    } while (false)
127
128
#define RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col, type)                         \
129
0
    do {                                                                     \
130
0
        if (!col.IsNumber()) {                                               \
131
0
            std::stringstream ss;                                            \
132
0
            ss << "Expected value of type: " << type_to_string(type)         \
133
0
               << "; but found type: " << json_type_to_string(col.GetType()) \
134
0
               << "; Document value is: " << json_value_to_string(col);      \
135
0
            return Status::RuntimeError(ss.str());                           \
136
0
        }                                                                    \
137
0
    } while (false)
138
139
#define RETURN_ERROR_IF_PARSING_FAILED(result, col, type)                       \
140
220
    do {                                                                        \
141
220
        if (result != StringParser::PARSE_SUCCESS) {                            \
142
0
            std::stringstream ss;                                               \
143
0
            ss << "Expected value of type: " << type_to_string(type)            \
144
0
               << "; but found type: " << json_type_to_string(col.GetType())    \
145
0
               << "; Document source slice is : " << json_value_to_string(col); \
146
0
            return Status::RuntimeError(ss.str());                              \
147
0
        }                                                                       \
148
220
    } while (false)
149
150
#define RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type)                     \
151
0
    do {                                                                 \
152
0
        std::stringstream ss;                                            \
153
0
        ss << "Expected value of type: " << type_to_string(type)         \
154
0
           << "; but found type: " << json_type_to_string(col.GetType()) \
155
0
           << "; Document slice is : " << json_value_to_string(col);     \
156
0
        return Status::RuntimeError(ss.str());                           \
157
0
    } while (false)
158
159
template <typename T>
160
Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
161
13.3k
                     bool pure_doc_value) {
162
13.3k
    if (col.IsNumber()) {
163
13.3k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
164
13.3k
        return Status::OK();
165
13.3k
    }
166
167
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
168
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
169
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
170
0
        return Status::OK();
171
0
    }
172
173
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
174
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
175
176
0
    StringParser::ParseResult result;
177
0
    const std::string& val = col.GetString();
178
0
    size_t len = col.GetStringLength();
179
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
180
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
181
182
0
    if (sizeof(T) < 16) {
183
0
        *reinterpret_cast<T*>(slot) = v;
184
0
    } else {
185
0
        DCHECK(sizeof(T) == 16);
186
0
        memcpy(slot, &v, sizeof(v));
187
0
    }
188
189
0
    return Status::OK();
190
0
}
_ZN5doris13get_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
161
2.60k
                     bool pure_doc_value) {
162
2.60k
    if (col.IsNumber()) {
163
2.60k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
164
2.60k
        return Status::OK();
165
2.60k
    }
166
167
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
168
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
169
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
170
0
        return Status::OK();
171
0
    }
172
173
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
174
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
175
176
0
    StringParser::ParseResult result;
177
0
    const std::string& val = col.GetString();
178
0
    size_t len = col.GetStringLength();
179
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
180
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
181
182
0
    if (sizeof(T) < 16) {
183
0
        *reinterpret_cast<T*>(slot) = v;
184
0
    } else {
185
0
        DCHECK(sizeof(T) == 16);
186
0
        memcpy(slot, &v, sizeof(v));
187
0
    }
188
189
0
    return Status::OK();
190
0
}
_ZN5doris13get_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
161
2.60k
                     bool pure_doc_value) {
162
2.60k
    if (col.IsNumber()) {
163
2.60k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
164
2.60k
        return Status::OK();
165
2.60k
    }
166
167
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
168
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
169
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
170
0
        return Status::OK();
171
0
    }
172
173
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
174
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
175
176
0
    StringParser::ParseResult result;
177
0
    const std::string& val = col.GetString();
178
0
    size_t len = col.GetStringLength();
179
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
180
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
181
182
0
    if (sizeof(T) < 16) {
183
0
        *reinterpret_cast<T*>(slot) = v;
184
0
    } else {
185
0
        DCHECK(sizeof(T) == 16);
186
0
        memcpy(slot, &v, sizeof(v));
187
0
    }
188
189
0
    return Status::OK();
190
0
}
_ZN5doris13get_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
161
2.90k
                     bool pure_doc_value) {
162
2.90k
    if (col.IsNumber()) {
163
2.90k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
164
2.90k
        return Status::OK();
165
2.90k
    }
166
167
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
168
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
169
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
170
0
        return Status::OK();
171
0
    }
172
173
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
174
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
175
176
0
    StringParser::ParseResult result;
177
0
    const std::string& val = col.GetString();
178
0
    size_t len = col.GetStringLength();
179
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
180
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
181
182
0
    if (sizeof(T) < 16) {
183
0
        *reinterpret_cast<T*>(slot) = v;
184
0
    } else {
185
0
        DCHECK(sizeof(T) == 16);
186
0
        memcpy(slot, &v, sizeof(v));
187
0
    }
188
189
0
    return Status::OK();
190
0
}
_ZN5doris13get_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
161
3.53k
                     bool pure_doc_value) {
162
3.53k
    if (col.IsNumber()) {
163
3.53k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
164
3.53k
        return Status::OK();
165
3.53k
    }
166
167
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
168
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
169
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
170
0
        return Status::OK();
171
0
    }
172
173
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
174
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
175
176
0
    StringParser::ParseResult result;
177
0
    const std::string& val = col.GetString();
178
0
    size_t len = col.GetStringLength();
179
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
180
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
181
182
0
    if (sizeof(T) < 16) {
183
0
        *reinterpret_cast<T*>(slot) = v;
184
0
    } else {
185
0
        DCHECK(sizeof(T) == 16);
186
0
        memcpy(slot, &v, sizeof(v));
187
0
    }
188
189
0
    return Status::OK();
190
0
}
_ZN5doris13get_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
161
1.66k
                     bool pure_doc_value) {
162
1.66k
    if (col.IsNumber()) {
163
1.66k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
164
1.66k
        return Status::OK();
165
1.66k
    }
166
167
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
168
0
        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
169
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
170
0
        return Status::OK();
171
0
    }
172
173
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
174
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
175
176
0
    StringParser::ParseResult result;
177
0
    const std::string& val = col.GetString();
178
0
    size_t len = col.GetStringLength();
179
0
    T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
180
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
181
182
0
    if (sizeof(T) < 16) {
183
0
        *reinterpret_cast<T*>(slot) = v;
184
0
    } else {
185
0
        DCHECK(sizeof(T) == 16);
186
0
        memcpy(slot, &v, sizeof(v));
187
0
    }
188
189
0
    return Status::OK();
190
0
}
191
192
template <PrimitiveType T>
193
Status get_date_value_int(const rapidjson::Value& col, PrimitiveType type, bool is_date_str,
194
                          typename PrimitiveTypeTraits<T>::CppType* slot,
195
4.38k
                          const cctz::time_zone& time_zone) {
196
4.38k
    constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME;
197
4.38k
    typename PrimitiveTypeTraits<T>::CppType dt_val;
198
4.38k
    if (is_date_str) {
199
4.15k
        const std::string str_date = col.GetString();
200
4.15k
        int str_length = col.GetStringLength();
201
4.15k
        bool success = false;
202
4.15k
        if (str_length > 19) {
203
602
            std::chrono::system_clock::time_point tp;
204
            // time_zone suffix pattern
205
            // Z/+08:00/-04:30
206
602
            RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)");
207
602
            bool ok = false;
208
602
            std::string fmt;
209
602
            re2::StringPiece value;
210
602
            if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) {
211
                // with time_zone info
212
                // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00
213
                // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00
214
486
                fmt = "%Y-%m-%dT%H:%M:%E*S%Ez";
215
486
                cctz::time_zone ctz;
216
                // find time_zone by time_zone suffix string
217
486
                TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz);
218
486
                ok = cctz::parse(fmt, str_date, ctz, &tp);
219
486
            } else {
220
                // without time_zone info
221
                // 2022-08-08T12:10:10.000
222
116
                fmt = "%Y-%m-%dT%H:%M:%E*S";
223
                // If the time without time_zone info, ES will assume it is UTC time.
224
                // So we parse it in Doris with UTC time zone.
225
116
                ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp);
226
116
            }
227
602
            if (ok) {
228
                // The local time zone can change by session variable `time_zone`
229
                // We should use the user specified time zone, not the actual system local time zone.
230
602
                success = true;
231
602
                dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
232
602
            }
233
3.54k
        } else if (str_length == 19) {
234
            // YYYY-MM-DDTHH:MM:SS
235
1.90k
            if (*(str_date.c_str() + 10) == 'T') {
236
40
                std::chrono::system_clock::time_point tp;
237
40
                const bool ok =
238
40
                        cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp);
239
40
                if (ok) {
240
40
                    success = true;
241
40
                    dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
242
40
                }
243
1.86k
            } else {
244
                // YYYY-MM-DD HH:MM:SS
245
1.86k
                CastParameters params;
246
1.86k
                if constexpr (is_datetime_v1) {
247
0
                    success = CastToDateOrDatetime::from_string_non_strict_mode<
248
0
                            DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
249
0
                                                           dt_val, nullptr, params);
250
0
                } else if constexpr (T == TYPE_DATEV2) {
251
0
                    success = CastToDateV2::from_string_non_strict_mode(
252
0
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
253
1.86k
                } else {
254
1.86k
                    success = CastToDatetimeV2::from_string_non_strict_mode(
255
1.86k
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
256
1.86k
                }
257
1.86k
            }
258
259
1.90k
        } else if (str_length == 13) {
260
            // string long like "1677895728000"
261
0
            int64_t time_long = std::atol(str_date.c_str());
262
0
            if (time_long > 0) {
263
0
                success = true;
264
0
                dt_val.from_unixtime(time_long / 1000, time_zone);
265
0
            }
266
1.64k
        } else {
267
            // YYYY-MM-DD or others
268
1.64k
            CastParameters params;
269
1.64k
            if constexpr (is_datetime_v1) {
270
0
                success = CastToDateOrDatetime::from_string_non_strict_mode<
271
0
                        DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
272
0
                                                       dt_val, nullptr, params);
273
1.30k
            } else if constexpr (T == TYPE_DATEV2) {
274
1.30k
                success = CastToDateV2::from_string_non_strict_mode(
275
1.30k
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
276
1.30k
            } else {
277
344
                success = CastToDatetimeV2::from_string_non_strict_mode(
278
344
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
279
344
            }
280
1.64k
        }
281
282
4.15k
        if (!success) {
283
0
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
284
0
        }
285
286
4.15k
    } else {
287
236
        dt_val.from_unixtime(col.GetInt64() / 1000, time_zone);
288
236
    }
289
4.38k
    if constexpr (is_datetime_v1) {
290
0
        if (type == TYPE_DATE) {
291
0
            dt_val.cast_to_date();
292
0
        } else {
293
0
            dt_val.to_datetime();
294
0
        }
295
0
    }
296
297
4.38k
    *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val);
298
4.38k
    return Status::OK();
299
4.38k
}
_ZN5doris18get_date_value_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
195
1.30k
                          const cctz::time_zone& time_zone) {
196
1.30k
    constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME;
197
1.30k
    typename PrimitiveTypeTraits<T>::CppType dt_val;
198
1.30k
    if (is_date_str) {
199
1.30k
        const std::string str_date = col.GetString();
200
1.30k
        int str_length = col.GetStringLength();
201
1.30k
        bool success = false;
202
1.30k
        if (str_length > 19) {
203
0
            std::chrono::system_clock::time_point tp;
204
            // time_zone suffix pattern
205
            // Z/+08:00/-04:30
206
0
            RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)");
207
0
            bool ok = false;
208
0
            std::string fmt;
209
0
            re2::StringPiece value;
210
0
            if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) {
211
                // with time_zone info
212
                // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00
213
                // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00
214
0
                fmt = "%Y-%m-%dT%H:%M:%E*S%Ez";
215
0
                cctz::time_zone ctz;
216
                // find time_zone by time_zone suffix string
217
0
                TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz);
218
0
                ok = cctz::parse(fmt, str_date, ctz, &tp);
219
0
            } else {
220
                // without time_zone info
221
                // 2022-08-08T12:10:10.000
222
0
                fmt = "%Y-%m-%dT%H:%M:%E*S";
223
                // If the time without time_zone info, ES will assume it is UTC time.
224
                // So we parse it in Doris with UTC time zone.
225
0
                ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp);
226
0
            }
227
0
            if (ok) {
228
                // The local time zone can change by session variable `time_zone`
229
                // We should use the user specified time zone, not the actual system local time zone.
230
0
                success = true;
231
0
                dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
232
0
            }
233
1.30k
        } else if (str_length == 19) {
234
            // YYYY-MM-DDTHH:MM:SS
235
0
            if (*(str_date.c_str() + 10) == 'T') {
236
0
                std::chrono::system_clock::time_point tp;
237
0
                const bool ok =
238
0
                        cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp);
239
0
                if (ok) {
240
0
                    success = true;
241
0
                    dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
242
0
                }
243
0
            } else {
244
                // YYYY-MM-DD HH:MM:SS
245
0
                CastParameters params;
246
                if constexpr (is_datetime_v1) {
247
                    success = CastToDateOrDatetime::from_string_non_strict_mode<
248
                            DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
249
                                                           dt_val, nullptr, params);
250
0
                } else if constexpr (T == TYPE_DATEV2) {
251
0
                    success = CastToDateV2::from_string_non_strict_mode(
252
0
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
253
                } else {
254
                    success = CastToDatetimeV2::from_string_non_strict_mode(
255
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
256
                }
257
0
            }
258
259
1.30k
        } else if (str_length == 13) {
260
            // string long like "1677895728000"
261
0
            int64_t time_long = std::atol(str_date.c_str());
262
0
            if (time_long > 0) {
263
0
                success = true;
264
0
                dt_val.from_unixtime(time_long / 1000, time_zone);
265
0
            }
266
1.30k
        } else {
267
            // YYYY-MM-DD or others
268
1.30k
            CastParameters params;
269
            if constexpr (is_datetime_v1) {
270
                success = CastToDateOrDatetime::from_string_non_strict_mode<
271
                        DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
272
                                                       dt_val, nullptr, params);
273
1.30k
            } else if constexpr (T == TYPE_DATEV2) {
274
1.30k
                success = CastToDateV2::from_string_non_strict_mode(
275
1.30k
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
276
            } else {
277
                success = CastToDatetimeV2::from_string_non_strict_mode(
278
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
279
            }
280
1.30k
        }
281
282
1.30k
        if (!success) {
283
0
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
284
0
        }
285
286
1.30k
    } else {
287
0
        dt_val.from_unixtime(col.GetInt64() / 1000, time_zone);
288
0
    }
289
    if constexpr (is_datetime_v1) {
290
        if (type == TYPE_DATE) {
291
            dt_val.cast_to_date();
292
        } else {
293
            dt_val.to_datetime();
294
        }
295
    }
296
297
1.30k
    *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val);
298
1.30k
    return Status::OK();
299
1.30k
}
_ZN5doris18get_date_value_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
195
3.08k
                          const cctz::time_zone& time_zone) {
196
3.08k
    constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME;
197
3.08k
    typename PrimitiveTypeTraits<T>::CppType dt_val;
198
3.08k
    if (is_date_str) {
199
2.85k
        const std::string str_date = col.GetString();
200
2.85k
        int str_length = col.GetStringLength();
201
2.85k
        bool success = false;
202
2.85k
        if (str_length > 19) {
203
602
            std::chrono::system_clock::time_point tp;
204
            // time_zone suffix pattern
205
            // Z/+08:00/-04:30
206
602
            RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)");
207
602
            bool ok = false;
208
602
            std::string fmt;
209
602
            re2::StringPiece value;
210
602
            if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) {
211
                // with time_zone info
212
                // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00
213
                // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00
214
486
                fmt = "%Y-%m-%dT%H:%M:%E*S%Ez";
215
486
                cctz::time_zone ctz;
216
                // find time_zone by time_zone suffix string
217
486
                TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz);
218
486
                ok = cctz::parse(fmt, str_date, ctz, &tp);
219
486
            } else {
220
                // without time_zone info
221
                // 2022-08-08T12:10:10.000
222
116
                fmt = "%Y-%m-%dT%H:%M:%E*S";
223
                // If the time without time_zone info, ES will assume it is UTC time.
224
                // So we parse it in Doris with UTC time zone.
225
116
                ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp);
226
116
            }
227
602
            if (ok) {
228
                // The local time zone can change by session variable `time_zone`
229
                // We should use the user specified time zone, not the actual system local time zone.
230
602
                success = true;
231
602
                dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
232
602
            }
233
2.24k
        } else if (str_length == 19) {
234
            // YYYY-MM-DDTHH:MM:SS
235
1.90k
            if (*(str_date.c_str() + 10) == 'T') {
236
40
                std::chrono::system_clock::time_point tp;
237
40
                const bool ok =
238
40
                        cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp);
239
40
                if (ok) {
240
40
                    success = true;
241
40
                    dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone);
242
40
                }
243
1.86k
            } else {
244
                // YYYY-MM-DD HH:MM:SS
245
1.86k
                CastParameters params;
246
                if constexpr (is_datetime_v1) {
247
                    success = CastToDateOrDatetime::from_string_non_strict_mode<
248
                            DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
249
                                                           dt_val, nullptr, params);
250
                } else if constexpr (T == TYPE_DATEV2) {
251
                    success = CastToDateV2::from_string_non_strict_mode(
252
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
253
1.86k
                } else {
254
1.86k
                    success = CastToDatetimeV2::from_string_non_strict_mode(
255
1.86k
                            {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
256
1.86k
                }
257
1.86k
            }
258
259
1.90k
        } else if (str_length == 13) {
260
            // string long like "1677895728000"
261
0
            int64_t time_long = std::atol(str_date.c_str());
262
0
            if (time_long > 0) {
263
0
                success = true;
264
0
                dt_val.from_unixtime(time_long / 1000, time_zone);
265
0
            }
266
344
        } else {
267
            // YYYY-MM-DD or others
268
344
            CastParameters params;
269
            if constexpr (is_datetime_v1) {
270
                success = CastToDateOrDatetime::from_string_non_strict_mode<
271
                        DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length},
272
                                                       dt_val, nullptr, params);
273
            } else if constexpr (T == TYPE_DATEV2) {
274
                success = CastToDateV2::from_string_non_strict_mode(
275
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params);
276
344
            } else {
277
344
                success = CastToDatetimeV2::from_string_non_strict_mode(
278
344
                        {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params);
279
344
            }
280
344
        }
281
282
2.85k
        if (!success) {
283
0
            RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
284
0
        }
285
286
2.85k
    } else {
287
236
        dt_val.from_unixtime(col.GetInt64() / 1000, time_zone);
288
236
    }
289
    if constexpr (is_datetime_v1) {
290
        if (type == TYPE_DATE) {
291
            dt_val.cast_to_date();
292
        } else {
293
            dt_val.to_datetime();
294
        }
295
    }
296
297
3.08k
    *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val);
298
3.08k
    return Status::OK();
299
3.08k
}
Unexecuted instantiation: _ZN5doris18get_date_value_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris18get_date_value_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
300
301
template <PrimitiveType T>
302
Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
303
                    typename PrimitiveTypeTraits<T>::CppType* slot,
304
4.38k
                    const cctz::time_zone& time_zone) {
305
    // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
306
4.38k
    if (col.IsNumber()) {
307
        // ES process date/datetime field would use millisecond timestamp for index or docvalue
308
        // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
309
        // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
310
148
        return get_date_value_int<T>(col, type, false, slot, time_zone);
311
4.23k
    } else if (col.IsArray() && pure_doc_value && !col.Empty()) {
312
        // this would happened just only when `enable_docvalue_scan = true`
313
        // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
314
        // a standard date-format for date field as `2020-06-16T00:00:00.000Z`
315
        // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
316
        // date field's docvalue
317
432
        if (col[0].IsString()) {
318
344
            return get_date_value_int<T>(col[0], type, true, slot, time_zone);
319
344
        }
320
        // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
321
88
        return get_date_value_int<T>(col[0], type, false, slot, time_zone);
322
3.80k
    } else {
323
        // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
324
3.80k
        RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
325
3.80k
        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
326
3.80k
        return get_date_value_int<T>(col, type, true, slot, time_zone);
327
3.80k
    }
328
4.38k
}
_ZN5doris12get_date_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
304
1.30k
                    const cctz::time_zone& time_zone) {
305
    // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
306
1.30k
    if (col.IsNumber()) {
307
        // ES process date/datetime field would use millisecond timestamp for index or docvalue
308
        // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
309
        // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
310
0
        return get_date_value_int<T>(col, type, false, slot, time_zone);
311
1.30k
    } else if (col.IsArray() && pure_doc_value && !col.Empty()) {
312
        // this would happened just only when `enable_docvalue_scan = true`
313
        // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
314
        // a standard date-format for date field as `2020-06-16T00:00:00.000Z`
315
        // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
316
        // date field's docvalue
317
0
        if (col[0].IsString()) {
318
0
            return get_date_value_int<T>(col[0], type, true, slot, time_zone);
319
0
        }
320
        // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
321
0
        return get_date_value_int<T>(col[0], type, false, slot, time_zone);
322
1.30k
    } else {
323
        // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
324
1.30k
        RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
325
1.30k
        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
326
1.30k
        return get_date_value_int<T>(col, type, true, slot, time_zone);
327
1.30k
    }
328
1.30k
}
_ZN5doris12get_date_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Line
Count
Source
304
3.08k
                    const cctz::time_zone& time_zone) {
305
    // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source
306
3.08k
    if (col.IsNumber()) {
307
        // ES process date/datetime field would use millisecond timestamp for index or docvalue
308
        // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms
309
        // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds
310
148
        return get_date_value_int<T>(col, type, false, slot, time_zone);
311
2.93k
    } else if (col.IsArray() && pure_doc_value && !col.Empty()) {
312
        // this would happened just only when `enable_docvalue_scan = true`
313
        // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose
314
        // a standard date-format for date field as `2020-06-16T00:00:00.000Z`
315
        // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
316
        // date field's docvalue
317
432
        if (col[0].IsString()) {
318
344
            return get_date_value_int<T>(col[0], type, true, slot, time_zone);
319
344
        }
320
        // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds
321
88
        return get_date_value_int<T>(col[0], type, false, slot, time_zone);
322
2.50k
    } else {
323
        // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source
324
2.50k
        RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
325
2.50k
        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
326
2.50k
        return get_date_value_int<T>(col, type, true, slot, time_zone);
327
2.50k
    }
328
3.08k
}
Unexecuted instantiation: _ZN5doris12get_date_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris12get_date_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE
329
template <PrimitiveType T>
330
Status fill_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value,
331
1.78k
                     IColumn* col_ptr, const cctz::time_zone& time_zone) {
332
1.78k
    typename PrimitiveTypeTraits<T>::CppType data;
333
1.78k
    RETURN_IF_ERROR((get_date_int<T>(col, type, pure_doc_value, &data, time_zone)));
334
1.78k
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
335
1.78k
    return Status::OK();
336
1.78k
}
Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
_ZN5doris13fill_date_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE
Line
Count
Source
331
1.78k
                     IColumn* col_ptr, const cctz::time_zone& time_zone) {
332
1.78k
    typename PrimitiveTypeTraits<T>::CppType data;
333
1.78k
    RETURN_IF_ERROR((get_date_int<T>(col, type, pure_doc_value, &data, time_zone)));
334
1.78k
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
335
1.78k
    return Status::OK();
336
1.78k
}
337
338
template <typename T>
339
Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
340
10.4k
                       bool pure_doc_value) {
341
10.4k
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
342
10.4k
    if (col.IsNumber()) {
343
10.4k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
344
10.4k
        return Status::OK();
345
10.4k
    }
346
347
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
348
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
349
0
        return Status::OK();
350
0
    }
351
352
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
353
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
354
355
0
    StringParser::ParseResult result;
356
0
    const std::string& val = col.GetString();
357
0
    size_t len = col.GetStringLength();
358
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
359
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
360
0
    *reinterpret_cast<T*>(slot) = v;
361
362
0
    return Status::OK();
363
0
}
_ZN5doris15get_float_valueIfEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
340
5.20k
                       bool pure_doc_value) {
341
5.20k
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
342
5.20k
    if (col.IsNumber()) {
343
5.20k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
344
5.20k
        return Status::OK();
345
5.20k
    }
346
347
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
348
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
349
0
        return Status::OK();
350
0
    }
351
352
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
353
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
354
355
0
    StringParser::ParseResult result;
356
0
    const std::string& val = col.GetString();
357
0
    size_t len = col.GetStringLength();
358
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
359
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
360
0
    *reinterpret_cast<T*>(slot) = v;
361
362
0
    return Status::OK();
363
0
}
_ZN5doris15get_float_valueIdEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb
Line
Count
Source
340
5.20k
                       bool pure_doc_value) {
341
5.20k
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
342
5.20k
    if (col.IsNumber()) {
343
5.20k
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
344
5.20k
        return Status::OK();
345
5.20k
    }
346
347
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
348
0
        *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
349
0
        return Status::OK();
350
0
    }
351
352
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
353
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
354
355
0
    StringParser::ParseResult result;
356
0
    const std::string& val = col.GetString();
357
0
    size_t len = col.GetStringLength();
358
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
359
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
360
0
    *reinterpret_cast<T*>(slot) = v;
361
362
0
    return Status::OK();
363
0
}
364
365
template <typename T>
366
Status insert_float_value(const rapidjson::Value& col, PrimitiveType type, IColumn* col_ptr,
367
344
                          bool pure_doc_value, bool nullable) {
368
344
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
369
344
    if (col.IsNumber() && nullable) {
370
344
        T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
371
344
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
372
344
        return Status::OK();
373
344
    }
374
375
0
    if (pure_doc_value && col.IsArray() && !col.Empty() && nullable) {
376
0
        T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
377
0
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
378
0
        return Status::OK();
379
0
    }
380
381
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
382
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
383
384
0
    StringParser::ParseResult result;
385
0
    const std::string& val = col.GetString();
386
0
    size_t len = col.GetStringLength();
387
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
388
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
389
390
0
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
391
392
0
    return Status::OK();
393
0
}
_ZN5doris18insert_float_valueIdEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Line
Count
Source
367
344
                          bool pure_doc_value, bool nullable) {
368
344
    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
369
344
    if (col.IsNumber() && nullable) {
370
344
        T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
371
344
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
372
344
        return Status::OK();
373
344
    }
374
375
0
    if (pure_doc_value && col.IsArray() && !col.Empty() && nullable) {
376
0
        T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
377
0
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
378
0
        return Status::OK();
379
0
    }
380
381
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
382
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
383
384
0
    StringParser::ParseResult result;
385
0
    const std::string& val = col.GetString();
386
0
    size_t len = col.GetStringLength();
387
0
    T v = StringParser::string_to_float<T>(val.c_str(), len, &result);
388
0
    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
389
390
0
    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
391
392
0
    return Status::OK();
393
0
}
Unexecuted instantiation: _ZN5doris18insert_float_valueIfEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
394
395
template <typename T>
396
Status insert_int_value(const rapidjson::Value& col, PrimitiveType type, IColumn* col_ptr,
397
964
                        bool pure_doc_value, bool nullable) {
398
964
    if (col.IsNumber()) {
399
572
        T value;
400
        // ES allows inserting float and double in int/long types.
401
        // To parse these numbers in Doris, we direct cast them to int types.
402
572
        if (col.IsDouble()) {
403
152
            value = static_cast<T>(col.GetDouble());
404
420
        } else if (col.IsFloat()) {
405
0
            value = static_cast<T>(col.GetFloat());
406
420
        } else {
407
420
            value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
408
420
        }
409
572
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
410
572
        return Status::OK();
411
572
    }
412
413
392
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
414
220
        StringParser::ParseResult result;
415
220
        std::string val = col_value.GetString();
416
        // ES allows inserting numbers and characters containing decimals in numeric types.
417
        // To parse these numbers in Doris, we remove the decimals here.
418
220
        size_t pos = val.find('.');
419
220
        if (pos != std::string::npos) {
420
84
            val = val.substr(0, pos);
421
84
        }
422
220
        size_t len = val.length();
423
220
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
424
220
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
425
426
220
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
427
220
        return Status::OK();
428
220
    };
Unexecuted instantiation: _ZZN5doris16insert_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
Unexecuted instantiation: _ZZN5doris16insert_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
Unexecuted instantiation: _ZZN5doris16insert_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
_ZZN5doris16insert_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
Line
Count
Source
413
220
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
414
220
        StringParser::ParseResult result;
415
220
        std::string val = col_value.GetString();
416
        // ES allows inserting numbers and characters containing decimals in numeric types.
417
        // To parse these numbers in Doris, we remove the decimals here.
418
220
        size_t pos = val.find('.');
419
220
        if (pos != std::string::npos) {
420
84
            val = val.substr(0, pos);
421
84
        }
422
220
        size_t len = val.length();
423
220
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
424
220
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
425
426
220
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
427
220
        return Status::OK();
428
220
    };
Unexecuted instantiation: _ZZN5doris16insert_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_
429
430
392
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
431
276
        if (col[0].IsNumber()) {
432
172
            T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
433
172
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
434
172
            return Status::OK();
435
172
        } else {
436
104
            RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
437
104
            RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
438
104
            return parse_and_insert_data(col[0]);
439
104
        }
440
276
    }
441
442
116
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
443
116
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
444
116
    return parse_and_insert_data(col);
445
116
}
Unexecuted instantiation: _ZN5doris16insert_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Unexecuted instantiation: _ZN5doris16insert_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
_ZN5doris16insert_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Line
Count
Source
397
44
                        bool pure_doc_value, bool nullable) {
398
44
    if (col.IsNumber()) {
399
44
        T value;
400
        // ES allows inserting float and double in int/long types.
401
        // To parse these numbers in Doris, we direct cast them to int types.
402
44
        if (col.IsDouble()) {
403
0
            value = static_cast<T>(col.GetDouble());
404
44
        } else if (col.IsFloat()) {
405
0
            value = static_cast<T>(col.GetFloat());
406
44
        } else {
407
44
            value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
408
44
        }
409
44
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
410
44
        return Status::OK();
411
44
    }
412
413
0
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
414
0
        StringParser::ParseResult result;
415
0
        std::string val = col_value.GetString();
416
        // ES allows inserting numbers and characters containing decimals in numeric types.
417
        // To parse these numbers in Doris, we remove the decimals here.
418
0
        size_t pos = val.find('.');
419
0
        if (pos != std::string::npos) {
420
0
            val = val.substr(0, pos);
421
0
        }
422
0
        size_t len = val.length();
423
0
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
424
0
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
425
426
0
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
427
0
        return Status::OK();
428
0
    };
429
430
0
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
431
0
        if (col[0].IsNumber()) {
432
0
            T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
433
0
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
434
0
            return Status::OK();
435
0
        } else {
436
0
            RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
437
0
            RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
438
0
            return parse_and_insert_data(col[0]);
439
0
        }
440
0
    }
441
442
0
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
443
0
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
444
0
    return parse_and_insert_data(col);
445
0
}
_ZN5doris16insert_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
Line
Count
Source
397
920
                        bool pure_doc_value, bool nullable) {
398
920
    if (col.IsNumber()) {
399
528
        T value;
400
        // ES allows inserting float and double in int/long types.
401
        // To parse these numbers in Doris, we direct cast them to int types.
402
528
        if (col.IsDouble()) {
403
152
            value = static_cast<T>(col.GetDouble());
404
376
        } else if (col.IsFloat()) {
405
0
            value = static_cast<T>(col.GetFloat());
406
376
        } else {
407
376
            value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64());
408
376
        }
409
528
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
410
528
        return Status::OK();
411
528
    }
412
413
392
    auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status {
414
392
        StringParser::ParseResult result;
415
392
        std::string val = col_value.GetString();
416
        // ES allows inserting numbers and characters containing decimals in numeric types.
417
        // To parse these numbers in Doris, we remove the decimals here.
418
392
        size_t pos = val.find('.');
419
392
        if (pos != std::string::npos) {
420
392
            val = val.substr(0, pos);
421
392
        }
422
392
        size_t len = val.length();
423
392
        T v = StringParser::string_to_int<T>(val.c_str(), len, &result);
424
392
        RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type);
425
426
392
        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
427
392
        return Status::OK();
428
392
    };
429
430
392
    if (pure_doc_value && col.IsArray() && !col.Empty()) {
431
276
        if (col[0].IsNumber()) {
432
172
            T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64());
433
172
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
434
172
            return Status::OK();
435
172
        } else {
436
104
            RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true);
437
104
            RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type);
438
104
            return parse_and_insert_data(col[0]);
439
104
        }
440
276
    }
441
442
116
    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
443
116
    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
444
116
    return parse_and_insert_data(col);
445
116
}
Unexecuted instantiation: _ZN5doris16insert_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb
446
447
template <PrimitiveType T>
448
Status handle_value(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
449
31.8k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
13.3k
                  T == TYPE_LARGEINT) {
452
13.3k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
13.3k
                                                                                pure_doc_value));
454
13.3k
        return Status::OK();
455
13.3k
    }
456
5.20k
    if constexpr (T == TYPE_FLOAT) {
457
5.20k
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
5.20k
        return Status::OK();
459
5.20k
    }
460
5.20k
    if constexpr (T == TYPE_DOUBLE) {
461
5.20k
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
5.20k
        return Status::OK();
463
5.20k
    }
464
5.50k
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
5.50k
        if (col.IsArray()) {
469
0
            val = json_value_to_string(col);
470
5.50k
        } else if (!col.IsString()) {
471
0
            val = json_value_to_string(col);
472
5.50k
        } else {
473
5.50k
            val = col.GetString();
474
5.50k
        }
475
5.50k
        return Status::OK();
476
5.50k
    }
477
2.60k
    if constexpr (T == TYPE_BOOLEAN) {
478
2.60k
        if (col.IsBool()) {
479
2.60k
            val = col.GetBool();
480
2.60k
            return Status::OK();
481
2.60k
        }
482
483
0
        if (col.IsNumber()) {
484
0
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
0
            return Status::OK();
486
0
        }
487
488
0
        bool is_nested_str = false;
489
0
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
0
            val = col[0].GetBool();
491
0
            return Status::OK();
492
0
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
0
            is_nested_str = true;
494
0
        } else if (pure_doc_value && col.IsArray()) {
495
0
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
0
        }
497
498
0
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
0
        const std::string& str_val = str_col.GetString();
500
0
        size_t val_size = str_col.GetStringLength();
501
0
        StringParser::ParseResult result;
502
0
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
0
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
0
        return Status::OK();
505
0
    }
506
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
31.8k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
449
5.50k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
                  T == TYPE_LARGEINT) {
452
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
                                                                                pure_doc_value));
454
        return Status::OK();
455
    }
456
    if constexpr (T == TYPE_FLOAT) {
457
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
        return Status::OK();
459
    }
460
    if constexpr (T == TYPE_DOUBLE) {
461
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
        return Status::OK();
463
    }
464
5.50k
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
5.50k
        if (col.IsArray()) {
469
0
            val = json_value_to_string(col);
470
5.50k
        } else if (!col.IsString()) {
471
0
            val = json_value_to_string(col);
472
5.50k
        } else {
473
5.50k
            val = col.GetString();
474
5.50k
        }
475
5.50k
        return Status::OK();
476
5.50k
    }
477
    if constexpr (T == TYPE_BOOLEAN) {
478
        if (col.IsBool()) {
479
            val = col.GetBool();
480
            return Status::OK();
481
        }
482
483
        if (col.IsNumber()) {
484
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
            return Status::OK();
486
        }
487
488
        bool is_nested_str = false;
489
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
            val = col[0].GetBool();
491
            return Status::OK();
492
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
            is_nested_str = true;
494
        } else if (pure_doc_value && col.IsArray()) {
495
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
        }
497
498
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
        const std::string& str_val = str_col.GetString();
500
        size_t val_size = str_col.GetStringLength();
501
        StringParser::ParseResult result;
502
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
        return Status::OK();
505
    }
506
5.50k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
5.50k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
449
2.60k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
2.60k
                  T == TYPE_LARGEINT) {
452
2.60k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
2.60k
                                                                                pure_doc_value));
454
2.60k
        return Status::OK();
455
2.60k
    }
456
    if constexpr (T == TYPE_FLOAT) {
457
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
        return Status::OK();
459
    }
460
    if constexpr (T == TYPE_DOUBLE) {
461
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
        return Status::OK();
463
    }
464
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
        if (col.IsArray()) {
469
            val = json_value_to_string(col);
470
        } else if (!col.IsString()) {
471
            val = json_value_to_string(col);
472
        } else {
473
            val = col.GetString();
474
        }
475
        return Status::OK();
476
    }
477
    if constexpr (T == TYPE_BOOLEAN) {
478
        if (col.IsBool()) {
479
            val = col.GetBool();
480
            return Status::OK();
481
        }
482
483
        if (col.IsNumber()) {
484
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
            return Status::OK();
486
        }
487
488
        bool is_nested_str = false;
489
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
            val = col[0].GetBool();
491
            return Status::OK();
492
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
            is_nested_str = true;
494
        } else if (pure_doc_value && col.IsArray()) {
495
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
        }
497
498
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
        const std::string& str_val = str_col.GetString();
500
        size_t val_size = str_col.GetStringLength();
501
        StringParser::ParseResult result;
502
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
        return Status::OK();
505
    }
506
2.60k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
2.60k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
449
2.60k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
2.60k
                  T == TYPE_LARGEINT) {
452
2.60k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
2.60k
                                                                                pure_doc_value));
454
2.60k
        return Status::OK();
455
2.60k
    }
456
    if constexpr (T == TYPE_FLOAT) {
457
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
        return Status::OK();
459
    }
460
    if constexpr (T == TYPE_DOUBLE) {
461
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
        return Status::OK();
463
    }
464
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
        if (col.IsArray()) {
469
            val = json_value_to_string(col);
470
        } else if (!col.IsString()) {
471
            val = json_value_to_string(col);
472
        } else {
473
            val = col.GetString();
474
        }
475
        return Status::OK();
476
    }
477
    if constexpr (T == TYPE_BOOLEAN) {
478
        if (col.IsBool()) {
479
            val = col.GetBool();
480
            return Status::OK();
481
        }
482
483
        if (col.IsNumber()) {
484
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
            return Status::OK();
486
        }
487
488
        bool is_nested_str = false;
489
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
            val = col[0].GetBool();
491
            return Status::OK();
492
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
            is_nested_str = true;
494
        } else if (pure_doc_value && col.IsArray()) {
495
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
        }
497
498
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
        const std::string& str_val = str_col.GetString();
500
        size_t val_size = str_col.GetStringLength();
501
        StringParser::ParseResult result;
502
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
        return Status::OK();
505
    }
506
2.60k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
2.60k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
449
2.90k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
2.90k
                  T == TYPE_LARGEINT) {
452
2.90k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
2.90k
                                                                                pure_doc_value));
454
2.90k
        return Status::OK();
455
2.90k
    }
456
    if constexpr (T == TYPE_FLOAT) {
457
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
        return Status::OK();
459
    }
460
    if constexpr (T == TYPE_DOUBLE) {
461
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
        return Status::OK();
463
    }
464
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
        if (col.IsArray()) {
469
            val = json_value_to_string(col);
470
        } else if (!col.IsString()) {
471
            val = json_value_to_string(col);
472
        } else {
473
            val = col.GetString();
474
        }
475
        return Status::OK();
476
    }
477
    if constexpr (T == TYPE_BOOLEAN) {
478
        if (col.IsBool()) {
479
            val = col.GetBool();
480
            return Status::OK();
481
        }
482
483
        if (col.IsNumber()) {
484
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
            return Status::OK();
486
        }
487
488
        bool is_nested_str = false;
489
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
            val = col[0].GetBool();
491
            return Status::OK();
492
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
            is_nested_str = true;
494
        } else if (pure_doc_value && col.IsArray()) {
495
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
        }
497
498
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
        const std::string& str_val = str_col.GetString();
500
        size_t val_size = str_col.GetStringLength();
501
        StringParser::ParseResult result;
502
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
        return Status::OK();
505
    }
506
2.90k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
2.90k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
449
3.53k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
3.53k
                  T == TYPE_LARGEINT) {
452
3.53k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
3.53k
                                                                                pure_doc_value));
454
3.53k
        return Status::OK();
455
3.53k
    }
456
    if constexpr (T == TYPE_FLOAT) {
457
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
        return Status::OK();
459
    }
460
    if constexpr (T == TYPE_DOUBLE) {
461
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
        return Status::OK();
463
    }
464
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
        if (col.IsArray()) {
469
            val = json_value_to_string(col);
470
        } else if (!col.IsString()) {
471
            val = json_value_to_string(col);
472
        } else {
473
            val = col.GetString();
474
        }
475
        return Status::OK();
476
    }
477
    if constexpr (T == TYPE_BOOLEAN) {
478
        if (col.IsBool()) {
479
            val = col.GetBool();
480
            return Status::OK();
481
        }
482
483
        if (col.IsNumber()) {
484
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
            return Status::OK();
486
        }
487
488
        bool is_nested_str = false;
489
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
            val = col[0].GetBool();
491
            return Status::OK();
492
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
            is_nested_str = true;
494
        } else if (pure_doc_value && col.IsArray()) {
495
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
        }
497
498
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
        const std::string& str_val = str_col.GetString();
500
        size_t val_size = str_col.GetStringLength();
501
        StringParser::ParseResult result;
502
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
        return Status::OK();
505
    }
506
3.53k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
3.53k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
449
1.66k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
1.66k
                  T == TYPE_LARGEINT) {
452
1.66k
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
1.66k
                                                                                pure_doc_value));
454
1.66k
        return Status::OK();
455
1.66k
    }
456
    if constexpr (T == TYPE_FLOAT) {
457
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
        return Status::OK();
459
    }
460
    if constexpr (T == TYPE_DOUBLE) {
461
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
        return Status::OK();
463
    }
464
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
        if (col.IsArray()) {
469
            val = json_value_to_string(col);
470
        } else if (!col.IsString()) {
471
            val = json_value_to_string(col);
472
        } else {
473
            val = col.GetString();
474
        }
475
        return Status::OK();
476
    }
477
    if constexpr (T == TYPE_BOOLEAN) {
478
        if (col.IsBool()) {
479
            val = col.GetBool();
480
            return Status::OK();
481
        }
482
483
        if (col.IsNumber()) {
484
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
            return Status::OK();
486
        }
487
488
        bool is_nested_str = false;
489
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
            val = col[0].GetBool();
491
            return Status::OK();
492
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
            is_nested_str = true;
494
        } else if (pure_doc_value && col.IsArray()) {
495
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
        }
497
498
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
        const std::string& str_val = str_col.GetString();
500
        size_t val_size = str_col.GetStringLength();
501
        StringParser::ParseResult result;
502
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
        return Status::OK();
505
    }
506
1.66k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
1.66k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
449
5.20k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
                  T == TYPE_LARGEINT) {
452
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
                                                                                pure_doc_value));
454
        return Status::OK();
455
    }
456
5.20k
    if constexpr (T == TYPE_FLOAT) {
457
5.20k
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
5.20k
        return Status::OK();
459
5.20k
    }
460
    if constexpr (T == TYPE_DOUBLE) {
461
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
        return Status::OK();
463
    }
464
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
        if (col.IsArray()) {
469
            val = json_value_to_string(col);
470
        } else if (!col.IsString()) {
471
            val = json_value_to_string(col);
472
        } else {
473
            val = col.GetString();
474
        }
475
        return Status::OK();
476
    }
477
    if constexpr (T == TYPE_BOOLEAN) {
478
        if (col.IsBool()) {
479
            val = col.GetBool();
480
            return Status::OK();
481
        }
482
483
        if (col.IsNumber()) {
484
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
            return Status::OK();
486
        }
487
488
        bool is_nested_str = false;
489
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
            val = col[0].GetBool();
491
            return Status::OK();
492
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
            is_nested_str = true;
494
        } else if (pure_doc_value && col.IsArray()) {
495
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
        }
497
498
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
        const std::string& str_val = str_col.GetString();
500
        size_t val_size = str_col.GetStringLength();
501
        StringParser::ParseResult result;
502
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
        return Status::OK();
505
    }
506
5.20k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
5.20k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
449
5.20k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
                  T == TYPE_LARGEINT) {
452
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
                                                                                pure_doc_value));
454
        return Status::OK();
455
    }
456
    if constexpr (T == TYPE_FLOAT) {
457
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
        return Status::OK();
459
    }
460
5.20k
    if constexpr (T == TYPE_DOUBLE) {
461
5.20k
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
5.20k
        return Status::OK();
463
5.20k
    }
464
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
        if (col.IsArray()) {
469
            val = json_value_to_string(col);
470
        } else if (!col.IsString()) {
471
            val = json_value_to_string(col);
472
        } else {
473
            val = col.GetString();
474
        }
475
        return Status::OK();
476
    }
477
    if constexpr (T == TYPE_BOOLEAN) {
478
        if (col.IsBool()) {
479
            val = col.GetBool();
480
            return Status::OK();
481
        }
482
483
        if (col.IsNumber()) {
484
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
            return Status::OK();
486
        }
487
488
        bool is_nested_str = false;
489
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
            val = col[0].GetBool();
491
            return Status::OK();
492
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
            is_nested_str = true;
494
        } else if (pure_doc_value && col.IsArray()) {
495
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
        }
497
498
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
        const std::string& str_val = str_col.GetString();
500
        size_t val_size = str_col.GetStringLength();
501
        StringParser::ParseResult result;
502
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
        return Status::OK();
505
    }
506
5.20k
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
5.20k
}
_ZN5doris12handle_valueILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE
Line
Count
Source
449
2.60k
                    typename PrimitiveTypeTraits<T>::CppType& val) {
450
    if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT ||
451
                  T == TYPE_LARGEINT) {
452
        RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val,
453
                                                                                pure_doc_value));
454
        return Status::OK();
455
    }
456
    if constexpr (T == TYPE_FLOAT) {
457
        RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value));
458
        return Status::OK();
459
    }
460
    if constexpr (T == TYPE_DOUBLE) {
461
        RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value));
462
        return Status::OK();
463
    }
464
    if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) {
465
        // When ES mapping is keyword/text but actual data is an array,
466
        // serialize the array to JSON string instead of throwing an error.
467
        // This is valid in ES since any field can hold array values.
468
        if (col.IsArray()) {
469
            val = json_value_to_string(col);
470
        } else if (!col.IsString()) {
471
            val = json_value_to_string(col);
472
        } else {
473
            val = col.GetString();
474
        }
475
        return Status::OK();
476
    }
477
2.60k
    if constexpr (T == TYPE_BOOLEAN) {
478
2.60k
        if (col.IsBool()) {
479
2.60k
            val = col.GetBool();
480
2.60k
            return Status::OK();
481
2.60k
        }
482
483
0
        if (col.IsNumber()) {
484
0
            val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt());
485
0
            return Status::OK();
486
0
        }
487
488
0
        bool is_nested_str = false;
489
0
        if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
490
0
            val = col[0].GetBool();
491
0
            return Status::OK();
492
0
        } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
493
0
            is_nested_str = true;
494
0
        } else if (pure_doc_value && col.IsArray()) {
495
0
            return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
496
0
        }
497
498
0
        const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
499
0
        const std::string& str_val = str_col.GetString();
500
0
        size_t val_size = str_col.GetStringLength();
501
0
        StringParser::ParseResult result;
502
0
        val = StringParser::string_to_bool(str_val.c_str(), val_size, &result);
503
0
        RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type);
504
0
        return Status::OK();
505
0
    }
506
0
    throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T));
507
2.60k
}
508
// Converts a single JSON value to primitive type T and appends it to `array`.
//
// col            - the JSON value to convert.
// sub_type       - forwarded unchanged to handle_value<T>.
// pure_doc_value - forwarded unchanged to handle_value<T>.
// array          - output; receives one Field built from the converted value.
//
// Returns Status::OK() after the append. A failure from handle_value<T> goes
// through RETURN_IF_ERROR (presumably an early return of that status, in which
// case nothing is appended).
509
template <PrimitiveType T>
510
Status process_single_column(const rapidjson::Value& col, PrimitiveType sub_type,
511
31.8k
                             bool pure_doc_value, Array& array) {
    // NOTE(review): `val` has no initializer; this relies on handle_value<T>
    // assigning it on every path that returns OK - confirm.
512
31.8k
    typename PrimitiveTypeTraits<T>::CppType val;
513
31.8k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
31.8k
    array.push_back(Field::create_field<T>(val));
515
31.8k
    return Status::OK();
516
31.8k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
511
5.50k
                             bool pure_doc_value, Array& array) {
512
5.50k
    typename PrimitiveTypeTraits<T>::CppType val;
513
5.50k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
5.50k
    array.push_back(Field::create_field<T>(val));
515
5.50k
    return Status::OK();
516
5.50k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
511
2.60k
                             bool pure_doc_value, Array& array) {
512
2.60k
    typename PrimitiveTypeTraits<T>::CppType val;
513
2.60k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
2.60k
    array.push_back(Field::create_field<T>(val));
515
2.60k
    return Status::OK();
516
2.60k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
511
2.60k
                             bool pure_doc_value, Array& array) {
512
2.60k
    typename PrimitiveTypeTraits<T>::CppType val;
513
2.60k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
2.60k
    array.push_back(Field::create_field<T>(val));
515
2.60k
    return Status::OK();
516
2.60k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
511
2.90k
                             bool pure_doc_value, Array& array) {
512
2.90k
    typename PrimitiveTypeTraits<T>::CppType val;
513
2.90k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
2.90k
    array.push_back(Field::create_field<T>(val));
515
2.90k
    return Status::OK();
516
2.90k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
511
3.53k
                             bool pure_doc_value, Array& array) {
512
3.53k
    typename PrimitiveTypeTraits<T>::CppType val;
513
3.53k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
3.53k
    array.push_back(Field::create_field<T>(val));
515
3.53k
    return Status::OK();
516
3.53k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
511
1.66k
                             bool pure_doc_value, Array& array) {
512
1.66k
    typename PrimitiveTypeTraits<T>::CppType val;
513
1.66k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
1.66k
    array.push_back(Field::create_field<T>(val));
515
1.66k
    return Status::OK();
516
1.66k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
511
5.20k
                             bool pure_doc_value, Array& array) {
512
5.20k
    typename PrimitiveTypeTraits<T>::CppType val;
513
5.20k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
5.20k
    array.push_back(Field::create_field<T>(val));
515
5.20k
    return Status::OK();
516
5.20k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
511
5.20k
                             bool pure_doc_value, Array& array) {
512
5.20k
    typename PrimitiveTypeTraits<T>::CppType val;
513
5.20k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
5.20k
    array.push_back(Field::create_field<T>(val));
515
5.20k
    return Status::OK();
516
5.20k
}
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
511
2.60k
                             bool pure_doc_value, Array& array) {
512
2.60k
    typename PrimitiveTypeTraits<T>::CppType val;
513
2.60k
    RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val));
514
2.60k
    array.push_back(Field::create_field<T>(val));
515
2.60k
    return Status::OK();
516
2.60k
}
517
// Converts every element of the JSON array `col` with process_single_column<T>
// and appends the results to `array` in iteration order.
//
// `col` is read with GetArray() and is not checked here; the visible caller
// process_column only takes this path when col.IsArray() is true.
//
// Returns Status::OK() once all elements are processed. A failing element goes
// through RETURN_IF_ERROR (presumably stopping the loop); elements appended
// before that point are not removed from `array`.
518
template <PrimitiveType T>
519
Status process_column_array(const rapidjson::Value& col, PrimitiveType sub_type,
520
8.64k
                            bool pure_doc_value, Array& array) {
521
31.7k
    for (const auto& sub_col : col.GetArray()) {
522
31.7k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
31.7k
    }
524
8.64k
    return Status::OK();
525
8.64k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
520
2.04k
                            bool pure_doc_value, Array& array) {
521
5.45k
    for (const auto& sub_col : col.GetArray()) {
522
5.45k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
5.45k
    }
524
2.04k
    return Status::OK();
525
2.04k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
520
650
                            bool pure_doc_value, Array& array) {
521
2.60k
    for (const auto& sub_col : col.GetArray()) {
522
2.60k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
2.60k
    }
524
650
    return Status::OK();
525
650
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
520
650
                            bool pure_doc_value, Array& array) {
521
2.60k
    for (const auto& sub_col : col.GetArray()) {
522
2.60k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
2.60k
    }
524
650
    return Status::OK();
525
650
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
520
746
                            bool pure_doc_value, Array& array) {
521
2.85k
    for (const auto& sub_col : col.GetArray()) {
522
2.85k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
2.85k
    }
524
746
    return Status::OK();
525
746
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
520
884
                            bool pure_doc_value, Array& array) {
521
3.53k
    for (const auto& sub_col : col.GetArray()) {
522
3.53k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
3.53k
    }
524
884
    return Status::OK();
525
884
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
520
416
                            bool pure_doc_value, Array& array) {
521
1.66k
    for (const auto& sub_col : col.GetArray()) {
522
1.66k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
1.66k
    }
524
416
    return Status::OK();
525
416
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
520
1.30k
                            bool pure_doc_value, Array& array) {
521
5.20k
    for (const auto& sub_col : col.GetArray()) {
522
5.20k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
5.20k
    }
524
1.30k
    return Status::OK();
525
1.30k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
520
1.30k
                            bool pure_doc_value, Array& array) {
521
5.20k
    for (const auto& sub_col : col.GetArray()) {
522
5.20k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
5.20k
    }
524
1.30k
    return Status::OK();
525
1.30k
}
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
520
650
                            bool pure_doc_value, Array& array) {
521
2.60k
    for (const auto& sub_col : col.GetArray()) {
522
2.60k
        RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array));
523
2.60k
    }
524
650
    return Status::OK();
525
650
}
526
// Entry point for converting a JSON value into elements of type T.
//
// A non-array `col` is treated as a single value and yields one appended
// element; an array `col` is expanded element by element. All arguments are
// forwarded unchanged to the chosen helper, and its Status is returned as is.
527
template <PrimitiveType T>
528
Status process_column(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
529
8.73k
                      Array& array) {
530
8.73k
    if (!col.IsArray()) {
531
96
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
8.64k
    } else {
533
8.64k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
8.64k
    }
535
8.73k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
529
2.09k
                      Array& array) {
530
2.09k
    if (!col.IsArray()) {
531
48
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
2.04k
    } else {
533
2.04k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
2.04k
    }
535
2.09k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
529
650
                      Array& array) {
530
650
    if (!col.IsArray()) {
531
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
650
    } else {
533
650
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
650
    }
535
650
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
529
650
                      Array& array) {
530
650
    if (!col.IsArray()) {
531
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
650
    } else {
533
650
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
650
    }
535
650
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
529
794
                      Array& array) {
530
794
    if (!col.IsArray()) {
531
48
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
746
    } else {
533
746
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
746
    }
535
794
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
529
884
                      Array& array) {
530
884
    if (!col.IsArray()) {
531
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
884
    } else {
533
884
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
884
    }
535
884
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
529
416
                      Array& array) {
530
416
    if (!col.IsArray()) {
531
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
416
    } else {
533
416
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
416
    }
535
416
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
529
1.30k
                      Array& array) {
530
1.30k
    if (!col.IsArray()) {
531
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
1.30k
    } else {
533
1.30k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
1.30k
    }
535
1.30k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
529
1.30k
                      Array& array) {
530
1.30k
    if (!col.IsArray()) {
531
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
1.30k
    } else {
533
1.30k
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
1.30k
    }
535
1.30k
}
_ZN5doris14process_columnILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE
Line
Count
Source
529
650
                      Array& array) {
530
650
    if (!col.IsArray()) {
531
0
        return process_single_column<T>(col, sub_type, pure_doc_value, array);
532
650
    } else {
533
650
        return process_column_array<T>(col, sub_type, pure_doc_value, array);
534
650
    }
535
650
}
536
// Converts a JSON value, or each element of a JSON array, to the date type T
// via get_date_int<T> and appends the result(s) to `array`.
//
// col            - a single JSON value or a JSON array of values.
// sub_type       - forwarded to get_date_int<T>.
// pure_doc_value - forwarded to get_date_int<T>.
// array          - output; one Field is appended per converted value.
// time_zone      - forwarded to get_date_int<T>.
//
// Returns Status::OK() when every value was converted. A failing conversion
// goes through RETURN_IF_ERROR (presumably an early return); values appended
// before the failure stay in `array`.
537
template <PrimitiveType T>
538
Status process_date_column(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value,
539
1.30k
                           Array& array, const cctz::time_zone& time_zone) {
540
1.30k
    if (!col.IsArray()) {
        // Single value: convert once and append once.
        // NOTE(review): `data` has no initializer; get_date_int<T> is expected
        // to write through the pointer before returning OK - confirm.
541
0
        typename PrimitiveTypeTraits<T>::CppType data;
542
0
        RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone)));
543
0
        array.push_back(Field::create_field<T>(data));
544
1.30k
    } else {
        // Array: same conversion applied to each element in order.
545
2.60k
        for (const auto& sub_col : col.GetArray()) {
546
2.60k
            typename PrimitiveTypeTraits<T>::CppType data;
547
2.60k
            RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone)));
548
2.60k
            array.push_back(Field::create_field<T>(data));
549
2.60k
        }
550
1.30k
    }
551
1.30k
    return Status::OK();
552
1.30k
}
_ZN5doris19process_date_columnILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayERKN4cctz9time_zoneE
Line
Count
Source
539
650
                           Array& array, const cctz::time_zone& time_zone) {
540
650
    if (!col.IsArray()) {
541
0
        typename PrimitiveTypeTraits<T>::CppType data;
542
0
        RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone)));
543
0
        array.push_back(Field::create_field<T>(data));
544
650
    } else {
545
1.30k
        for (const auto& sub_col : col.GetArray()) {
546
1.30k
            typename PrimitiveTypeTraits<T>::CppType data;
547
1.30k
            RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone)));
548
1.30k
            array.push_back(Field::create_field<T>(data));
549
1.30k
        }
550
650
    }
551
650
    return Status::OK();
552
650
}
_ZN5doris19process_date_columnILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayERKN4cctz9time_zoneE
Line
Count
Source
539
650
                           Array& array, const cctz::time_zone& time_zone) {
540
650
    if (!col.IsArray()) {
541
0
        typename PrimitiveTypeTraits<T>::CppType data;
542
0
        RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone)));
543
0
        array.push_back(Field::create_field<T>(data));
544
650
    } else {
545
1.30k
        for (const auto& sub_col : col.GetArray()) {
546
1.30k
            typename PrimitiveTypeTraits<T>::CppType data;
547
1.30k
            RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone)));
548
1.30k
            array.push_back(Field::create_field<T>(data));
549
1.30k
        }
550
650
    }
551
650
    return Status::OK();
552
650
}
553
// Converts a JSON value, or each element of a JSON array, to a JSONB field and
// appends the result(s) to `array`.
//
// Each value is first serialized with json_value_to_string and then parsed by
// JsonBinaryValue::from_json_string; the resulting buffer (value(), size()) is
// wrapped in a JsonbField.
//
// col            - a single JSON value or a JSON array of values.
// sub_type       - not used in this function body.
// pure_doc_value - not used in this function body.
// array          - output; one TYPE_JSONB Field is appended per value.
//
// Returns Status::OK() on success. A from_json_string failure goes through
// RETURN_IF_ERROR (presumably an early return); values appended before the
// failure stay in `array`.
554
Status process_jsonb_column(const rapidjson::Value& col, PrimitiveType sub_type,
555
144
                            bool pure_doc_value, Array& array) {
556
144
    if (!col.IsArray()) {
557
0
        JsonBinaryValue jsonb_value;
558
0
        RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(col)));
559
0
        JsonbField json(jsonb_value.value(), jsonb_value.size());
560
0
        array.push_back(Field::create_field<TYPE_JSONB>(std::move(json)));
561
144
    } else {
562
304
        for (const auto& sub_col : col.GetArray()) {
563
304
            JsonBinaryValue jsonb_value;
564
304
            RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(sub_col)));
565
304
            JsonbField json(jsonb_value.value(), jsonb_value.size());
            // NOTE(review): `json` is passed as an lvalue here, while the
            // single-value branch above passes std::move(json). The two
            // branches look like they should agree - confirm which is intended.
566
304
            array.push_back(Field::create_field<TYPE_JSONB>(json));
567
304
        }
568
144
    }
569
144
    return Status::OK();
570
144
}
571
// Dispatches on the array element type `sub_type` and appends the converted
// element(s) of `col` to `array`.
//
// col            - JSON value holding the array data (a non-array value is
//                  handled by the helpers as a single element).
// sub_type       - element type that selects the conversion helper.
// pure_doc_value - forwarded to the selected helper.
// array          - output; receives the converted elements.
// time_zone      - used only by the TYPE_DATEV2 / TYPE_DATETIMEV2 helpers.
//
// Returns the Status of the selected helper, or
// Status::InternalError("Unsupported type") after logging an error when
// `sub_type` has no case below.
572
Status ScrollParser::parse_column(const rapidjson::Value& col, PrimitiveType sub_type,
573
                                  bool pure_doc_value, Array& array,
574
10.1k
                                  const cctz::time_zone& time_zone) {
575
10.1k
    switch (sub_type) {
    // CHAR and VARCHAR fall through and share the TYPE_STRING instantiation.
576
0
    case TYPE_CHAR:
577
0
    case TYPE_VARCHAR:
578
2.09k
    case TYPE_STRING:
579
2.09k
        return process_column<TYPE_STRING>(col, sub_type, pure_doc_value, array);
580
650
    case TYPE_TINYINT:
581
650
        return process_column<TYPE_TINYINT>(col, sub_type, pure_doc_value, array);
582
650
    case TYPE_SMALLINT:
583
650
        return process_column<TYPE_SMALLINT>(col, sub_type, pure_doc_value, array);
584
794
    case TYPE_INT:
585
794
        return process_column<TYPE_INT>(col, sub_type, pure_doc_value, array);
586
884
    case TYPE_BIGINT:
587
884
        return process_column<TYPE_BIGINT>(col, sub_type, pure_doc_value, array);
588
416
    case TYPE_LARGEINT:
589
416
        return process_column<TYPE_LARGEINT>(col, sub_type, pure_doc_value, array);
590
1.30k
    case TYPE_FLOAT:
591
1.30k
        return process_column<TYPE_FLOAT>(col, sub_type, pure_doc_value, array);
592
1.30k
    case TYPE_DOUBLE:
593
1.30k
        return process_column<TYPE_DOUBLE>(col, sub_type, pure_doc_value, array);
594
650
    case TYPE_BOOLEAN:
595
650
        return process_column<TYPE_BOOLEAN>(col, sub_type, pure_doc_value, array);
596
    // date/datetime v2 is the default type for catalog table,
597
    // see https://github.com/apache/doris/pull/16304
598
    // No need to support date and datetime types.
599
650
    case TYPE_DATEV2: {
600
650
        return process_date_column<TYPE_DATEV2>(col, sub_type, pure_doc_value, array, time_zone);
601
0
    }
602
650
    case TYPE_DATETIMEV2: {
603
650
        return process_date_column<TYPE_DATETIMEV2>(col, sub_type, pure_doc_value, array,
604
650
                                                    time_zone);
605
0
    }
606
144
    case TYPE_JSONB: {
607
144
        return process_jsonb_column(col, sub_type, pure_doc_value, array);
608
0
    }
    // Any other element type is rejected; the offending type is only written
    // to the log, not included in the returned Status message.
609
0
    default:
610
0
        LOG(ERROR) << "Do not support Array type: " << sub_type;
611
0
        return Status::InternalError("Unsupported type");
612
10.1k
    }
613
10.1k
}
614
// Constructs a parser with no parsed batch: `_size` and `_line_index` start at 0.
// `doc_value_mode` is accepted but not used by this definition.
615
640
ScrollParser::ScrollParser(bool doc_value_mode) : _size(0), _line_index(0) {}
616
// Defaulted destructor: no cleanup beyond destroying the members.
617
640
ScrollParser::~ScrollParser() = default;
618
// Parses one scroll response and stores its hits for later row extraction.
//
// scroll_result - the raw JSON text of the response.
// exactly_once  - when false, the response must carry FIELD_SCROLL_ID and its
//                 value is stored in `_scroll_id`; when true, the scroll id is
//                 neither required nor read.
//
// On success `_size` holds the number of hits copied into `_inner_hits_node`;
// it stays 0 when the response has no inner hits member or that member is not
// an array.
//
// Returns Status::InternalError when the text is not valid JSON or when a
// required scroll id is missing; Status::OK() otherwise.
//
// NOTE(review): `_line_index` is not reset here, so this looks like it assumes
// parse() is called once per ScrollParser instance - confirm against callers.
619
640
Status ScrollParser::parse(const std::string& scroll_result, bool exactly_once) {
620
    // Reset first: `_size != 0` is what signals that the scroll returned more rows.
621
640
    _size = 0;
622
640
    _document_node.Parse(scroll_result.c_str(), scroll_result.length());
623
640
    if (_document_node.HasParseError()) {
624
0
        return Status::InternalError("Parsing json error, json is: {}", scroll_result);
625
0
    }
626
627
640
    if (!exactly_once && !_document_node.HasMember(FIELD_SCROLL_ID)) {
628
0
        LOG(WARNING) << "Document has not a scroll id field scroll response:" << scroll_result;
629
0
        return Status::InternalError("Document has not a scroll id field");
630
0
    }
631
632
640
    if (!exactly_once) {
        // NOTE(review): GetString() is called without an IsString() check;
        // presumably the scroll id is always a JSON string - confirm.
633
640
        const rapidjson::Value& scroll_node = _document_node[FIELD_SCROLL_ID];
634
640
        _scroll_id = scroll_node.GetString();
635
640
    }
636
    // Expected shape: { hits: { total : 2, "hits" : [ {}, {}, {} ]}}
    // NOTE(review): FIELD_HITS is indexed without a HasMember() check, unlike
    // FIELD_SCROLL_ID above; a response without it is not guarded here.
637
640
    const rapidjson::Value& outer_hits_node = _document_node[FIELD_HITS];
638
    // No inner hits member means there is no data in this index.
639
640
    if (!outer_hits_node.HasMember(FIELD_INNER_HITS)) {
640
4
        return Status::OK();
641
4
    }
642
636
    const rapidjson::Value& inner_hits_node = outer_hits_node[FIELD_INNER_HITS];
643
    // A non-array inner hits value occurs only at the end of scrolling.
644
636
    if (!inner_hits_node.IsArray()) {
645
0
        return Status::OK();
646
0
    }
647
636
    _inner_hits_node.CopyFrom(inner_hits_node, _document_node.GetAllocator());
648
    // Number of documents contained in this batch.
649
636
    _size = _inner_hits_node.Size();
650
636
    return Status::OK();
651
636
}
652
// Returns `_size`, the hit count recorded by the most recent parse() call
// (0 when that call stored no hits).
653
1.27k
int ScrollParser::get_size() const {
654
1.27k
    return _size;
655
1.27k
}
656
// Returns a reference to the stored scroll id. parse() assigns it only when
// called with exactly_once == false; the reference points at the member, so a
// later parse() of that kind changes the string it refers to.
657
640
const std::string& ScrollParser::get_scroll_id() {
658
640
    return _scroll_id;
659
640
}
660
661
Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
662
                                  std::vector<MutableColumnPtr>& columns, bool* line_eof,
663
                                  const std::map<std::string, std::string>& docvalue_context,
664
2.82k
                                  const cctz::time_zone& time_zone) {
665
2.82k
    *line_eof = true;
666
667
2.82k
    if (_size <= 0 || _line_index >= _size) {
668
636
        return Status::OK();
669
636
    }
670
671
2.18k
    const rapidjson::Value& obj = _inner_hits_node[_line_index++];
672
2.18k
    bool pure_doc_value = false;
673
2.18k
    if (obj.HasMember("fields")) {
674
712
        pure_doc_value = true;
675
712
    }
676
    // obj may be neither have `_source` nor `fields` field.
677
2.18k
    const rapidjson::Value* line = nullptr;
678
2.18k
    if (obj.HasMember(FIELD_SOURCE)) {
679
1.46k
        line = &obj[FIELD_SOURCE];
680
1.46k
    } else if (obj.HasMember("fields")) {
681
712
        line = &obj["fields"];
682
712
    }
683
684
19.0k
    for (int i = 0; i < tuple_desc->slots().size(); ++i) {
685
16.8k
        const SlotDescriptor* slot_desc = tuple_desc->slots()[i];
686
16.8k
        auto* col_ptr = columns[i].get();
687
688
16.8k
        if (slot_desc->col_name() == FIELD_ID) {
689
            // actually this branch will not be reached, this is guaranteed by Doris FE.
690
0
            if (pure_doc_value) {
691
0
                return Status::RuntimeError("obtain `_id` is not supported in doc_values mode");
692
0
            }
693
            // obj[FIELD_ID] must not be NULL
694
0
            std::string _id = obj[FIELD_ID].GetString();
695
0
            size_t len = _id.length();
696
697
0
            col_ptr->insert_data(const_cast<const char*>(_id.data()), len);
698
0
            continue;
699
0
        }
700
701
16.8k
        const char* col_name = pure_doc_value ? docvalue_context.at(slot_desc->col_name()).c_str()
702
16.8k
                                              : slot_desc->col_name().c_str();
703
704
16.8k
        if (line == nullptr || line->FindMember(col_name) == line->MemberEnd()) {
705
554
            if (slot_desc->is_nullable()) {
706
554
                auto* nullable_column = reinterpret_cast<ColumnNullable*>(col_ptr);
707
554
                nullable_column->insert_data(nullptr, 0);
708
554
                continue;
709
554
            } else {
710
0
                std::string details = absl::Substitute(INVALID_NULL_VALUE, col_name);
711
0
                return Status::RuntimeError(details);
712
0
            }
713
554
        }
714
715
16.2k
        const rapidjson::Value& col = (*line)[col_name];
716
717
16.2k
        auto type = slot_desc->type()->get_primitive_type();
718
719
        // when the column value is null, the subsequent type casting will report an error
720
16.2k
        if (col.IsNull() && slot_desc->is_nullable()) {
721
4
            col_ptr->insert_data(nullptr, 0);
722
4
            continue;
723
16.2k
        } else if (col.IsNull() && !slot_desc->is_nullable()) {
724
0
            std::string details = absl::Substitute(INVALID_NULL_VALUE, col_name);
725
0
            return Status::RuntimeError(details);
726
0
        }
727
16.2k
        switch (type) {
728
0
        case TYPE_CHAR:
729
0
        case TYPE_VARCHAR:
730
1.88k
        case TYPE_STRING: {
731
            // sometimes elasticsearch user post some not-string value to Elasticsearch Index.
732
            // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation
733
            // this may be a tricky, but we can work around this issue
734
1.88k
            std::string val;
735
1.88k
            if (pure_doc_value) {
736
4
                if (col.Empty()) {
737
0
                    break;
738
4
                } else if (col.Size() > 1) {
739
                    // doc_values with multiple elements means actual array data
740
                    // in ES keyword/text field, serialize as JSON array string
741
0
                    val = json_value_to_string(col);
742
4
                } else if (!col[0].IsString()) {
743
0
                    val = json_value_to_string(col[0]);
744
4
                } else {
745
4
                    val = col[0].GetString();
746
4
                }
747
1.88k
            } else {
748
                // When ES mapping is keyword/text but actual data is an array,
749
                // serialize the array to JSON string instead of throwing an error.
750
                // This is valid in ES since any field can hold array values.
751
1.88k
                if (col.IsArray()) {
752
64
                    val = json_value_to_string(col);
753
1.81k
                } else if (!col.IsString()) {
754
0
                    val = json_value_to_string(col);
755
1.81k
                } else {
756
1.81k
                    val = col.GetString();
757
1.81k
                }
758
1.88k
            }
759
1.88k
            size_t val_size = val.length();
760
1.88k
            col_ptr->insert_data(const_cast<const char*>(val.data()), val_size);
761
1.88k
            break;
762
1.88k
        }
763
764
0
        case TYPE_TINYINT: {
765
0
            RETURN_IF_ERROR(insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value,
766
0
                                                     slot_desc->is_nullable()));
767
0
            break;
768
0
        }
769
770
0
        case TYPE_SMALLINT: {
771
0
            RETURN_IF_ERROR(insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value,
772
0
                                                      slot_desc->is_nullable()));
773
0
            break;
774
0
        }
775
776
44
        case TYPE_INT: {
777
44
            RETURN_IF_ERROR(insert_int_value<int32_t>(col, type, col_ptr, pure_doc_value,
778
44
                                                      slot_desc->is_nullable()));
779
44
            break;
780
44
        }
781
782
920
        case TYPE_BIGINT: {
783
920
            RETURN_IF_ERROR(insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value,
784
920
                                                      slot_desc->is_nullable()));
785
920
            break;
786
920
        }
787
788
920
        case TYPE_LARGEINT: {
789
0
            RETURN_IF_ERROR(insert_int_value<__int128>(col, type, col_ptr, pure_doc_value,
790
0
                                                       slot_desc->is_nullable()));
791
0
            break;
792
0
        }
793
794
344
        case TYPE_DOUBLE: {
795
344
            RETURN_IF_ERROR(insert_float_value<double>(col, type, col_ptr, pure_doc_value,
796
344
                                                       slot_desc->is_nullable()));
797
344
            break;
798
344
        }
799
800
344
        case TYPE_FLOAT: {
801
0
            RETURN_IF_ERROR(insert_float_value<float>(col, type, col_ptr, pure_doc_value,
802
0
                                                      slot_desc->is_nullable()));
803
0
            break;
804
0
        }
805
806
0
        case TYPE_BOOLEAN: {
807
0
            if (col.IsBool()) {
808
0
                int8_t val = col.GetBool();
809
0
                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
810
0
                break;
811
0
            }
812
813
0
            if (col.IsNumber()) {
814
0
                int8_t val = static_cast<int8_t>(col.GetInt());
815
0
                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
816
0
                break;
817
0
            }
818
819
0
            bool is_nested_str = false;
820
0
            if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) {
821
0
                int8_t val = col[0].GetBool();
822
0
                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
823
0
                break;
824
0
            } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) {
825
0
                is_nested_str = true;
826
0
            } else if (pure_doc_value && col.IsArray()) {
827
0
                return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN");
828
0
            }
829
830
0
            const rapidjson::Value& str_col = is_nested_str ? col[0] : col;
831
832
0
            RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
833
834
0
            const std::string& val = str_col.GetString();
835
0
            size_t val_size = str_col.GetStringLength();
836
0
            StringParser::ParseResult result;
837
0
            bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
838
0
            RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
839
0
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0);
840
0
            break;
841
0
        }
842
0
        case TYPE_DECIMALV2: {
843
0
            DecimalV2Value data;
844
845
0
            if (col.IsDouble()) {
846
0
                data.assign_from_double(col.GetDouble());
847
0
            } else {
848
0
                std::string val;
849
0
                if (pure_doc_value) {
850
0
                    if (col.Empty()) {
851
0
                        break;
852
0
                    } else if (!col[0].IsString()) {
853
0
                        val = json_value_to_string(col[0]);
854
0
                    } else {
855
0
                        val = col[0].GetString();
856
0
                    }
857
0
                } else {
858
0
                    RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true);
859
0
                    if (!col.IsString()) {
860
0
                        val = json_value_to_string(col);
861
0
                    } else {
862
0
                        val = col.GetString();
863
0
                    }
864
0
                }
865
0
                data.parse_from_str(val.data(), static_cast<int32_t>(val.length()));
866
0
            }
867
0
            col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
868
0
            break;
869
0
        }
870
871
0
        case TYPE_DATE:
872
0
            RETURN_IF_ERROR(
873
0
                    fill_date_int<TYPE_DATE>(col, type, pure_doc_value, col_ptr, time_zone));
874
0
            break;
875
0
        case TYPE_DATETIME:
876
0
            RETURN_IF_ERROR(
877
0
                    fill_date_int<TYPE_DATETIME>(col, type, pure_doc_value, col_ptr, time_zone));
878
0
            break;
879
0
        case TYPE_DATEV2:
880
0
            RETURN_IF_ERROR(
881
0
                    fill_date_int<TYPE_DATEV2>(col, type, pure_doc_value, col_ptr, time_zone));
882
0
            break;
883
1.78k
        case TYPE_DATETIMEV2: {
884
1.78k
            RETURN_IF_ERROR(
885
1.78k
                    fill_date_int<TYPE_DATETIMEV2>(col, type, pure_doc_value, col_ptr, time_zone));
886
1.78k
            break;
887
1.78k
        }
888
10.1k
        case TYPE_ARRAY: {
889
10.1k
            Array array;
890
10.1k
            const auto& sub_type = assert_cast<const DataTypeArray*>(
891
10.1k
                                           remove_nullable(tuple_desc->slots()[i]->type()).get())
892
10.1k
                                           ->get_nested_type()
893
10.1k
                                           ->get_primitive_type();
894
10.1k
            RETURN_IF_ERROR(parse_column(col, sub_type, pure_doc_value, array, time_zone));
895
10.1k
            col_ptr->insert(Field::create_field<TYPE_ARRAY>(array));
896
10.1k
            break;
897
10.1k
        }
898
1.13k
        case TYPE_JSONB: {
899
1.13k
            JsonBinaryValue jsonb_value;
900
1.13k
            RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(col)));
901
1.13k
            JsonbField json(jsonb_value.value(), jsonb_value.size());
902
1.13k
            col_ptr->insert(Field::create_field<TYPE_JSONB>(json));
903
1.13k
            break;
904
1.13k
        }
905
0
        default: {
906
0
            LOG(ERROR) << "Unsupported data type: " << type_to_string(type);
907
0
            DCHECK(false);
908
0
            break;
909
1.13k
        }
910
16.2k
        }
911
16.2k
    }
912
913
2.18k
    *line_eof = false;
914
2.18k
    return Status::OK();
915
2.18k
}
916
} // namespace doris