be/src/exec/es/es_scroll_parser.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "exec/es/es_scroll_parser.h" |
19 | | |
20 | | #include <absl/strings/substitute.h> |
21 | | #include <cctz/time_zone.h> |
22 | | #include <glog/logging.h> |
23 | | #include <rapidjson/allocators.h> |
24 | | #include <rapidjson/encodings.h> |
25 | | #include <stdint.h> |
26 | | #include <string.h> |
27 | | |
28 | | // IWYU pragma: no_include <bits/chrono.h> |
29 | | #include <chrono> // IWYU pragma: keep |
30 | | #include <cstdlib> |
31 | | #include <ostream> |
32 | | #include <string> |
33 | | |
34 | | #include "common/status.h" |
35 | | #include "core/binary_cast.hpp" |
36 | | #include "core/column/column.h" |
37 | | #include "core/column/column_nullable.h" |
38 | | #include "core/data_type/data_type_array.h" |
39 | | #include "core/data_type/data_type_nullable.h" |
40 | | #include "core/data_type/define_primitive_type.h" |
41 | | #include "core/data_type/primitive_type.h" |
42 | | #include "core/field.h" |
43 | | #include "core/value/decimalv2_value.h" |
44 | | #include "core/value/jsonb_value.h" |
45 | | #include "core/value/vdatetime_value.h" |
46 | | #include "exprs/function/cast/cast_to_date_or_datetime_impl.hpp" |
47 | | #include "exprs/function/cast/cast_to_datetimev2_impl.hpp" |
48 | | #include "exprs/function/cast/cast_to_datev2_impl.hpp" |
49 | | #include "rapidjson/document.h" |
50 | | #include "rapidjson/rapidjson.h" |
51 | | #include "rapidjson/stringbuffer.h" |
52 | | #include "rapidjson/writer.h" |
53 | | #include "runtime/descriptors.h" |
54 | | #include "util/string_parser.hpp" |
55 | | |
56 | | namespace doris { |
57 | | |
// Keys looked up in an Elasticsearch scroll response.
// The response nests the document list as `hits.hits`, which is why the outer
// and the inner key share the same spelling.
// The pointers are constexpr so they can neither be reassigned nor be subject
// to dynamic initialization.
static constexpr const char* FIELD_SCROLL_ID = "_scroll_id";
static constexpr const char* FIELD_HITS = "hits";
static constexpr const char* FIELD_INNER_HITS = "hits";
static constexpr const char* FIELD_SOURCE = "_source";
static constexpr const char* FIELD_ID = "_id";
63 | | |
64 | | // get the original json data type |
65 | 0 | std::string json_type_to_string(rapidjson::Type type) { |
66 | 0 | switch (type) { |
67 | 0 | case rapidjson::kNumberType: |
68 | 0 | return "Number"; |
69 | 0 | case rapidjson::kStringType: |
70 | 0 | return "Varchar/Char"; |
71 | 0 | case rapidjson::kArrayType: |
72 | 0 | return "Array"; |
73 | 0 | case rapidjson::kObjectType: |
74 | 0 | return "Object"; |
75 | 0 | case rapidjson::kNullType: |
76 | 0 | return "Null Type"; |
77 | 0 | case rapidjson::kFalseType: |
78 | 0 | case rapidjson::kTrueType: |
79 | 0 | return "True/False"; |
80 | 0 | default: |
81 | 0 | return "Unknown Type"; |
82 | 0 | } |
83 | 0 | } |
84 | | |
85 | | // transfer rapidjson::Value to string representation |
86 | 1.50k | std::string json_value_to_string(const rapidjson::Value& value) { |
87 | 1.50k | rapidjson::StringBuffer scratch_buffer; |
88 | 1.50k | rapidjson::Writer<rapidjson::StringBuffer> temp_writer(scratch_buffer); |
89 | 1.50k | value.Accept(temp_writer); |
90 | 1.50k | return scratch_buffer.GetString(); |
91 | 1.50k | } |
92 | | |
// Message templates for data-source errors.
// NOTE(review): ERROR_INVALID_COL_DATA uses a `{}` placeholder while the others
// use positional `$N` placeholders; the formatter applied to each template is
// not visible here - confirm against the call sites before unifying them.
static const std::string ERROR_INVALID_COL_DATA =
        "Data source returned inconsistent column data. "
        "Expected value of type {} based on column metadata. This likely indicates a "
        "problem with the data source library.";
static const std::string ERROR_MEM_LIMIT_EXCEEDED =
        "DataSourceScanNode::$0() failed to allocate "
        "$1 bytes for $2.";
// The two literals are concatenated, so the separating space after `$0` must
// be spelled out explicitly (it was missing, yielding "$0based").
static const std::string ERROR_COL_DATA_IS_ARRAY =
        "Data source returned an array for the type $0 "
        "based on column metadata.";
static const std::string INVALID_NULL_VALUE =
        "Invalid null value occurs: Non-null column `$0` contains NULL";
105 | | |
// Return Status::RuntimeError from the enclosing function when the "is array"
// state of `col` equals `is_array`.
//   col      - rapidjson value expression (evaluated more than once on failure)
//   type     - expected PrimitiveType, only used for the message
//   is_array - the array-ness that is considered an error
// Arguments are parenthesized so that any expression can be passed safely.
#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type, is_array)                      \
    do {                                                                       \
        if ((col).IsArray() == (is_array)) {                                   \
            std::stringstream ss;                                              \
            ss << "Expected value of type: " << type_to_string(type)           \
               << "; but found type: " << json_type_to_string((col).GetType()) \
               << "; Document slice is : " << json_value_to_string(col);       \
            return Status::RuntimeError(ss.str());                             \
        }                                                                      \
    } while (false)
116 | | |
// Return Status::RuntimeError from the enclosing function when `col` is not a
// JSON string.
//   col  - rapidjson value expression (evaluated more than once on failure)
//   type - expected PrimitiveType, only used for the message
// Arguments are parenthesized so that any expression can be passed safely.
#define RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type)                            \
    do {                                                                        \
        if (!(col).IsString()) {                                                \
            std::stringstream ss;                                               \
            ss << "Expected value of type: " << type_to_string(type)            \
               << "; but found type: " << json_type_to_string((col).GetType())  \
               << "; Document source slice is : " << json_value_to_string(col); \
            return Status::RuntimeError(ss.str());                              \
        }                                                                       \
    } while (false)
127 | | |
// Return Status::RuntimeError from the enclosing function when `col` is not a
// JSON number.
//   col  - rapidjson value expression (evaluated more than once on failure)
//   type - expected PrimitiveType, only used for the message
// Arguments are parenthesized so that any expression can be passed safely.
#define RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col, type)                           \
    do {                                                                       \
        if (!(col).IsNumber()) {                                               \
            std::stringstream ss;                                              \
            ss << "Expected value of type: " << type_to_string(type)           \
               << "; but found type: " << json_type_to_string((col).GetType()) \
               << "; Document value is: " << json_value_to_string(col);        \
            return Status::RuntimeError(ss.str());                             \
        }                                                                      \
    } while (false)
138 | | |
// Return Status::RuntimeError from the enclosing function when `result` is
// anything other than StringParser::PARSE_SUCCESS.
//   result - StringParser::ParseResult expression
//   col    - rapidjson value that was parsed, only used for the message
//   type   - expected PrimitiveType, only used for the message
// Arguments are parenthesized so that any expression can be passed safely.
#define RETURN_ERROR_IF_PARSING_FAILED(result, col, type)                       \
    do {                                                                        \
        if ((result) != StringParser::PARSE_SUCCESS) {                          \
            std::stringstream ss;                                               \
            ss << "Expected value of type: " << type_to_string(type)            \
               << "; but found type: " << json_type_to_string((col).GetType())  \
               << "; Document source slice is : " << json_value_to_string(col); \
            return Status::RuntimeError(ss.str());                              \
        }                                                                       \
    } while (false)
149 | | |
// Unconditionally return Status::RuntimeError from the enclosing function,
// describing `col` as a value that could not be converted to `type`.
// Despite the "IF" in the name, this macro performs no check of its own: the
// caller decides when the cast has failed.
//   col  - rapidjson value expression, only used for the message
//   type - expected PrimitiveType, only used for the message
#define RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type)                       \
    do {                                                                   \
        std::stringstream ss;                                              \
        ss << "Expected value of type: " << type_to_string(type)           \
           << "; but found type: " << json_type_to_string((col).GetType()) \
           << "; Document slice is : " << json_value_to_string(col);       \
        return Status::RuntimeError(ss.str());                             \
    } while (false)
158 | | |
159 | | template <typename T> |
160 | | Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot, |
161 | 13.3k | bool pure_doc_value) { |
162 | 13.3k | if (col.IsNumber()) { |
163 | 13.3k | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); |
164 | 13.3k | return Status::OK(); |
165 | 13.3k | } |
166 | | |
167 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { |
168 | 0 | RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); |
169 | 0 | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); |
170 | 0 | return Status::OK(); |
171 | 0 | } |
172 | | |
173 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); |
174 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); |
175 | | |
176 | 0 | StringParser::ParseResult result; |
177 | 0 | const std::string& val = col.GetString(); |
178 | 0 | size_t len = col.GetStringLength(); |
179 | 0 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); |
180 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); |
181 | | |
182 | 0 | if (sizeof(T) < 16) { |
183 | 0 | *reinterpret_cast<T*>(slot) = v; |
184 | 0 | } else { |
185 | 0 | DCHECK(sizeof(T) == 16); |
186 | 0 | memcpy(slot, &v, sizeof(v)); |
187 | 0 | } |
188 | |
|
189 | 0 | return Status::OK(); |
190 | 0 | } _ZN5doris13get_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb Line | Count | Source | 161 | 2.60k | bool pure_doc_value) { | 162 | 2.60k | if (col.IsNumber()) { | 163 | 2.60k | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); | 164 | 2.60k | return Status::OK(); | 165 | 2.60k | } | 166 | | | 167 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { | 168 | 0 | RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); | 169 | 0 | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); | 170 | 0 | return Status::OK(); | 171 | 0 | } | 172 | | | 173 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 174 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 175 | | | 176 | 0 | StringParser::ParseResult result; | 177 | 0 | const std::string& val = col.GetString(); | 178 | 0 | size_t len = col.GetStringLength(); | 179 | 0 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); | 180 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); | 181 | | | 182 | 0 | if (sizeof(T) < 16) { | 183 | 0 | *reinterpret_cast<T*>(slot) = v; | 184 | 0 | } else { | 185 | 0 | DCHECK(sizeof(T) == 16); | 186 | 0 | memcpy(slot, &v, sizeof(v)); | 187 | 0 | } | 188 | |
| 189 | 0 | return Status::OK(); | 190 | 0 | } |
_ZN5doris13get_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb Line | Count | Source | 161 | 2.60k | bool pure_doc_value) { | 162 | 2.60k | if (col.IsNumber()) { | 163 | 2.60k | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); | 164 | 2.60k | return Status::OK(); | 165 | 2.60k | } | 166 | | | 167 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { | 168 | 0 | RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); | 169 | 0 | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); | 170 | 0 | return Status::OK(); | 171 | 0 | } | 172 | | | 173 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 174 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 175 | | | 176 | 0 | StringParser::ParseResult result; | 177 | 0 | const std::string& val = col.GetString(); | 178 | 0 | size_t len = col.GetStringLength(); | 179 | 0 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); | 180 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); | 181 | | | 182 | 0 | if (sizeof(T) < 16) { | 183 | 0 | *reinterpret_cast<T*>(slot) = v; | 184 | 0 | } else { | 185 | 0 | DCHECK(sizeof(T) == 16); | 186 | 0 | memcpy(slot, &v, sizeof(v)); | 187 | 0 | } | 188 | |
| 189 | 0 | return Status::OK(); | 190 | 0 | } |
_ZN5doris13get_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb Line | Count | Source | 161 | 2.90k | bool pure_doc_value) { | 162 | 2.90k | if (col.IsNumber()) { | 163 | 2.90k | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); | 164 | 2.90k | return Status::OK(); | 165 | 2.90k | } | 166 | | | 167 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { | 168 | 0 | RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); | 169 | 0 | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); | 170 | 0 | return Status::OK(); | 171 | 0 | } | 172 | | | 173 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 174 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 175 | | | 176 | 0 | StringParser::ParseResult result; | 177 | 0 | const std::string& val = col.GetString(); | 178 | 0 | size_t len = col.GetStringLength(); | 179 | 0 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); | 180 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); | 181 | | | 182 | 0 | if (sizeof(T) < 16) { | 183 | 0 | *reinterpret_cast<T*>(slot) = v; | 184 | 0 | } else { | 185 | 0 | DCHECK(sizeof(T) == 16); | 186 | 0 | memcpy(slot, &v, sizeof(v)); | 187 | 0 | } | 188 | |
| 189 | 0 | return Status::OK(); | 190 | 0 | } |
_ZN5doris13get_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb Line | Count | Source | 161 | 3.53k | bool pure_doc_value) { | 162 | 3.53k | if (col.IsNumber()) { | 163 | 3.53k | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); | 164 | 3.53k | return Status::OK(); | 165 | 3.53k | } | 166 | | | 167 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { | 168 | 0 | RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); | 169 | 0 | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); | 170 | 0 | return Status::OK(); | 171 | 0 | } | 172 | | | 173 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 174 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 175 | | | 176 | 0 | StringParser::ParseResult result; | 177 | 0 | const std::string& val = col.GetString(); | 178 | 0 | size_t len = col.GetStringLength(); | 179 | 0 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); | 180 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); | 181 | | | 182 | 0 | if (sizeof(T) < 16) { | 183 | 0 | *reinterpret_cast<T*>(slot) = v; | 184 | 0 | } else { | 185 | 0 | DCHECK(sizeof(T) == 16); | 186 | 0 | memcpy(slot, &v, sizeof(v)); | 187 | 0 | } | 188 | |
| 189 | 0 | return Status::OK(); | 190 | 0 | } |
_ZN5doris13get_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb Line | Count | Source | 161 | 1.66k | bool pure_doc_value) { | 162 | 1.66k | if (col.IsNumber()) { | 163 | 1.66k | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); | 164 | 1.66k | return Status::OK(); | 165 | 1.66k | } | 166 | | | 167 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { | 168 | 0 | RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type); | 169 | 0 | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); | 170 | 0 | return Status::OK(); | 171 | 0 | } | 172 | | | 173 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 174 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 175 | | | 176 | 0 | StringParser::ParseResult result; | 177 | 0 | const std::string& val = col.GetString(); | 178 | 0 | size_t len = col.GetStringLength(); | 179 | 0 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); | 180 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); | 181 | | | 182 | 0 | if (sizeof(T) < 16) { | 183 | 0 | *reinterpret_cast<T*>(slot) = v; | 184 | 0 | } else { | 185 | 0 | DCHECK(sizeof(T) == 16); | 186 | 0 | memcpy(slot, &v, sizeof(v)); | 187 | 0 | } | 188 | |
| 189 | 0 | return Status::OK(); | 190 | 0 | } |
|
191 | | |
192 | | template <PrimitiveType T> |
193 | | Status get_date_value_int(const rapidjson::Value& col, PrimitiveType type, bool is_date_str, |
194 | | typename PrimitiveTypeTraits<T>::CppType* slot, |
195 | 4.38k | const cctz::time_zone& time_zone) { |
196 | 4.38k | constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME; |
197 | 4.38k | typename PrimitiveTypeTraits<T>::CppType dt_val; |
198 | 4.38k | if (is_date_str) { |
199 | 4.15k | const std::string str_date = col.GetString(); |
200 | 4.15k | int str_length = col.GetStringLength(); |
201 | 4.15k | bool success = false; |
202 | 4.15k | if (str_length > 19) { |
203 | 602 | std::chrono::system_clock::time_point tp; |
204 | | // time_zone suffix pattern |
205 | | // Z/+08:00/-04:30 |
206 | 602 | RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)"); |
207 | 602 | bool ok = false; |
208 | 602 | std::string fmt; |
209 | 602 | re2::StringPiece value; |
210 | 602 | if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) { |
211 | | // with time_zone info |
212 | | // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00 |
213 | | // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00 |
214 | 486 | fmt = "%Y-%m-%dT%H:%M:%E*S%Ez"; |
215 | 486 | cctz::time_zone ctz; |
216 | | // find time_zone by time_zone suffix string |
217 | 486 | TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz); |
218 | 486 | ok = cctz::parse(fmt, str_date, ctz, &tp); |
219 | 486 | } else { |
220 | | // without time_zone info |
221 | | // 2022-08-08T12:10:10.000 |
222 | 116 | fmt = "%Y-%m-%dT%H:%M:%E*S"; |
223 | | // If the time without time_zone info, ES will assume it is UTC time. |
224 | | // So we parse it in Doris with UTC time zone. |
225 | 116 | ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp); |
226 | 116 | } |
227 | 602 | if (ok) { |
228 | | // The local time zone can change by session variable `time_zone` |
229 | | // We should use the user specified time zone, not the actual system local time zone. |
230 | 602 | success = true; |
231 | 602 | dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone); |
232 | 602 | } |
233 | 3.54k | } else if (str_length == 19) { |
234 | | // YYYY-MM-DDTHH:MM:SS |
235 | 1.90k | if (*(str_date.c_str() + 10) == 'T') { |
236 | 40 | std::chrono::system_clock::time_point tp; |
237 | 40 | const bool ok = |
238 | 40 | cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp); |
239 | 40 | if (ok) { |
240 | 40 | success = true; |
241 | 40 | dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone); |
242 | 40 | } |
243 | 1.86k | } else { |
244 | | // YYYY-MM-DD HH:MM:SS |
245 | 1.86k | CastParameters params; |
246 | 1.86k | if constexpr (is_datetime_v1) { |
247 | 0 | success = CastToDateOrDatetime::from_string_non_strict_mode< |
248 | 0 | DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length}, |
249 | 0 | dt_val, nullptr, params); |
250 | 0 | } else if constexpr (T == TYPE_DATEV2) { |
251 | 0 | success = CastToDateV2::from_string_non_strict_mode( |
252 | 0 | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params); |
253 | 1.86k | } else { |
254 | 1.86k | success = CastToDatetimeV2::from_string_non_strict_mode( |
255 | 1.86k | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params); |
256 | 1.86k | } |
257 | 1.86k | } |
258 | | |
259 | 1.90k | } else if (str_length == 13) { |
260 | | // string long like "1677895728000" |
261 | 0 | int64_t time_long = std::atol(str_date.c_str()); |
262 | 0 | if (time_long > 0) { |
263 | 0 | success = true; |
264 | 0 | dt_val.from_unixtime(time_long / 1000, time_zone); |
265 | 0 | } |
266 | 1.64k | } else { |
267 | | // YYYY-MM-DD or others |
268 | 1.64k | CastParameters params; |
269 | 1.64k | if constexpr (is_datetime_v1) { |
270 | 0 | success = CastToDateOrDatetime::from_string_non_strict_mode< |
271 | 0 | DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length}, |
272 | 0 | dt_val, nullptr, params); |
273 | 1.30k | } else if constexpr (T == TYPE_DATEV2) { |
274 | 1.30k | success = CastToDateV2::from_string_non_strict_mode( |
275 | 1.30k | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params); |
276 | 1.30k | } else { |
277 | 344 | success = CastToDatetimeV2::from_string_non_strict_mode( |
278 | 344 | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params); |
279 | 344 | } |
280 | 1.64k | } |
281 | | |
282 | 4.15k | if (!success) { |
283 | 0 | RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); |
284 | 0 | } |
285 | | |
286 | 4.15k | } else { |
287 | 236 | dt_val.from_unixtime(col.GetInt64() / 1000, time_zone); |
288 | 236 | } |
289 | 4.38k | if constexpr (is_datetime_v1) { |
290 | 0 | if (type == TYPE_DATE) { |
291 | 0 | dt_val.cast_to_date(); |
292 | 0 | } else { |
293 | 0 | dt_val.to_datetime(); |
294 | 0 | } |
295 | 0 | } |
296 | | |
297 | 4.38k | *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val); |
298 | 4.38k | return Status::OK(); |
299 | 4.38k | } _ZN5doris18get_date_value_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE Line | Count | Source | 195 | 1.30k | const cctz::time_zone& time_zone) { | 196 | 1.30k | constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME; | 197 | 1.30k | typename PrimitiveTypeTraits<T>::CppType dt_val; | 198 | 1.30k | if (is_date_str) { | 199 | 1.30k | const std::string str_date = col.GetString(); | 200 | 1.30k | int str_length = col.GetStringLength(); | 201 | 1.30k | bool success = false; | 202 | 1.30k | if (str_length > 19) { | 203 | 0 | std::chrono::system_clock::time_point tp; | 204 | | // time_zone suffix pattern | 205 | | // Z/+08:00/-04:30 | 206 | 0 | RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)"); | 207 | 0 | bool ok = false; | 208 | 0 | std::string fmt; | 209 | 0 | re2::StringPiece value; | 210 | 0 | if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) { | 211 | | // with time_zone info | 212 | | // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00 | 213 | | // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00 | 214 | 0 | fmt = "%Y-%m-%dT%H:%M:%E*S%Ez"; | 215 | 0 | cctz::time_zone ctz; | 216 | | // find time_zone by time_zone suffix string | 217 | 0 | TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz); | 218 | 0 | ok = cctz::parse(fmt, str_date, ctz, &tp); | 219 | 0 | } else { | 220 | | // without time_zone info | 221 | | // 2022-08-08T12:10:10.000 | 222 | 0 | fmt = "%Y-%m-%dT%H:%M:%E*S"; | 223 | | // If the time without time_zone info, ES will assume it is UTC time. | 224 | | // So we parse it in Doris with UTC time zone. 
| 225 | 0 | ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp); | 226 | 0 | } | 227 | 0 | if (ok) { | 228 | | // The local time zone can change by session variable `time_zone` | 229 | | // We should use the user specified time zone, not the actual system local time zone. | 230 | 0 | success = true; | 231 | 0 | dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone); | 232 | 0 | } | 233 | 1.30k | } else if (str_length == 19) { | 234 | | // YYYY-MM-DDTHH:MM:SS | 235 | 0 | if (*(str_date.c_str() + 10) == 'T') { | 236 | 0 | std::chrono::system_clock::time_point tp; | 237 | 0 | const bool ok = | 238 | 0 | cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp); | 239 | 0 | if (ok) { | 240 | 0 | success = true; | 241 | 0 | dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone); | 242 | 0 | } | 243 | 0 | } else { | 244 | | // YYYY-MM-DD HH:MM:SS | 245 | 0 | CastParameters params; | 246 | | if constexpr (is_datetime_v1) { | 247 | | success = CastToDateOrDatetime::from_string_non_strict_mode< | 248 | | DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length}, | 249 | | dt_val, nullptr, params); | 250 | 0 | } else if constexpr (T == TYPE_DATEV2) { | 251 | 0 | success = CastToDateV2::from_string_non_strict_mode( | 252 | 0 | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params); | 253 | | } else { | 254 | | success = CastToDatetimeV2::from_string_non_strict_mode( | 255 | | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params); | 256 | | } | 257 | 0 | } | 258 | |
| 259 | 1.30k | } else if (str_length == 13) { | 260 | | // string long like "1677895728000" | 261 | 0 | int64_t time_long = std::atol(str_date.c_str()); | 262 | 0 | if (time_long > 0) { | 263 | 0 | success = true; | 264 | 0 | dt_val.from_unixtime(time_long / 1000, time_zone); | 265 | 0 | } | 266 | 1.30k | } else { | 267 | | // YYYY-MM-DD or others | 268 | 1.30k | CastParameters params; | 269 | | if constexpr (is_datetime_v1) { | 270 | | success = CastToDateOrDatetime::from_string_non_strict_mode< | 271 | | DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length}, | 272 | | dt_val, nullptr, params); | 273 | 1.30k | } else if constexpr (T == TYPE_DATEV2) { | 274 | 1.30k | success = CastToDateV2::from_string_non_strict_mode( | 275 | 1.30k | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params); | 276 | | } else { | 277 | | success = CastToDatetimeV2::from_string_non_strict_mode( | 278 | | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params); | 279 | | } | 280 | 1.30k | } | 281 | | | 282 | 1.30k | if (!success) { | 283 | 0 | RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); | 284 | 0 | } | 285 | | | 286 | 1.30k | } else { | 287 | 0 | dt_val.from_unixtime(col.GetInt64() / 1000, time_zone); | 288 | 0 | } | 289 | | if constexpr (is_datetime_v1) { | 290 | | if (type == TYPE_DATE) { | 291 | | dt_val.cast_to_date(); | 292 | | } else { | 293 | | dt_val.to_datetime(); | 294 | | } | 295 | | } | 296 | | | 297 | 1.30k | *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val); | 298 | 1.30k | return Status::OK(); | 299 | 1.30k | } |
_ZN5doris18get_date_value_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE Line | Count | Source | 195 | 3.08k | const cctz::time_zone& time_zone) { | 196 | 3.08k | constexpr bool is_datetime_v1 = T == TYPE_DATE || T == TYPE_DATETIME; | 197 | 3.08k | typename PrimitiveTypeTraits<T>::CppType dt_val; | 198 | 3.08k | if (is_date_str) { | 199 | 2.85k | const std::string str_date = col.GetString(); | 200 | 2.85k | int str_length = col.GetStringLength(); | 201 | 2.85k | bool success = false; | 202 | 2.85k | if (str_length > 19) { | 203 | 602 | std::chrono::system_clock::time_point tp; | 204 | | // time_zone suffix pattern | 205 | | // Z/+08:00/-04:30 | 206 | 602 | RE2 time_zone_pattern(R"([+-]\d{2}:?\d{2}|Z)"); | 207 | 602 | bool ok = false; | 208 | 602 | std::string fmt; | 209 | 602 | re2::StringPiece value; | 210 | 602 | if (time_zone_pattern.Match(str_date, 0, str_date.size(), RE2::UNANCHORED, &value, 1)) { | 211 | | // with time_zone info | 212 | | // YYYY-MM-DDTHH:MM:SSZ or YYYY-MM-DDTHH:MM:SS+08:00 | 213 | | // or 2022-08-08T12:10:10.000Z or YYYY-MM-DDTHH:MM:SS-08:00 | 214 | 486 | fmt = "%Y-%m-%dT%H:%M:%E*S%Ez"; | 215 | 486 | cctz::time_zone ctz; | 216 | | // find time_zone by time_zone suffix string | 217 | 486 | TimezoneUtils::find_cctz_time_zone(value.as_string(), ctz); | 218 | 486 | ok = cctz::parse(fmt, str_date, ctz, &tp); | 219 | 486 | } else { | 220 | | // without time_zone info | 221 | | // 2022-08-08T12:10:10.000 | 222 | 116 | fmt = "%Y-%m-%dT%H:%M:%E*S"; | 223 | | // If the time without time_zone info, ES will assume it is UTC time. | 224 | | // So we parse it in Doris with UTC time zone. 
| 225 | 116 | ok = cctz::parse(fmt, str_date, cctz::utc_time_zone(), &tp); | 226 | 116 | } | 227 | 602 | if (ok) { | 228 | | // The local time zone can change by session variable `time_zone` | 229 | | // We should use the user specified time zone, not the actual system local time zone. | 230 | 602 | success = true; | 231 | 602 | dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone); | 232 | 602 | } | 233 | 2.24k | } else if (str_length == 19) { | 234 | | // YYYY-MM-DDTHH:MM:SS | 235 | 1.90k | if (*(str_date.c_str() + 10) == 'T') { | 236 | 40 | std::chrono::system_clock::time_point tp; | 237 | 40 | const bool ok = | 238 | 40 | cctz::parse("%Y-%m-%dT%H:%M:%S", str_date, cctz::utc_time_zone(), &tp); | 239 | 40 | if (ok) { | 240 | 40 | success = true; | 241 | 40 | dt_val.from_unixtime(std::chrono::system_clock::to_time_t(tp), time_zone); | 242 | 40 | } | 243 | 1.86k | } else { | 244 | | // YYYY-MM-DD HH:MM:SS | 245 | 1.86k | CastParameters params; | 246 | | if constexpr (is_datetime_v1) { | 247 | | success = CastToDateOrDatetime::from_string_non_strict_mode< | 248 | | DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length}, | 249 | | dt_val, nullptr, params); | 250 | | } else if constexpr (T == TYPE_DATEV2) { | 251 | | success = CastToDateV2::from_string_non_strict_mode( | 252 | | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params); | 253 | 1.86k | } else { | 254 | 1.86k | success = CastToDatetimeV2::from_string_non_strict_mode( | 255 | 1.86k | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params); | 256 | 1.86k | } | 257 | 1.86k | } | 258 | | | 259 | 1.90k | } else if (str_length == 13) { | 260 | | // string long like "1677895728000" | 261 | 0 | int64_t time_long = std::atol(str_date.c_str()); | 262 | 0 | if (time_long > 0) { | 263 | 0 | success = true; | 264 | 0 | dt_val.from_unixtime(time_long / 1000, time_zone); | 265 | 0 | } | 266 | 344 | } else { | 267 | | // YYYY-MM-DD or others | 268 | 344 | 
CastParameters params; | 269 | | if constexpr (is_datetime_v1) { | 270 | | success = CastToDateOrDatetime::from_string_non_strict_mode< | 271 | | DatelikeTargetType::DATE_TIME>({str_date.c_str(), (size_t)str_length}, | 272 | | dt_val, nullptr, params); | 273 | | } else if constexpr (T == TYPE_DATEV2) { | 274 | | success = CastToDateV2::from_string_non_strict_mode( | 275 | | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, params); | 276 | 344 | } else { | 277 | 344 | success = CastToDatetimeV2::from_string_non_strict_mode( | 278 | 344 | {str_date.c_str(), (size_t)str_length}, dt_val, nullptr, -1, params); | 279 | 344 | } | 280 | 344 | } | 281 | | | 282 | 2.85k | if (!success) { | 283 | 0 | RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type); | 284 | 0 | } | 285 | | | 286 | 2.85k | } else { | 287 | 236 | dt_val.from_unixtime(col.GetInt64() / 1000, time_zone); | 288 | 236 | } | 289 | | if constexpr (is_datetime_v1) { | 290 | | if (type == TYPE_DATE) { | 291 | | dt_val.cast_to_date(); | 292 | | } else { | 293 | | dt_val.to_datetime(); | 294 | | } | 295 | | } | 296 | | | 297 | 3.08k | *slot = *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(&dt_val); | 298 | 3.08k | return Status::OK(); | 299 | 3.08k | } |
Unexecuted instantiation: _ZN5doris18get_date_value_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE Unexecuted instantiation: _ZN5doris18get_date_value_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE |
300 | | |
301 | | template <PrimitiveType T> |
302 | | Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value, |
303 | | typename PrimitiveTypeTraits<T>::CppType* slot, |
304 | 4.38k | const cctz::time_zone& time_zone) { |
305 | | // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source |
306 | 4.38k | if (col.IsNumber()) { |
307 | | // ES process date/datetime field would use millisecond timestamp for index or docvalue |
308 | | // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms |
309 | | // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds |
310 | 148 | return get_date_value_int<T>(col, type, false, slot, time_zone); |
311 | 4.23k | } else if (col.IsArray() && pure_doc_value && !col.Empty()) { |
312 | | // this would happened just only when `enable_docvalue_scan = true` |
313 | | // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose |
314 | | // a standard date-format for date field as `2020-06-16T00:00:00.000Z` |
315 | | // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for |
316 | | // date field's docvalue |
317 | 432 | if (col[0].IsString()) { |
318 | 344 | return get_date_value_int<T>(col[0], type, true, slot, time_zone); |
319 | 344 | } |
320 | | // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds |
321 | 88 | return get_date_value_int<T>(col[0], type, false, slot, time_zone); |
322 | 3.80k | } else { |
323 | | // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source |
324 | 3.80k | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); |
325 | 3.80k | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); |
326 | 3.80k | return get_date_value_int<T>(col, type, true, slot, time_zone); |
327 | 3.80k | } |
328 | 4.38k | } _ZN5doris12get_date_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE Line | Count | Source | 304 | 1.30k | const cctz::time_zone& time_zone) { | 305 | | // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source | 306 | 1.30k | if (col.IsNumber()) { | 307 | | // ES process date/datetime field would use millisecond timestamp for index or docvalue | 308 | | // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms | 309 | | // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds | 310 | 0 | return get_date_value_int<T>(col, type, false, slot, time_zone); | 311 | 1.30k | } else if (col.IsArray() && pure_doc_value && !col.Empty()) { | 312 | | // this would happened just only when `enable_docvalue_scan = true` | 313 | | // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose | 314 | | // a standard date-format for date field as `2020-06-16T00:00:00.000Z` | 315 | | // At present, we just process this string format date. 
After some PR were merged into Doris, we would impose `epoch_mills` for | 316 | | // date field's docvalue | 317 | 0 | if (col[0].IsString()) { | 318 | 0 | return get_date_value_int<T>(col[0], type, true, slot, time_zone); | 319 | 0 | } | 320 | | // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds | 321 | 0 | return get_date_value_int<T>(col[0], type, false, slot, time_zone); | 322 | 1.30k | } else { | 323 | | // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source | 324 | 1.30k | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 325 | 1.30k | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 326 | 1.30k | return get_date_value_int<T>(col, type, true, slot, time_zone); | 327 | 1.30k | } | 328 | 1.30k | } |
_ZN5doris12get_date_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE Line | Count | Source | 304 | 3.08k | const cctz::time_zone& time_zone) { | 305 | | // this would happend just only when `enable_docvalue_scan = false`, and field has timestamp format date from _source | 306 | 3.08k | if (col.IsNumber()) { | 307 | | // ES process date/datetime field would use millisecond timestamp for index or docvalue | 308 | | // processing date type field, if a number is encountered, Doris On ES will force it to be processed according to ms | 309 | | // Doris On ES needs to be consistent with ES, so just divided by 1000 because the unit for from_unixtime is seconds | 310 | 148 | return get_date_value_int<T>(col, type, false, slot, time_zone); | 311 | 2.93k | } else if (col.IsArray() && pure_doc_value && !col.Empty()) { | 312 | | // this would happened just only when `enable_docvalue_scan = true` | 313 | | // ES add default format for all field after ES 6.4, if we not provided format for `date` field ES would impose | 314 | | // a standard date-format for date field as `2020-06-16T00:00:00.000Z` | 315 | | // At present, we just process this string format date. 
After some PR were merged into Doris, we would impose `epoch_mills` for | 316 | | // date field's docvalue | 317 | 432 | if (col[0].IsString()) { | 318 | 344 | return get_date_value_int<T>(col[0], type, true, slot, time_zone); | 319 | 344 | } | 320 | | // ES would return millisecond timestamp for date field, divided by 1000 because the unit for from_unixtime is seconds | 321 | 88 | return get_date_value_int<T>(col[0], type, false, slot, time_zone); | 322 | 2.50k | } else { | 323 | | // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source | 324 | 2.50k | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 325 | 2.50k | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 326 | 2.50k | return get_date_value_int<T>(col, type, true, slot, time_zone); | 327 | 2.50k | } | 328 | 3.08k | } |
Unexecuted instantiation: _ZN5doris12get_date_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE Unexecuted instantiation: _ZN5doris12get_date_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_19PrimitiveTypeTraitsIXT_EE7CppTypeERKN4cctz9time_zoneE |
329 | | template <PrimitiveType T> |
330 | | Status fill_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_doc_value, |
331 | 1.78k | IColumn* col_ptr, const cctz::time_zone& time_zone) { |
332 | 1.78k | typename PrimitiveTypeTraits<T>::CppType data; |
333 | 1.78k | RETURN_IF_ERROR((get_date_int<T>(col, type, pure_doc_value, &data, time_zone))); |
334 | 1.78k | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0); |
335 | 1.78k | return Status::OK(); |
336 | 1.78k | } Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE11EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE12EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE Unexecuted instantiation: _ZN5doris13fill_date_intILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE _ZN5doris13fill_date_intILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bPNS_7IColumnERKN4cctz9time_zoneE Line | Count | Source | 331 | 1.78k | IColumn* col_ptr, const cctz::time_zone& time_zone) { | 332 | 1.78k | typename PrimitiveTypeTraits<T>::CppType data; | 333 | 1.78k | RETURN_IF_ERROR((get_date_int<T>(col, type, pure_doc_value, &data, time_zone))); | 334 | 1.78k | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0); | 335 | 1.78k | return Status::OK(); | 336 | 1.78k | } |
|
337 | | |
338 | | template <typename T> |
339 | | Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* slot, |
340 | 10.4k | bool pure_doc_value) { |
341 | 10.4k | static_assert(sizeof(T) == 4 || sizeof(T) == 8); |
342 | 10.4k | if (col.IsNumber()) { |
343 | 10.4k | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble()); |
344 | 10.4k | return Status::OK(); |
345 | 10.4k | } |
346 | | |
347 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { |
348 | 0 | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble()); |
349 | 0 | return Status::OK(); |
350 | 0 | } |
351 | | |
352 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); |
353 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); |
354 | | |
355 | 0 | StringParser::ParseResult result; |
356 | 0 | const std::string& val = col.GetString(); |
357 | 0 | size_t len = col.GetStringLength(); |
358 | 0 | T v = StringParser::string_to_float<T>(val.c_str(), len, &result); |
359 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); |
360 | 0 | *reinterpret_cast<T*>(slot) = v; |
361 | |
|
362 | 0 | return Status::OK(); |
363 | 0 | } _ZN5doris15get_float_valueIfEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb Line | Count | Source | 340 | 5.20k | bool pure_doc_value) { | 341 | 5.20k | static_assert(sizeof(T) == 4 || sizeof(T) == 8); | 342 | 5.20k | if (col.IsNumber()) { | 343 | 5.20k | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble()); | 344 | 5.20k | return Status::OK(); | 345 | 5.20k | } | 346 | | | 347 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { | 348 | 0 | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble()); | 349 | 0 | return Status::OK(); | 350 | 0 | } | 351 | | | 352 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 353 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 354 | | | 355 | 0 | StringParser::ParseResult result; | 356 | 0 | const std::string& val = col.GetString(); | 357 | 0 | size_t len = col.GetStringLength(); | 358 | 0 | T v = StringParser::string_to_float<T>(val.c_str(), len, &result); | 359 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); | 360 | 0 | *reinterpret_cast<T*>(slot) = v; | 361 | |
| 362 | 0 | return Status::OK(); | 363 | 0 | } |
_ZN5doris15get_float_valueIdEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPvb Line | Count | Source | 340 | 5.20k | bool pure_doc_value) { | 341 | 5.20k | static_assert(sizeof(T) == 4 || sizeof(T) == 8); | 342 | 5.20k | if (col.IsNumber()) { | 343 | 5.20k | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble()); | 344 | 5.20k | return Status::OK(); | 345 | 5.20k | } | 346 | | | 347 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { | 348 | 0 | *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble()); | 349 | 0 | return Status::OK(); | 350 | 0 | } | 351 | | | 352 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 353 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 354 | | | 355 | 0 | StringParser::ParseResult result; | 356 | 0 | const std::string& val = col.GetString(); | 357 | 0 | size_t len = col.GetStringLength(); | 358 | 0 | T v = StringParser::string_to_float<T>(val.c_str(), len, &result); | 359 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); | 360 | 0 | *reinterpret_cast<T*>(slot) = v; | 361 | |
| 362 | 0 | return Status::OK(); | 363 | 0 | } |
|
364 | | |
365 | | template <typename T> |
366 | | Status insert_float_value(const rapidjson::Value& col, PrimitiveType type, IColumn* col_ptr, |
367 | 344 | bool pure_doc_value, bool nullable) { |
368 | 344 | static_assert(sizeof(T) == 4 || sizeof(T) == 8); |
369 | 344 | if (col.IsNumber() && nullable) { |
370 | 344 | T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble()); |
371 | 344 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); |
372 | 344 | return Status::OK(); |
373 | 344 | } |
374 | | |
375 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty() && nullable) { |
376 | 0 | T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble()); |
377 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); |
378 | 0 | return Status::OK(); |
379 | 0 | } |
380 | | |
381 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); |
382 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); |
383 | | |
384 | 0 | StringParser::ParseResult result; |
385 | 0 | const std::string& val = col.GetString(); |
386 | 0 | size_t len = col.GetStringLength(); |
387 | 0 | T v = StringParser::string_to_float<T>(val.c_str(), len, &result); |
388 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); |
389 | | |
390 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0); |
391 | |
|
392 | 0 | return Status::OK(); |
393 | 0 | } _ZN5doris18insert_float_valueIdEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb Line | Count | Source | 367 | 344 | bool pure_doc_value, bool nullable) { | 368 | 344 | static_assert(sizeof(T) == 4 || sizeof(T) == 8); | 369 | 344 | if (col.IsNumber() && nullable) { | 370 | 344 | T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble()); | 371 | 344 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); | 372 | 344 | return Status::OK(); | 373 | 344 | } | 374 | | | 375 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty() && nullable) { | 376 | 0 | T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble()); | 377 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); | 378 | 0 | return Status::OK(); | 379 | 0 | } | 380 | | | 381 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 382 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 383 | | | 384 | 0 | StringParser::ParseResult result; | 385 | 0 | const std::string& val = col.GetString(); | 386 | 0 | size_t len = col.GetStringLength(); | 387 | 0 | T v = StringParser::string_to_float<T>(val.c_str(), len, &result); | 388 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col, type); | 389 | | | 390 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0); | 391 | |
| 392 | 0 | return Status::OK(); | 393 | 0 | } |
Unexecuted instantiation: _ZN5doris18insert_float_valueIfEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb |
394 | | |
395 | | template <typename T> |
396 | | Status insert_int_value(const rapidjson::Value& col, PrimitiveType type, IColumn* col_ptr, |
397 | 964 | bool pure_doc_value, bool nullable) { |
398 | 964 | if (col.IsNumber()) { |
399 | 572 | T value; |
400 | | // ES allows inserting float and double in int/long types. |
401 | | // To parse these numbers in Doris, we direct cast them to int types. |
402 | 572 | if (col.IsDouble()) { |
403 | 152 | value = static_cast<T>(col.GetDouble()); |
404 | 420 | } else if (col.IsFloat()) { |
405 | 0 | value = static_cast<T>(col.GetFloat()); |
406 | 420 | } else { |
407 | 420 | value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); |
408 | 420 | } |
409 | 572 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); |
410 | 572 | return Status::OK(); |
411 | 572 | } |
412 | | |
413 | 392 | auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status { |
414 | 220 | StringParser::ParseResult result; |
415 | 220 | std::string val = col_value.GetString(); |
416 | | // ES allows inserting numbers and characters containing decimals in numeric types. |
417 | | // To parse these numbers in Doris, we remove the decimals here. |
418 | 220 | size_t pos = val.find('.'); |
419 | 220 | if (pos != std::string::npos) { |
420 | 84 | val = val.substr(0, pos); |
421 | 84 | } |
422 | 220 | size_t len = val.length(); |
423 | 220 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); |
424 | 220 | RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type); |
425 | | |
426 | 220 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0); |
427 | 220 | return Status::OK(); |
428 | 220 | }; Unexecuted instantiation: _ZZN5doris16insert_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_ Unexecuted instantiation: _ZZN5doris16insert_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_ Unexecuted instantiation: _ZZN5doris16insert_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_ _ZZN5doris16insert_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_ Line | Count | Source | 413 | 220 | auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status { | 414 | 220 | StringParser::ParseResult result; | 415 | 220 | std::string val = col_value.GetString(); | 416 | | // ES allows inserting numbers and characters containing decimals in numeric types. | 417 | | // To parse these numbers in Doris, we remove the decimals here. | 418 | 220 | size_t pos = val.find('.'); | 419 | 220 | if (pos != std::string::npos) { | 420 | 84 | val = val.substr(0, pos); | 421 | 84 | } | 422 | 220 | size_t len = val.length(); | 423 | 220 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); | 424 | 220 | RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type); | 425 | | | 426 | 220 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0); | 427 | 220 | return Status::OK(); | 428 | 220 | }; |
Unexecuted instantiation: _ZZN5doris16insert_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbbENKUlSB_E_clESB_ |
429 | | |
430 | 392 | if (pure_doc_value && col.IsArray() && !col.Empty()) { |
431 | 276 | if (col[0].IsNumber()) { |
432 | 172 | T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); |
433 | 172 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); |
434 | 172 | return Status::OK(); |
435 | 172 | } else { |
436 | 104 | RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true); |
437 | 104 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type); |
438 | 104 | return parse_and_insert_data(col[0]); |
439 | 104 | } |
440 | 276 | } |
441 | | |
442 | 116 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); |
443 | 116 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); |
444 | 116 | return parse_and_insert_data(col); |
445 | 116 | } Unexecuted instantiation: _ZN5doris16insert_int_valueIaEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb Unexecuted instantiation: _ZN5doris16insert_int_valueIsEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb _ZN5doris16insert_int_valueIiEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb Line | Count | Source | 397 | 44 | bool pure_doc_value, bool nullable) { | 398 | 44 | if (col.IsNumber()) { | 399 | 44 | T value; | 400 | | // ES allows inserting float and double in int/long types. | 401 | | // To parse these numbers in Doris, we direct cast them to int types. | 402 | 44 | if (col.IsDouble()) { | 403 | 0 | value = static_cast<T>(col.GetDouble()); | 404 | 44 | } else if (col.IsFloat()) { | 405 | 0 | value = static_cast<T>(col.GetFloat()); | 406 | 44 | } else { | 407 | 44 | value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); | 408 | 44 | } | 409 | 44 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); | 410 | 44 | return Status::OK(); | 411 | 44 | } | 412 | | | 413 | 0 | auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status { | 414 | 0 | StringParser::ParseResult result; | 415 | 0 | std::string val = col_value.GetString(); | 416 | | // ES allows inserting numbers and characters containing decimals in numeric types. | 417 | | // To parse these numbers in Doris, we remove the decimals here. | 418 | 0 | size_t pos = val.find('.'); | 419 | 0 | if (pos != std::string::npos) { | 420 | 0 | val = val.substr(0, pos); | 421 | 0 | } | 422 | 0 | size_t len = val.length(); | 423 | 0 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); | 424 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type); | 425 | |
| 426 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0); | 427 | 0 | return Status::OK(); | 428 | 0 | }; | 429 | |
| 430 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty()) { | 431 | 0 | if (col[0].IsNumber()) { | 432 | 0 | T value = (T)(sizeof(T) < 8 ? col[0].GetInt() : col[0].GetInt64()); | 433 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); | 434 | 0 | return Status::OK(); | 435 | 0 | } else { | 436 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true); | 437 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type); | 438 | 0 | return parse_and_insert_data(col[0]); | 439 | 0 | } | 440 | 0 | } | 441 | | | 442 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 443 | 0 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 444 | 0 | return parse_and_insert_data(col); | 445 | 0 | } |
_ZN5doris16insert_int_valueIlEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb Line | Count | Source | 397 | 920 | bool pure_doc_value, bool nullable) { | 398 | 920 | if (col.IsNumber()) { | 399 | 528 | T value; | 400 | | // ES allows inserting float and double in int/long types. | 401 | | // To parse these numbers in Doris, we direct cast them to int types. | 402 | 528 | if (col.IsDouble()) { | 403 | 152 | value = static_cast<T>(col.GetDouble()); | 404 | 376 | } else if (col.IsFloat()) { | 405 | 0 | value = static_cast<T>(col.GetFloat()); | 406 | 376 | } else { | 407 | 376 | value = (T)(sizeof(T) < 8 ? col.GetInt() : col.GetInt64()); | 408 | 376 | } | 409 | 528 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); | 410 | 528 | return Status::OK(); | 411 | 528 | } | 412 | | | 413 | 392 | auto parse_and_insert_data = [&](const rapidjson::Value& col_value) -> Status { | 414 | 392 | StringParser::ParseResult result; | 415 | 392 | std::string val = col_value.GetString(); | 416 | | // ES allows inserting numbers and characters containing decimals in numeric types. | 417 | | // To parse these numbers in Doris, we remove the decimals here. | 418 | 392 | size_t pos = val.find('.'); | 419 | 392 | if (pos != std::string::npos) { | 420 | 392 | val = val.substr(0, pos); | 421 | 392 | } | 422 | 392 | size_t len = val.length(); | 423 | 392 | T v = StringParser::string_to_int<T>(val.c_str(), len, &result); | 424 | 392 | RETURN_ERROR_IF_PARSING_FAILED(result, col_value, type); | 425 | | | 426 | 392 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0); | 427 | 392 | return Status::OK(); | 428 | 392 | }; | 429 | | | 430 | 392 | if (pure_doc_value && col.IsArray() && !col.Empty()) { | 431 | 276 | if (col[0].IsNumber()) { | 432 | 172 | T value = (T)(sizeof(T) < 8 ? 
col[0].GetInt() : col[0].GetInt64()); | 433 | 172 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0); | 434 | 172 | return Status::OK(); | 435 | 172 | } else { | 436 | 104 | RETURN_ERROR_IF_COL_IS_ARRAY(col[0], type, true); | 437 | 104 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col[0], type); | 438 | 104 | return parse_and_insert_data(col[0]); | 439 | 104 | } | 440 | 276 | } | 441 | | | 442 | 116 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); | 443 | 116 | RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); | 444 | 116 | return parse_and_insert_data(col); | 445 | 116 | } |
Unexecuted instantiation: _ZN5doris16insert_int_valueInEENS_6StatusERKN9rapidjson12GenericValueINS2_4UTF8IcEENS2_19MemoryPoolAllocatorINS2_12CrtAllocatorEEEEENS_13PrimitiveTypeEPNS_7IColumnEbb |
446 | | |
447 | | template <PrimitiveType T> |
448 | | Status handle_value(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value, |
449 | 31.8k | typename PrimitiveTypeTraits<T>::CppType& val) { |
450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || |
451 | 13.3k | T == TYPE_LARGEINT) { |
452 | 13.3k | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, |
453 | 13.3k | pure_doc_value)); |
454 | 13.3k | return Status::OK(); |
455 | 13.3k | } |
456 | 5.20k | if constexpr (T == TYPE_FLOAT) { |
457 | 5.20k | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); |
458 | 5.20k | return Status::OK(); |
459 | 5.20k | } |
460 | 5.20k | if constexpr (T == TYPE_DOUBLE) { |
461 | 5.20k | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); |
462 | 5.20k | return Status::OK(); |
463 | 5.20k | } |
464 | 5.50k | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { |
465 | | // When ES mapping is keyword/text but actual data is an array, |
466 | | // serialize the array to JSON string instead of throwing an error. |
467 | | // This is valid in ES since any field can hold array values. |
468 | 5.50k | if (col.IsArray()) { |
469 | 0 | val = json_value_to_string(col); |
470 | 5.50k | } else if (!col.IsString()) { |
471 | 0 | val = json_value_to_string(col); |
472 | 5.50k | } else { |
473 | 5.50k | val = col.GetString(); |
474 | 5.50k | } |
475 | 5.50k | return Status::OK(); |
476 | 5.50k | } |
477 | 2.60k | if constexpr (T == TYPE_BOOLEAN) { |
478 | 2.60k | if (col.IsBool()) { |
479 | 2.60k | val = col.GetBool(); |
480 | 2.60k | return Status::OK(); |
481 | 2.60k | } |
482 | | |
483 | 0 | if (col.IsNumber()) { |
484 | 0 | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); |
485 | 0 | return Status::OK(); |
486 | 0 | } |
487 | | |
488 | 0 | bool is_nested_str = false; |
489 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { |
490 | 0 | val = col[0].GetBool(); |
491 | 0 | return Status::OK(); |
492 | 0 | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { |
493 | 0 | is_nested_str = true; |
494 | 0 | } else if (pure_doc_value && col.IsArray()) { |
495 | 0 | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); |
496 | 0 | } |
497 | | |
498 | 0 | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; |
499 | 0 | const std::string& str_val = str_col.GetString(); |
500 | 0 | size_t val_size = str_col.GetStringLength(); |
501 | 0 | StringParser::ParseResult result; |
502 | 0 | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); |
503 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); |
504 | 0 | return Status::OK(); |
505 | 0 | } |
506 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); |
507 | 31.8k | } _ZN5doris12handle_valueILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE Line | Count | Source | 449 | 5.50k | typename PrimitiveTypeTraits<T>::CppType& val) { | 450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || | 451 | | T == TYPE_LARGEINT) { | 452 | | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, | 453 | | pure_doc_value)); | 454 | | return Status::OK(); | 455 | | } | 456 | | if constexpr (T == TYPE_FLOAT) { | 457 | | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); | 458 | | return Status::OK(); | 459 | | } | 460 | | if constexpr (T == TYPE_DOUBLE) { | 461 | | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); | 462 | | return Status::OK(); | 463 | | } | 464 | 5.50k | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { | 465 | | // When ES mapping is keyword/text but actual data is an array, | 466 | | // serialize the array to JSON string instead of throwing an error. | 467 | | // This is valid in ES since any field can hold array values. 
| 468 | 5.50k | if (col.IsArray()) { | 469 | 0 | val = json_value_to_string(col); | 470 | 5.50k | } else if (!col.IsString()) { | 471 | 0 | val = json_value_to_string(col); | 472 | 5.50k | } else { | 473 | 5.50k | val = col.GetString(); | 474 | 5.50k | } | 475 | 5.50k | return Status::OK(); | 476 | 5.50k | } | 477 | | if constexpr (T == TYPE_BOOLEAN) { | 478 | | if (col.IsBool()) { | 479 | | val = col.GetBool(); | 480 | | return Status::OK(); | 481 | | } | 482 | | | 483 | | if (col.IsNumber()) { | 484 | | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); | 485 | | return Status::OK(); | 486 | | } | 487 | | | 488 | | bool is_nested_str = false; | 489 | | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { | 490 | | val = col[0].GetBool(); | 491 | | return Status::OK(); | 492 | | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { | 493 | | is_nested_str = true; | 494 | | } else if (pure_doc_value && col.IsArray()) { | 495 | | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); | 496 | | } | 497 | | | 498 | | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; | 499 | | const std::string& str_val = str_col.GetString(); | 500 | | size_t val_size = str_col.GetStringLength(); | 501 | | StringParser::ParseResult result; | 502 | | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); | 503 | | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); | 504 | | return Status::OK(); | 505 | | } | 506 | 5.50k | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); | 507 | 5.50k | } |
_ZN5doris12handle_valueILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE Line | Count | Source | 449 | 2.60k | typename PrimitiveTypeTraits<T>::CppType& val) { | 450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || | 451 | 2.60k | T == TYPE_LARGEINT) { | 452 | 2.60k | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, | 453 | 2.60k | pure_doc_value)); | 454 | 2.60k | return Status::OK(); | 455 | 2.60k | } | 456 | | if constexpr (T == TYPE_FLOAT) { | 457 | | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); | 458 | | return Status::OK(); | 459 | | } | 460 | | if constexpr (T == TYPE_DOUBLE) { | 461 | | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); | 462 | | return Status::OK(); | 463 | | } | 464 | | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { | 465 | | // When ES mapping is keyword/text but actual data is an array, | 466 | | // serialize the array to JSON string instead of throwing an error. | 467 | | // This is valid in ES since any field can hold array values. 
| 468 | | if (col.IsArray()) { | 469 | | val = json_value_to_string(col); | 470 | | } else if (!col.IsString()) { | 471 | | val = json_value_to_string(col); | 472 | | } else { | 473 | | val = col.GetString(); | 474 | | } | 475 | | return Status::OK(); | 476 | | } | 477 | | if constexpr (T == TYPE_BOOLEAN) { | 478 | | if (col.IsBool()) { | 479 | | val = col.GetBool(); | 480 | | return Status::OK(); | 481 | | } | 482 | | | 483 | | if (col.IsNumber()) { | 484 | | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); | 485 | | return Status::OK(); | 486 | | } | 487 | | | 488 | | bool is_nested_str = false; | 489 | | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { | 490 | | val = col[0].GetBool(); | 491 | | return Status::OK(); | 492 | | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { | 493 | | is_nested_str = true; | 494 | | } else if (pure_doc_value && col.IsArray()) { | 495 | | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); | 496 | | } | 497 | | | 498 | | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; | 499 | | const std::string& str_val = str_col.GetString(); | 500 | | size_t val_size = str_col.GetStringLength(); | 501 | | StringParser::ParseResult result; | 502 | | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); | 503 | | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); | 504 | | return Status::OK(); | 505 | | } | 506 | 2.60k | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); | 507 | 2.60k | } |
_ZN5doris12handle_valueILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE Line | Count | Source | 449 | 2.60k | typename PrimitiveTypeTraits<T>::CppType& val) { | 450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || | 451 | 2.60k | T == TYPE_LARGEINT) { | 452 | 2.60k | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, | 453 | 2.60k | pure_doc_value)); | 454 | 2.60k | return Status::OK(); | 455 | 2.60k | } | 456 | | if constexpr (T == TYPE_FLOAT) { | 457 | | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); | 458 | | return Status::OK(); | 459 | | } | 460 | | if constexpr (T == TYPE_DOUBLE) { | 461 | | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); | 462 | | return Status::OK(); | 463 | | } | 464 | | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { | 465 | | // When ES mapping is keyword/text but actual data is an array, | 466 | | // serialize the array to JSON string instead of throwing an error. | 467 | | // This is valid in ES since any field can hold array values. 
| 468 | | if (col.IsArray()) { | 469 | | val = json_value_to_string(col); | 470 | | } else if (!col.IsString()) { | 471 | | val = json_value_to_string(col); | 472 | | } else { | 473 | | val = col.GetString(); | 474 | | } | 475 | | return Status::OK(); | 476 | | } | 477 | | if constexpr (T == TYPE_BOOLEAN) { | 478 | | if (col.IsBool()) { | 479 | | val = col.GetBool(); | 480 | | return Status::OK(); | 481 | | } | 482 | | | 483 | | if (col.IsNumber()) { | 484 | | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); | 485 | | return Status::OK(); | 486 | | } | 487 | | | 488 | | bool is_nested_str = false; | 489 | | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { | 490 | | val = col[0].GetBool(); | 491 | | return Status::OK(); | 492 | | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { | 493 | | is_nested_str = true; | 494 | | } else if (pure_doc_value && col.IsArray()) { | 495 | | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); | 496 | | } | 497 | | | 498 | | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; | 499 | | const std::string& str_val = str_col.GetString(); | 500 | | size_t val_size = str_col.GetStringLength(); | 501 | | StringParser::ParseResult result; | 502 | | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); | 503 | | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); | 504 | | return Status::OK(); | 505 | | } | 506 | 2.60k | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); | 507 | 2.60k | } |
_ZN5doris12handle_valueILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE Line | Count | Source | 449 | 2.90k | typename PrimitiveTypeTraits<T>::CppType& val) { | 450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || | 451 | 2.90k | T == TYPE_LARGEINT) { | 452 | 2.90k | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, | 453 | 2.90k | pure_doc_value)); | 454 | 2.90k | return Status::OK(); | 455 | 2.90k | } | 456 | | if constexpr (T == TYPE_FLOAT) { | 457 | | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); | 458 | | return Status::OK(); | 459 | | } | 460 | | if constexpr (T == TYPE_DOUBLE) { | 461 | | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); | 462 | | return Status::OK(); | 463 | | } | 464 | | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { | 465 | | // When ES mapping is keyword/text but actual data is an array, | 466 | | // serialize the array to JSON string instead of throwing an error. | 467 | | // This is valid in ES since any field can hold array values. 
| 468 | | if (col.IsArray()) { | 469 | | val = json_value_to_string(col); | 470 | | } else if (!col.IsString()) { | 471 | | val = json_value_to_string(col); | 472 | | } else { | 473 | | val = col.GetString(); | 474 | | } | 475 | | return Status::OK(); | 476 | | } | 477 | | if constexpr (T == TYPE_BOOLEAN) { | 478 | | if (col.IsBool()) { | 479 | | val = col.GetBool(); | 480 | | return Status::OK(); | 481 | | } | 482 | | | 483 | | if (col.IsNumber()) { | 484 | | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); | 485 | | return Status::OK(); | 486 | | } | 487 | | | 488 | | bool is_nested_str = false; | 489 | | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { | 490 | | val = col[0].GetBool(); | 491 | | return Status::OK(); | 492 | | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { | 493 | | is_nested_str = true; | 494 | | } else if (pure_doc_value && col.IsArray()) { | 495 | | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); | 496 | | } | 497 | | | 498 | | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; | 499 | | const std::string& str_val = str_col.GetString(); | 500 | | size_t val_size = str_col.GetStringLength(); | 501 | | StringParser::ParseResult result; | 502 | | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); | 503 | | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); | 504 | | return Status::OK(); | 505 | | } | 506 | 2.90k | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); | 507 | 2.90k | } |
_ZN5doris12handle_valueILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE Line | Count | Source | 449 | 3.53k | typename PrimitiveTypeTraits<T>::CppType& val) { | 450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || | 451 | 3.53k | T == TYPE_LARGEINT) { | 452 | 3.53k | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, | 453 | 3.53k | pure_doc_value)); | 454 | 3.53k | return Status::OK(); | 455 | 3.53k | } | 456 | | if constexpr (T == TYPE_FLOAT) { | 457 | | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); | 458 | | return Status::OK(); | 459 | | } | 460 | | if constexpr (T == TYPE_DOUBLE) { | 461 | | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); | 462 | | return Status::OK(); | 463 | | } | 464 | | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { | 465 | | // When ES mapping is keyword/text but actual data is an array, | 466 | | // serialize the array to JSON string instead of throwing an error. | 467 | | // This is valid in ES since any field can hold array values. 
| 468 | | if (col.IsArray()) { | 469 | | val = json_value_to_string(col); | 470 | | } else if (!col.IsString()) { | 471 | | val = json_value_to_string(col); | 472 | | } else { | 473 | | val = col.GetString(); | 474 | | } | 475 | | return Status::OK(); | 476 | | } | 477 | | if constexpr (T == TYPE_BOOLEAN) { | 478 | | if (col.IsBool()) { | 479 | | val = col.GetBool(); | 480 | | return Status::OK(); | 481 | | } | 482 | | | 483 | | if (col.IsNumber()) { | 484 | | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); | 485 | | return Status::OK(); | 486 | | } | 487 | | | 488 | | bool is_nested_str = false; | 489 | | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { | 490 | | val = col[0].GetBool(); | 491 | | return Status::OK(); | 492 | | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { | 493 | | is_nested_str = true; | 494 | | } else if (pure_doc_value && col.IsArray()) { | 495 | | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); | 496 | | } | 497 | | | 498 | | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; | 499 | | const std::string& str_val = str_col.GetString(); | 500 | | size_t val_size = str_col.GetStringLength(); | 501 | | StringParser::ParseResult result; | 502 | | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); | 503 | | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); | 504 | | return Status::OK(); | 505 | | } | 506 | 3.53k | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); | 507 | 3.53k | } |
_ZN5doris12handle_valueILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE Line | Count | Source | 449 | 1.66k | typename PrimitiveTypeTraits<T>::CppType& val) { | 450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || | 451 | 1.66k | T == TYPE_LARGEINT) { | 452 | 1.66k | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, | 453 | 1.66k | pure_doc_value)); | 454 | 1.66k | return Status::OK(); | 455 | 1.66k | } | 456 | | if constexpr (T == TYPE_FLOAT) { | 457 | | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); | 458 | | return Status::OK(); | 459 | | } | 460 | | if constexpr (T == TYPE_DOUBLE) { | 461 | | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); | 462 | | return Status::OK(); | 463 | | } | 464 | | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { | 465 | | // When ES mapping is keyword/text but actual data is an array, | 466 | | // serialize the array to JSON string instead of throwing an error. | 467 | | // This is valid in ES since any field can hold array values. 
| 468 | | if (col.IsArray()) { | 469 | | val = json_value_to_string(col); | 470 | | } else if (!col.IsString()) { | 471 | | val = json_value_to_string(col); | 472 | | } else { | 473 | | val = col.GetString(); | 474 | | } | 475 | | return Status::OK(); | 476 | | } | 477 | | if constexpr (T == TYPE_BOOLEAN) { | 478 | | if (col.IsBool()) { | 479 | | val = col.GetBool(); | 480 | | return Status::OK(); | 481 | | } | 482 | | | 483 | | if (col.IsNumber()) { | 484 | | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); | 485 | | return Status::OK(); | 486 | | } | 487 | | | 488 | | bool is_nested_str = false; | 489 | | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { | 490 | | val = col[0].GetBool(); | 491 | | return Status::OK(); | 492 | | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { | 493 | | is_nested_str = true; | 494 | | } else if (pure_doc_value && col.IsArray()) { | 495 | | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); | 496 | | } | 497 | | | 498 | | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; | 499 | | const std::string& str_val = str_col.GetString(); | 500 | | size_t val_size = str_col.GetStringLength(); | 501 | | StringParser::ParseResult result; | 502 | | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); | 503 | | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); | 504 | | return Status::OK(); | 505 | | } | 506 | 1.66k | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); | 507 | 1.66k | } |
_ZN5doris12handle_valueILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE Line | Count | Source | 449 | 5.20k | typename PrimitiveTypeTraits<T>::CppType& val) { | 450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || | 451 | | T == TYPE_LARGEINT) { | 452 | | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, | 453 | | pure_doc_value)); | 454 | | return Status::OK(); | 455 | | } | 456 | 5.20k | if constexpr (T == TYPE_FLOAT) { | 457 | 5.20k | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); | 458 | 5.20k | return Status::OK(); | 459 | 5.20k | } | 460 | | if constexpr (T == TYPE_DOUBLE) { | 461 | | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); | 462 | | return Status::OK(); | 463 | | } | 464 | | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { | 465 | | // When ES mapping is keyword/text but actual data is an array, | 466 | | // serialize the array to JSON string instead of throwing an error. | 467 | | // This is valid in ES since any field can hold array values. 
| 468 | | if (col.IsArray()) { | 469 | | val = json_value_to_string(col); | 470 | | } else if (!col.IsString()) { | 471 | | val = json_value_to_string(col); | 472 | | } else { | 473 | | val = col.GetString(); | 474 | | } | 475 | | return Status::OK(); | 476 | | } | 477 | | if constexpr (T == TYPE_BOOLEAN) { | 478 | | if (col.IsBool()) { | 479 | | val = col.GetBool(); | 480 | | return Status::OK(); | 481 | | } | 482 | | | 483 | | if (col.IsNumber()) { | 484 | | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); | 485 | | return Status::OK(); | 486 | | } | 487 | | | 488 | | bool is_nested_str = false; | 489 | | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { | 490 | | val = col[0].GetBool(); | 491 | | return Status::OK(); | 492 | | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { | 493 | | is_nested_str = true; | 494 | | } else if (pure_doc_value && col.IsArray()) { | 495 | | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); | 496 | | } | 497 | | | 498 | | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; | 499 | | const std::string& str_val = str_col.GetString(); | 500 | | size_t val_size = str_col.GetStringLength(); | 501 | | StringParser::ParseResult result; | 502 | | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); | 503 | | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); | 504 | | return Status::OK(); | 505 | | } | 506 | 5.20k | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); | 507 | 5.20k | } |
_ZN5doris12handle_valueILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE Line | Count | Source | 449 | 5.20k | typename PrimitiveTypeTraits<T>::CppType& val) { | 450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || | 451 | | T == TYPE_LARGEINT) { | 452 | | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, | 453 | | pure_doc_value)); | 454 | | return Status::OK(); | 455 | | } | 456 | | if constexpr (T == TYPE_FLOAT) { | 457 | | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); | 458 | | return Status::OK(); | 459 | | } | 460 | 5.20k | if constexpr (T == TYPE_DOUBLE) { | 461 | 5.20k | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); | 462 | 5.20k | return Status::OK(); | 463 | 5.20k | } | 464 | | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { | 465 | | // When ES mapping is keyword/text but actual data is an array, | 466 | | // serialize the array to JSON string instead of throwing an error. | 467 | | // This is valid in ES since any field can hold array values. 
| 468 | | if (col.IsArray()) { | 469 | | val = json_value_to_string(col); | 470 | | } else if (!col.IsString()) { | 471 | | val = json_value_to_string(col); | 472 | | } else { | 473 | | val = col.GetString(); | 474 | | } | 475 | | return Status::OK(); | 476 | | } | 477 | | if constexpr (T == TYPE_BOOLEAN) { | 478 | | if (col.IsBool()) { | 479 | | val = col.GetBool(); | 480 | | return Status::OK(); | 481 | | } | 482 | | | 483 | | if (col.IsNumber()) { | 484 | | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); | 485 | | return Status::OK(); | 486 | | } | 487 | | | 488 | | bool is_nested_str = false; | 489 | | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { | 490 | | val = col[0].GetBool(); | 491 | | return Status::OK(); | 492 | | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { | 493 | | is_nested_str = true; | 494 | | } else if (pure_doc_value && col.IsArray()) { | 495 | | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); | 496 | | } | 497 | | | 498 | | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; | 499 | | const std::string& str_val = str_col.GetString(); | 500 | | size_t val_size = str_col.GetStringLength(); | 501 | | StringParser::ParseResult result; | 502 | | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); | 503 | | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); | 504 | | return Status::OK(); | 505 | | } | 506 | 5.20k | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); | 507 | 5.20k | } |
_ZN5doris12handle_valueILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_19PrimitiveTypeTraitsIXT_EE7CppTypeE Line | Count | Source | 449 | 2.60k | typename PrimitiveTypeTraits<T>::CppType& val) { | 450 | | if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT || T == TYPE_BIGINT || | 451 | | T == TYPE_LARGEINT) { | 452 | | RETURN_IF_ERROR(get_int_value<typename PrimitiveTypeTraits<T>::CppType>(col, sub_type, &val, | 453 | | pure_doc_value)); | 454 | | return Status::OK(); | 455 | | } | 456 | | if constexpr (T == TYPE_FLOAT) { | 457 | | RETURN_IF_ERROR(get_float_value<float>(col, sub_type, &val, pure_doc_value)); | 458 | | return Status::OK(); | 459 | | } | 460 | | if constexpr (T == TYPE_DOUBLE) { | 461 | | RETURN_IF_ERROR(get_float_value<double>(col, sub_type, &val, pure_doc_value)); | 462 | | return Status::OK(); | 463 | | } | 464 | | if constexpr (T == TYPE_STRING || T == TYPE_CHAR || T == TYPE_VARCHAR) { | 465 | | // When ES mapping is keyword/text but actual data is an array, | 466 | | // serialize the array to JSON string instead of throwing an error. | 467 | | // This is valid in ES since any field can hold array values. 
| 468 | | if (col.IsArray()) { | 469 | | val = json_value_to_string(col); | 470 | | } else if (!col.IsString()) { | 471 | | val = json_value_to_string(col); | 472 | | } else { | 473 | | val = col.GetString(); | 474 | | } | 475 | | return Status::OK(); | 476 | | } | 477 | 2.60k | if constexpr (T == TYPE_BOOLEAN) { | 478 | 2.60k | if (col.IsBool()) { | 479 | 2.60k | val = col.GetBool(); | 480 | 2.60k | return Status::OK(); | 481 | 2.60k | } | 482 | | | 483 | 0 | if (col.IsNumber()) { | 484 | 0 | val = static_cast<typename PrimitiveTypeTraits<T>::CppType>(col.GetInt()); | 485 | 0 | return Status::OK(); | 486 | 0 | } | 487 | | | 488 | 0 | bool is_nested_str = false; | 489 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { | 490 | 0 | val = col[0].GetBool(); | 491 | 0 | return Status::OK(); | 492 | 0 | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { | 493 | 0 | is_nested_str = true; | 494 | 0 | } else if (pure_doc_value && col.IsArray()) { | 495 | 0 | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); | 496 | 0 | } | 497 | | | 498 | 0 | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; | 499 | 0 | const std::string& str_val = str_col.GetString(); | 500 | 0 | size_t val_size = str_col.GetStringLength(); | 501 | 0 | StringParser::ParseResult result; | 502 | 0 | val = StringParser::string_to_bool(str_val.c_str(), val_size, &result); | 503 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, sub_type); | 504 | 0 | return Status::OK(); | 505 | 0 | } | 506 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "Un-supported type: {}", type_to_string(T)); | 507 | 2.60k | } |
|
508 | | |
509 | | template <PrimitiveType T> |
510 | | Status process_single_column(const rapidjson::Value& col, PrimitiveType sub_type, |
511 | 31.8k | bool pure_doc_value, Array& array) { |
512 | 31.8k | typename PrimitiveTypeTraits<T>::CppType val; |
513 | 31.8k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); |
514 | 31.8k | array.push_back(Field::create_field<T>(val)); |
515 | 31.8k | return Status::OK(); |
516 | 31.8k | } _ZN5doris21process_single_columnILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 511 | 5.50k | bool pure_doc_value, Array& array) { | 512 | 5.50k | typename PrimitiveTypeTraits<T>::CppType val; | 513 | 5.50k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); | 514 | 5.50k | array.push_back(Field::create_field<T>(val)); | 515 | 5.50k | return Status::OK(); | 516 | 5.50k | } |
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 511 | 2.60k | bool pure_doc_value, Array& array) { | 512 | 2.60k | typename PrimitiveTypeTraits<T>::CppType val; | 513 | 2.60k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); | 514 | 2.60k | array.push_back(Field::create_field<T>(val)); | 515 | 2.60k | return Status::OK(); | 516 | 2.60k | } |
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 511 | 2.60k | bool pure_doc_value, Array& array) { | 512 | 2.60k | typename PrimitiveTypeTraits<T>::CppType val; | 513 | 2.60k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); | 514 | 2.60k | array.push_back(Field::create_field<T>(val)); | 515 | 2.60k | return Status::OK(); | 516 | 2.60k | } |
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 511 | 2.90k | bool pure_doc_value, Array& array) { | 512 | 2.90k | typename PrimitiveTypeTraits<T>::CppType val; | 513 | 2.90k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); | 514 | 2.90k | array.push_back(Field::create_field<T>(val)); | 515 | 2.90k | return Status::OK(); | 516 | 2.90k | } |
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 511 | 3.53k | bool pure_doc_value, Array& array) { | 512 | 3.53k | typename PrimitiveTypeTraits<T>::CppType val; | 513 | 3.53k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); | 514 | 3.53k | array.push_back(Field::create_field<T>(val)); | 515 | 3.53k | return Status::OK(); | 516 | 3.53k | } |
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 511 | 1.66k | bool pure_doc_value, Array& array) { | 512 | 1.66k | typename PrimitiveTypeTraits<T>::CppType val; | 513 | 1.66k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); | 514 | 1.66k | array.push_back(Field::create_field<T>(val)); | 515 | 1.66k | return Status::OK(); | 516 | 1.66k | } |
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 511 | 5.20k | bool pure_doc_value, Array& array) { | 512 | 5.20k | typename PrimitiveTypeTraits<T>::CppType val; | 513 | 5.20k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); | 514 | 5.20k | array.push_back(Field::create_field<T>(val)); | 515 | 5.20k | return Status::OK(); | 516 | 5.20k | } |
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 511 | 5.20k | bool pure_doc_value, Array& array) { | 512 | 5.20k | typename PrimitiveTypeTraits<T>::CppType val; | 513 | 5.20k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); | 514 | 5.20k | array.push_back(Field::create_field<T>(val)); | 515 | 5.20k | return Status::OK(); | 516 | 5.20k | } |
_ZN5doris21process_single_columnILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 511 | 2.60k | bool pure_doc_value, Array& array) { | 512 | 2.60k | typename PrimitiveTypeTraits<T>::CppType val; | 513 | 2.60k | RETURN_IF_ERROR(handle_value<T>(col, sub_type, pure_doc_value, val)); | 514 | 2.60k | array.push_back(Field::create_field<T>(val)); | 515 | 2.60k | return Status::OK(); | 516 | 2.60k | } |
|
517 | | |
518 | | template <PrimitiveType T> |
519 | | Status process_column_array(const rapidjson::Value& col, PrimitiveType sub_type, |
520 | 8.64k | bool pure_doc_value, Array& array) { |
521 | 31.7k | for (const auto& sub_col : col.GetArray()) { |
522 | 31.7k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); |
523 | 31.7k | } |
524 | 8.64k | return Status::OK(); |
525 | 8.64k | } _ZN5doris20process_column_arrayILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 520 | 2.04k | bool pure_doc_value, Array& array) { | 521 | 5.45k | for (const auto& sub_col : col.GetArray()) { | 522 | 5.45k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); | 523 | 5.45k | } | 524 | 2.04k | return Status::OK(); | 525 | 2.04k | } |
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 520 | 650 | bool pure_doc_value, Array& array) { | 521 | 2.60k | for (const auto& sub_col : col.GetArray()) { | 522 | 2.60k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); | 523 | 2.60k | } | 524 | 650 | return Status::OK(); | 525 | 650 | } |
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 520 | 650 | bool pure_doc_value, Array& array) { | 521 | 2.60k | for (const auto& sub_col : col.GetArray()) { | 522 | 2.60k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); | 523 | 2.60k | } | 524 | 650 | return Status::OK(); | 525 | 650 | } |
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 520 | 746 | bool pure_doc_value, Array& array) { | 521 | 2.85k | for (const auto& sub_col : col.GetArray()) { | 522 | 2.85k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); | 523 | 2.85k | } | 524 | 746 | return Status::OK(); | 525 | 746 | } |
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 520 | 884 | bool pure_doc_value, Array& array) { | 521 | 3.53k | for (const auto& sub_col : col.GetArray()) { | 522 | 3.53k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); | 523 | 3.53k | } | 524 | 884 | return Status::OK(); | 525 | 884 | } |
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 520 | 416 | bool pure_doc_value, Array& array) { | 521 | 1.66k | for (const auto& sub_col : col.GetArray()) { | 522 | 1.66k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); | 523 | 1.66k | } | 524 | 416 | return Status::OK(); | 525 | 416 | } |
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 520 | 1.30k | bool pure_doc_value, Array& array) { | 521 | 5.20k | for (const auto& sub_col : col.GetArray()) { | 522 | 5.20k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); | 523 | 5.20k | } | 524 | 1.30k | return Status::OK(); | 525 | 1.30k | } |
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 520 | 1.30k | bool pure_doc_value, Array& array) { | 521 | 5.20k | for (const auto& sub_col : col.GetArray()) { | 522 | 5.20k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); | 523 | 5.20k | } | 524 | 1.30k | return Status::OK(); | 525 | 1.30k | } |
_ZN5doris20process_column_arrayILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 520 | 650 | bool pure_doc_value, Array& array) { | 521 | 2.60k | for (const auto& sub_col : col.GetArray()) { | 522 | 2.60k | RETURN_IF_ERROR(process_single_column<T>(sub_col, sub_type, pure_doc_value, array)); | 523 | 2.60k | } | 524 | 650 | return Status::OK(); | 525 | 650 | } |
|
526 | | |
527 | | template <PrimitiveType T> |
528 | | Status process_column(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value, |
529 | 8.73k | Array& array) { |
530 | 8.73k | if (!col.IsArray()) { |
531 | 96 | return process_single_column<T>(col, sub_type, pure_doc_value, array); |
532 | 8.64k | } else { |
533 | 8.64k | return process_column_array<T>(col, sub_type, pure_doc_value, array); |
534 | 8.64k | } |
535 | 8.73k | } _ZN5doris14process_columnILNS_13PrimitiveTypeE23EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 529 | 2.09k | Array& array) { | 530 | 2.09k | if (!col.IsArray()) { | 531 | 48 | return process_single_column<T>(col, sub_type, pure_doc_value, array); | 532 | 2.04k | } else { | 533 | 2.04k | return process_column_array<T>(col, sub_type, pure_doc_value, array); | 534 | 2.04k | } | 535 | 2.09k | } |
_ZN5doris14process_columnILNS_13PrimitiveTypeE3EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 529 | 650 | Array& array) { | 530 | 650 | if (!col.IsArray()) { | 531 | 0 | return process_single_column<T>(col, sub_type, pure_doc_value, array); | 532 | 650 | } else { | 533 | 650 | return process_column_array<T>(col, sub_type, pure_doc_value, array); | 534 | 650 | } | 535 | 650 | } |
_ZN5doris14process_columnILNS_13PrimitiveTypeE4EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 529 | 650 | Array& array) { | 530 | 650 | if (!col.IsArray()) { | 531 | 0 | return process_single_column<T>(col, sub_type, pure_doc_value, array); | 532 | 650 | } else { | 533 | 650 | return process_column_array<T>(col, sub_type, pure_doc_value, array); | 534 | 650 | } | 535 | 650 | } |
_ZN5doris14process_columnILNS_13PrimitiveTypeE5EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 529 | 794 | Array& array) { | 530 | 794 | if (!col.IsArray()) { | 531 | 48 | return process_single_column<T>(col, sub_type, pure_doc_value, array); | 532 | 746 | } else { | 533 | 746 | return process_column_array<T>(col, sub_type, pure_doc_value, array); | 534 | 746 | } | 535 | 794 | } |
_ZN5doris14process_columnILNS_13PrimitiveTypeE6EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 529 | 884 | Array& array) { | 530 | 884 | if (!col.IsArray()) { | 531 | 0 | return process_single_column<T>(col, sub_type, pure_doc_value, array); | 532 | 884 | } else { | 533 | 884 | return process_column_array<T>(col, sub_type, pure_doc_value, array); | 534 | 884 | } | 535 | 884 | } |
_ZN5doris14process_columnILNS_13PrimitiveTypeE7EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 529 | 416 | Array& array) { | 530 | 416 | if (!col.IsArray()) { | 531 | 0 | return process_single_column<T>(col, sub_type, pure_doc_value, array); | 532 | 416 | } else { | 533 | 416 | return process_column_array<T>(col, sub_type, pure_doc_value, array); | 534 | 416 | } | 535 | 416 | } |
_ZN5doris14process_columnILNS_13PrimitiveTypeE8EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 529 | 1.30k | Array& array) { | 530 | 1.30k | if (!col.IsArray()) { | 531 | 0 | return process_single_column<T>(col, sub_type, pure_doc_value, array); | 532 | 1.30k | } else { | 533 | 1.30k | return process_column_array<T>(col, sub_type, pure_doc_value, array); | 534 | 1.30k | } | 535 | 1.30k | } |
_ZN5doris14process_columnILNS_13PrimitiveTypeE9EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 529 | 1.30k | Array& array) { | 530 | 1.30k | if (!col.IsArray()) { | 531 | 0 | return process_single_column<T>(col, sub_type, pure_doc_value, array); | 532 | 1.30k | } else { | 533 | 1.30k | return process_column_array<T>(col, sub_type, pure_doc_value, array); | 534 | 1.30k | } | 535 | 1.30k | } |
_ZN5doris14process_columnILNS_13PrimitiveTypeE2EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayE Line | Count | Source | 529 | 650 | Array& array) { | 530 | 650 | if (!col.IsArray()) { | 531 | 0 | return process_single_column<T>(col, sub_type, pure_doc_value, array); | 532 | 650 | } else { | 533 | 650 | return process_column_array<T>(col, sub_type, pure_doc_value, array); | 534 | 650 | } | 535 | 650 | } |
|
536 | | |
537 | | template <PrimitiveType T> |
538 | | Status process_date_column(const rapidjson::Value& col, PrimitiveType sub_type, bool pure_doc_value, |
539 | 1.30k | Array& array, const cctz::time_zone& time_zone) { |
540 | 1.30k | if (!col.IsArray()) { |
541 | 0 | typename PrimitiveTypeTraits<T>::CppType data; |
542 | 0 | RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone))); |
543 | 0 | array.push_back(Field::create_field<T>(data)); |
544 | 1.30k | } else { |
545 | 2.60k | for (const auto& sub_col : col.GetArray()) { |
546 | 2.60k | typename PrimitiveTypeTraits<T>::CppType data; |
547 | 2.60k | RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone))); |
548 | 2.60k | array.push_back(Field::create_field<T>(data)); |
549 | 2.60k | } |
550 | 1.30k | } |
551 | 1.30k | return Status::OK(); |
552 | 1.30k | } _ZN5doris19process_date_columnILNS_13PrimitiveTypeE25EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayERKN4cctz9time_zoneE Line | Count | Source | 539 | 650 | Array& array, const cctz::time_zone& time_zone) { | 540 | 650 | if (!col.IsArray()) { | 541 | 0 | typename PrimitiveTypeTraits<T>::CppType data; | 542 | 0 | RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone))); | 543 | 0 | array.push_back(Field::create_field<T>(data)); | 544 | 650 | } else { | 545 | 1.30k | for (const auto& sub_col : col.GetArray()) { | 546 | 1.30k | typename PrimitiveTypeTraits<T>::CppType data; | 547 | 1.30k | RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone))); | 548 | 1.30k | array.push_back(Field::create_field<T>(data)); | 549 | 1.30k | } | 550 | 650 | } | 551 | 650 | return Status::OK(); | 552 | 650 | } |
_ZN5doris19process_date_columnILNS_13PrimitiveTypeE26EEENS_6StatusERKN9rapidjson12GenericValueINS3_4UTF8IcEENS3_19MemoryPoolAllocatorINS3_12CrtAllocatorEEEEES1_bRNS_5ArrayERKN4cctz9time_zoneE Line | Count | Source | 539 | 650 | Array& array, const cctz::time_zone& time_zone) { | 540 | 650 | if (!col.IsArray()) { | 541 | 0 | typename PrimitiveTypeTraits<T>::CppType data; | 542 | 0 | RETURN_IF_ERROR((get_date_int<T>(col, sub_type, pure_doc_value, &data, time_zone))); | 543 | 0 | array.push_back(Field::create_field<T>(data)); | 544 | 650 | } else { | 545 | 1.30k | for (const auto& sub_col : col.GetArray()) { | 546 | 1.30k | typename PrimitiveTypeTraits<T>::CppType data; | 547 | 1.30k | RETURN_IF_ERROR((get_date_int<T>(sub_col, sub_type, pure_doc_value, &data, time_zone))); | 548 | 1.30k | array.push_back(Field::create_field<T>(data)); | 549 | 1.30k | } | 550 | 650 | } | 551 | 650 | return Status::OK(); | 552 | 650 | } |
|
553 | | |
554 | | Status process_jsonb_column(const rapidjson::Value& col, PrimitiveType sub_type, |
555 | 144 | bool pure_doc_value, Array& array) { |
556 | 144 | if (!col.IsArray()) { |
557 | 0 | JsonBinaryValue jsonb_value; |
558 | 0 | RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(col))); |
559 | 0 | JsonbField json(jsonb_value.value(), jsonb_value.size()); |
560 | 0 | array.push_back(Field::create_field<TYPE_JSONB>(std::move(json))); |
561 | 144 | } else { |
562 | 304 | for (const auto& sub_col : col.GetArray()) { |
563 | 304 | JsonBinaryValue jsonb_value; |
564 | 304 | RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(sub_col))); |
565 | 304 | JsonbField json(jsonb_value.value(), jsonb_value.size()); |
566 | 304 | array.push_back(Field::create_field<TYPE_JSONB>(json)); |
567 | 304 | } |
568 | 144 | } |
569 | 144 | return Status::OK(); |
570 | 144 | } |
571 | | |
572 | | Status ScrollParser::parse_column(const rapidjson::Value& col, PrimitiveType sub_type, |
573 | | bool pure_doc_value, Array& array, |
574 | 10.1k | const cctz::time_zone& time_zone) { |
575 | 10.1k | switch (sub_type) { |
576 | 0 | case TYPE_CHAR: |
577 | 0 | case TYPE_VARCHAR: |
578 | 2.09k | case TYPE_STRING: |
579 | 2.09k | return process_column<TYPE_STRING>(col, sub_type, pure_doc_value, array); |
580 | 650 | case TYPE_TINYINT: |
581 | 650 | return process_column<TYPE_TINYINT>(col, sub_type, pure_doc_value, array); |
582 | 650 | case TYPE_SMALLINT: |
583 | 650 | return process_column<TYPE_SMALLINT>(col, sub_type, pure_doc_value, array); |
584 | 794 | case TYPE_INT: |
585 | 794 | return process_column<TYPE_INT>(col, sub_type, pure_doc_value, array); |
586 | 884 | case TYPE_BIGINT: |
587 | 884 | return process_column<TYPE_BIGINT>(col, sub_type, pure_doc_value, array); |
588 | 416 | case TYPE_LARGEINT: |
589 | 416 | return process_column<TYPE_LARGEINT>(col, sub_type, pure_doc_value, array); |
590 | 1.30k | case TYPE_FLOAT: |
591 | 1.30k | return process_column<TYPE_FLOAT>(col, sub_type, pure_doc_value, array); |
592 | 1.30k | case TYPE_DOUBLE: |
593 | 1.30k | return process_column<TYPE_DOUBLE>(col, sub_type, pure_doc_value, array); |
594 | 650 | case TYPE_BOOLEAN: |
595 | 650 | return process_column<TYPE_BOOLEAN>(col, sub_type, pure_doc_value, array); |
596 | | // date/datetime v2 is the default type for catalog table, |
597 | | // see https://github.com/apache/doris/pull/16304 |
598 | | // No need to support date and datetime types. |
599 | 650 | case TYPE_DATEV2: { |
600 | 650 | return process_date_column<TYPE_DATEV2>(col, sub_type, pure_doc_value, array, time_zone); |
601 | 0 | } |
602 | 650 | case TYPE_DATETIMEV2: { |
603 | 650 | return process_date_column<TYPE_DATETIMEV2>(col, sub_type, pure_doc_value, array, |
604 | 650 | time_zone); |
605 | 0 | } |
606 | 144 | case TYPE_JSONB: { |
607 | 144 | return process_jsonb_column(col, sub_type, pure_doc_value, array); |
608 | 0 | } |
609 | 0 | default: |
610 | 0 | LOG(ERROR) << "Do not support Array type: " << sub_type; |
611 | 0 | return Status::InternalError("Unsupported type"); |
612 | 10.1k | } |
613 | 10.1k | } |
614 | | |
615 | 640 | ScrollParser::ScrollParser(bool doc_value_mode) : _size(0), _line_index(0) {} |
616 | | |
617 | 640 | ScrollParser::~ScrollParser() = default; |
618 | | |
619 | 640 | Status ScrollParser::parse(const std::string& scroll_result, bool exactly_once) { |
620 | | // rely on `_size !=0 ` to determine whether scroll ends |
621 | 640 | _size = 0; |
622 | 640 | _document_node.Parse(scroll_result.c_str(), scroll_result.length()); |
623 | 640 | if (_document_node.HasParseError()) { |
624 | 0 | return Status::InternalError("Parsing json error, json is: {}", scroll_result); |
625 | 0 | } |
626 | | |
627 | 640 | if (!exactly_once && !_document_node.HasMember(FIELD_SCROLL_ID)) { |
628 | 0 | LOG(WARNING) << "Document has not a scroll id field scroll response:" << scroll_result; |
629 | 0 | return Status::InternalError("Document has not a scroll id field"); |
630 | 0 | } |
631 | | |
632 | 640 | if (!exactly_once) { |
633 | 640 | const rapidjson::Value& scroll_node = _document_node[FIELD_SCROLL_ID]; |
634 | 640 | _scroll_id = scroll_node.GetString(); |
635 | 640 | } |
636 | | // { hits: { total : 2, "hits" : [ {}, {}, {} ]}} |
637 | 640 | const rapidjson::Value& outer_hits_node = _document_node[FIELD_HITS]; |
638 | | // if has no inner hits, there has no data in this index |
639 | 640 | if (!outer_hits_node.HasMember(FIELD_INNER_HITS)) { |
640 | 4 | return Status::OK(); |
641 | 4 | } |
642 | 636 | const rapidjson::Value& inner_hits_node = outer_hits_node[FIELD_INNER_HITS]; |
643 | | // this happened just the end of scrolling |
644 | 636 | if (!inner_hits_node.IsArray()) { |
645 | 0 | return Status::OK(); |
646 | 0 | } |
647 | 636 | _inner_hits_node.CopyFrom(inner_hits_node, _document_node.GetAllocator()); |
648 | | // how many documents contains in this batch |
649 | 636 | _size = _inner_hits_node.Size(); |
650 | 636 | return Status::OK(); |
651 | 636 | } |
652 | | |
653 | 1.27k | int ScrollParser::get_size() const { |
654 | 1.27k | return _size; |
655 | 1.27k | } |
656 | | |
657 | 640 | const std::string& ScrollParser::get_scroll_id() { |
658 | 640 | return _scroll_id; |
659 | 640 | } |
660 | | |
661 | | Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, |
662 | | std::vector<MutableColumnPtr>& columns, bool* line_eof, |
663 | | const std::map<std::string, std::string>& docvalue_context, |
664 | 2.82k | const cctz::time_zone& time_zone) { |
665 | 2.82k | *line_eof = true; |
666 | | |
667 | 2.82k | if (_size <= 0 || _line_index >= _size) { |
668 | 636 | return Status::OK(); |
669 | 636 | } |
670 | | |
671 | 2.18k | const rapidjson::Value& obj = _inner_hits_node[_line_index++]; |
672 | 2.18k | bool pure_doc_value = false; |
673 | 2.18k | if (obj.HasMember("fields")) { |
674 | 712 | pure_doc_value = true; |
675 | 712 | } |
676 | | // obj may be neither have `_source` nor `fields` field. |
677 | 2.18k | const rapidjson::Value* line = nullptr; |
678 | 2.18k | if (obj.HasMember(FIELD_SOURCE)) { |
679 | 1.46k | line = &obj[FIELD_SOURCE]; |
680 | 1.46k | } else if (obj.HasMember("fields")) { |
681 | 712 | line = &obj["fields"]; |
682 | 712 | } |
683 | | |
684 | 19.0k | for (int i = 0; i < tuple_desc->slots().size(); ++i) { |
685 | 16.8k | const SlotDescriptor* slot_desc = tuple_desc->slots()[i]; |
686 | 16.8k | auto* col_ptr = columns[i].get(); |
687 | | |
688 | 16.8k | if (slot_desc->col_name() == FIELD_ID) { |
689 | | // actually this branch will not be reached, this is guaranteed by Doris FE. |
690 | 0 | if (pure_doc_value) { |
691 | 0 | return Status::RuntimeError("obtain `_id` is not supported in doc_values mode"); |
692 | 0 | } |
693 | | // obj[FIELD_ID] must not be NULL |
694 | 0 | std::string _id = obj[FIELD_ID].GetString(); |
695 | 0 | size_t len = _id.length(); |
696 | |
|
697 | 0 | col_ptr->insert_data(const_cast<const char*>(_id.data()), len); |
698 | 0 | continue; |
699 | 0 | } |
700 | | |
701 | 16.8k | const char* col_name = pure_doc_value ? docvalue_context.at(slot_desc->col_name()).c_str() |
702 | 16.8k | : slot_desc->col_name().c_str(); |
703 | | |
704 | 16.8k | if (line == nullptr || line->FindMember(col_name) == line->MemberEnd()) { |
705 | 554 | if (slot_desc->is_nullable()) { |
706 | 554 | auto* nullable_column = reinterpret_cast<ColumnNullable*>(col_ptr); |
707 | 554 | nullable_column->insert_data(nullptr, 0); |
708 | 554 | continue; |
709 | 554 | } else { |
710 | 0 | std::string details = absl::Substitute(INVALID_NULL_VALUE, col_name); |
711 | 0 | return Status::RuntimeError(details); |
712 | 0 | } |
713 | 554 | } |
714 | | |
715 | 16.2k | const rapidjson::Value& col = (*line)[col_name]; |
716 | | |
717 | 16.2k | auto type = slot_desc->type()->get_primitive_type(); |
718 | | |
719 | | // when the column value is null, the subsequent type casting will report an error |
720 | 16.2k | if (col.IsNull() && slot_desc->is_nullable()) { |
721 | 4 | col_ptr->insert_data(nullptr, 0); |
722 | 4 | continue; |
723 | 16.2k | } else if (col.IsNull() && !slot_desc->is_nullable()) { |
724 | 0 | std::string details = absl::Substitute(INVALID_NULL_VALUE, col_name); |
725 | 0 | return Status::RuntimeError(details); |
726 | 0 | } |
727 | 16.2k | switch (type) { |
728 | 0 | case TYPE_CHAR: |
729 | 0 | case TYPE_VARCHAR: |
730 | 1.88k | case TYPE_STRING: { |
731 | | // sometimes elasticsearch user post some not-string value to Elasticsearch Index. |
732 | | // because of reading value from _source, we can not process all json type and then just transfer the value to original string representation |
733 | | // this may be a tricky, but we can work around this issue |
734 | 1.88k | std::string val; |
735 | 1.88k | if (pure_doc_value) { |
736 | 4 | if (col.Empty()) { |
737 | 0 | break; |
738 | 4 | } else if (col.Size() > 1) { |
739 | | // doc_values with multiple elements means actual array data |
740 | | // in ES keyword/text field, serialize as JSON array string |
741 | 0 | val = json_value_to_string(col); |
742 | 4 | } else if (!col[0].IsString()) { |
743 | 0 | val = json_value_to_string(col[0]); |
744 | 4 | } else { |
745 | 4 | val = col[0].GetString(); |
746 | 4 | } |
747 | 1.88k | } else { |
748 | | // When ES mapping is keyword/text but actual data is an array, |
749 | | // serialize the array to JSON string instead of throwing an error. |
750 | | // This is valid in ES since any field can hold array values. |
751 | 1.88k | if (col.IsArray()) { |
752 | 64 | val = json_value_to_string(col); |
753 | 1.81k | } else if (!col.IsString()) { |
754 | 0 | val = json_value_to_string(col); |
755 | 1.81k | } else { |
756 | 1.81k | val = col.GetString(); |
757 | 1.81k | } |
758 | 1.88k | } |
759 | 1.88k | size_t val_size = val.length(); |
760 | 1.88k | col_ptr->insert_data(const_cast<const char*>(val.data()), val_size); |
761 | 1.88k | break; |
762 | 1.88k | } |
763 | | |
764 | 0 | case TYPE_TINYINT: { |
765 | 0 | RETURN_IF_ERROR(insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value, |
766 | 0 | slot_desc->is_nullable())); |
767 | 0 | break; |
768 | 0 | } |
769 | | |
770 | 0 | case TYPE_SMALLINT: { |
771 | 0 | RETURN_IF_ERROR(insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value, |
772 | 0 | slot_desc->is_nullable())); |
773 | 0 | break; |
774 | 0 | } |
775 | | |
776 | 44 | case TYPE_INT: { |
777 | 44 | RETURN_IF_ERROR(insert_int_value<int32_t>(col, type, col_ptr, pure_doc_value, |
778 | 44 | slot_desc->is_nullable())); |
779 | 44 | break; |
780 | 44 | } |
781 | | |
782 | 920 | case TYPE_BIGINT: { |
783 | 920 | RETURN_IF_ERROR(insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value, |
784 | 920 | slot_desc->is_nullable())); |
785 | 920 | break; |
786 | 920 | } |
787 | | |
788 | 920 | case TYPE_LARGEINT: { |
789 | 0 | RETURN_IF_ERROR(insert_int_value<__int128>(col, type, col_ptr, pure_doc_value, |
790 | 0 | slot_desc->is_nullable())); |
791 | 0 | break; |
792 | 0 | } |
793 | | |
794 | 344 | case TYPE_DOUBLE: { |
795 | 344 | RETURN_IF_ERROR(insert_float_value<double>(col, type, col_ptr, pure_doc_value, |
796 | 344 | slot_desc->is_nullable())); |
797 | 344 | break; |
798 | 344 | } |
799 | | |
800 | 344 | case TYPE_FLOAT: { |
801 | 0 | RETURN_IF_ERROR(insert_float_value<float>(col, type, col_ptr, pure_doc_value, |
802 | 0 | slot_desc->is_nullable())); |
803 | 0 | break; |
804 | 0 | } |
805 | | |
806 | 0 | case TYPE_BOOLEAN: { |
807 | 0 | if (col.IsBool()) { |
808 | 0 | int8_t val = col.GetBool(); |
809 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0); |
810 | 0 | break; |
811 | 0 | } |
812 | | |
813 | 0 | if (col.IsNumber()) { |
814 | 0 | int8_t val = static_cast<int8_t>(col.GetInt()); |
815 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0); |
816 | 0 | break; |
817 | 0 | } |
818 | | |
819 | 0 | bool is_nested_str = false; |
820 | 0 | if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsBool()) { |
821 | 0 | int8_t val = col[0].GetBool(); |
822 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0); |
823 | 0 | break; |
824 | 0 | } else if (pure_doc_value && col.IsArray() && !col.Empty() && col[0].IsString()) { |
825 | 0 | is_nested_str = true; |
826 | 0 | } else if (pure_doc_value && col.IsArray()) { |
827 | 0 | return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); |
828 | 0 | } |
829 | | |
830 | 0 | const rapidjson::Value& str_col = is_nested_str ? col[0] : col; |
831 | |
|
832 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); |
833 | | |
834 | 0 | const std::string& val = str_col.GetString(); |
835 | 0 | size_t val_size = str_col.GetStringLength(); |
836 | 0 | StringParser::ParseResult result; |
837 | 0 | bool b = StringParser::string_to_bool(val.c_str(), val_size, &result); |
838 | 0 | RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type); |
839 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0); |
840 | 0 | break; |
841 | 0 | } |
842 | 0 | case TYPE_DECIMALV2: { |
843 | 0 | DecimalV2Value data; |
844 | |
|
845 | 0 | if (col.IsDouble()) { |
846 | 0 | data.assign_from_double(col.GetDouble()); |
847 | 0 | } else { |
848 | 0 | std::string val; |
849 | 0 | if (pure_doc_value) { |
850 | 0 | if (col.Empty()) { |
851 | 0 | break; |
852 | 0 | } else if (!col[0].IsString()) { |
853 | 0 | val = json_value_to_string(col[0]); |
854 | 0 | } else { |
855 | 0 | val = col[0].GetString(); |
856 | 0 | } |
857 | 0 | } else { |
858 | 0 | RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); |
859 | 0 | if (!col.IsString()) { |
860 | 0 | val = json_value_to_string(col); |
861 | 0 | } else { |
862 | 0 | val = col.GetString(); |
863 | 0 | } |
864 | 0 | } |
865 | 0 | data.parse_from_str(val.data(), static_cast<int32_t>(val.length())); |
866 | 0 | } |
867 | 0 | col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&data)), 0); |
868 | 0 | break; |
869 | 0 | } |
870 | | |
871 | 0 | case TYPE_DATE: |
872 | 0 | RETURN_IF_ERROR( |
873 | 0 | fill_date_int<TYPE_DATE>(col, type, pure_doc_value, col_ptr, time_zone)); |
874 | 0 | break; |
875 | 0 | case TYPE_DATETIME: |
876 | 0 | RETURN_IF_ERROR( |
877 | 0 | fill_date_int<TYPE_DATETIME>(col, type, pure_doc_value, col_ptr, time_zone)); |
878 | 0 | break; |
879 | 0 | case TYPE_DATEV2: |
880 | 0 | RETURN_IF_ERROR( |
881 | 0 | fill_date_int<TYPE_DATEV2>(col, type, pure_doc_value, col_ptr, time_zone)); |
882 | 0 | break; |
883 | 1.78k | case TYPE_DATETIMEV2: { |
884 | 1.78k | RETURN_IF_ERROR( |
885 | 1.78k | fill_date_int<TYPE_DATETIMEV2>(col, type, pure_doc_value, col_ptr, time_zone)); |
886 | 1.78k | break; |
887 | 1.78k | } |
888 | 10.1k | case TYPE_ARRAY: { |
889 | 10.1k | Array array; |
890 | 10.1k | const auto& sub_type = assert_cast<const DataTypeArray*>( |
891 | 10.1k | remove_nullable(tuple_desc->slots()[i]->type()).get()) |
892 | 10.1k | ->get_nested_type() |
893 | 10.1k | ->get_primitive_type(); |
894 | 10.1k | RETURN_IF_ERROR(parse_column(col, sub_type, pure_doc_value, array, time_zone)); |
895 | 10.1k | col_ptr->insert(Field::create_field<TYPE_ARRAY>(array)); |
896 | 10.1k | break; |
897 | 10.1k | } |
898 | 1.13k | case TYPE_JSONB: { |
899 | 1.13k | JsonBinaryValue jsonb_value; |
900 | 1.13k | RETURN_IF_ERROR(jsonb_value.from_json_string(json_value_to_string(col))); |
901 | 1.13k | JsonbField json(jsonb_value.value(), jsonb_value.size()); |
902 | 1.13k | col_ptr->insert(Field::create_field<TYPE_JSONB>(json)); |
903 | 1.13k | break; |
904 | 1.13k | } |
905 | 0 | default: { |
906 | 0 | LOG(ERROR) << "Unsupported data type: " << type_to_string(type); |
907 | 0 | DCHECK(false); |
908 | 0 | break; |
909 | 1.13k | } |
910 | 16.2k | } |
911 | 16.2k | } |
912 | | |
913 | 2.18k | *line_eof = false; |
914 | 2.18k | return Status::OK(); |
915 | 2.18k | } |
916 | | } // namespace doris |