// ContentFileEstimator.java
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.datasource.iceberg.cache;
import org.apache.iceberg.ContentFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.StructLike;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
/**
 * Heuristic estimator for the memory weight of Iceberg {@link ContentFile} objects.
 *
 * <p>Every figure is assembled from the fixed per-object constants declared below plus
 * the lengths/sizes reported by the inspected values; nothing is measured from the JVM.
 * Absent ({@code null}) fields always contribute {@code 0}.
 */
public final class ContentFileEstimator {
    // Fixed charge for the content file object itself.
    private static final long CONTENT_FILE_BASE_WEIGHT = 256L;

    // Container overheads.
    private static final long LIST_BASE_WEIGHT = 48L;
    private static final long MAP_BASE_WEIGHT = 48L;
    private static final long MAP_ENTRY_OVERHEAD = 24L;
    private static final long OBJECT_REFERENCE_WEIGHT = 8L;

    // Text and binary payloads: fixed base plus a per-character / per-byte part.
    private static final long STRING_BASE_WEIGHT = 40L;
    private static final long CHAR_BYTES = 2L;
    private static final long BYTE_BUFFER_BASE_WEIGHT = 16L;

    // Boxed numeric values.
    private static final long LONG_OBJECT_WEIGHT = 24L;
    private static final long INT_OBJECT_WEIGHT = 16L;

    // Partition tuple: fixed base plus a per-slot charge, before the values themselves.
    private static final long PARTITION_BASE_WEIGHT = 48L;
    private static final long PARTITION_VALUE_BASE_WEIGHT = 8L;

    // Charges applied to small partition values.
    // NOTE(review): these are below OBJECT_REFERENCE_WEIGHT even though the values arrive
    // boxed; presumably intentional - confirm before changing.
    private static final long SMALL_VALUE_WEIGHT = 4L;
    private static final long BOOLEAN_VALUE_WEIGHT = 1L;

    private ContentFileEstimator() {
        // Static utility; never instantiated.
    }

    /**
     * Estimates the total weight of a list of content files.
     *
     * @param files the files to weigh; may be {@code null} or empty
     * @return {@code 0} for a {@code null} or empty list; otherwise the list overhead
     *         (base plus one reference per element) plus the estimate of every element,
     *         where {@code null} elements add nothing beyond their reference slot
     */
    public static long estimate(List<? extends ContentFile<?>> files) {
        if (files == null || files.isEmpty()) {
            return 0L;
        }
        long sum = LIST_BASE_WEIGHT + (long) files.size() * OBJECT_REFERENCE_WEIGHT;
        for (ContentFile<?> contentFile : files) {
            sum += estimateContentFileWeight(contentFile);
        }
        return sum;
    }

    /**
     * Estimates a single content file: a fixed base plus its path, manifest location,
     * key metadata, partition tuple, metric maps, bounds maps, offset/field-id lists and
     * optional boxed numbers. {@link DeleteFile} instances additionally account for the
     * referenced data file and the content offset/size.
     */
    private static long estimateContentFileWeight(ContentFile<?> file) {
        if (file == null) {
            return 0L;
        }

        long identity = charSequenceWeight(file.path())
                + stringWeight(file.manifestLocation())
                + byteBufferWeight(file.keyMetadata())
                + partitionWeight(file.partition());

        long metrics = numericMapWeight(file.columnSizes())
                + numericMapWeight(file.valueCounts())
                + numericMapWeight(file.nullValueCounts())
                + numericMapWeight(file.nanValueCounts())
                + byteBufferMapWeight(file.lowerBounds())
                + byteBufferMapWeight(file.upperBounds());

        long lists = listWeight(file.splitOffsets(), LONG_OBJECT_WEIGHT)
                + listWeight(file.equalityFieldIds(), INT_OBJECT_WEIGHT);

        long scalars = optionalLongWeight(file.pos())
                + optionalLongWeight(file.dataSequenceNumber())
                + optionalLongWeight(file.fileSequenceNumber())
                + optionalLongWeight(file.firstRowId())
                + optionalIntWeight(file.sortOrderId());

        long result = CONTENT_FILE_BASE_WEIGHT + identity + metrics + lists + scalars;

        if (file instanceof DeleteFile) {
            DeleteFile delete = (DeleteFile) file;
            result += stringWeight(delete.referencedDataFile())
                    + optionalLongWeight(delete.contentOffset())
                    + optionalLongWeight(delete.contentSizeInBytes());
        }
        return result;
    }

    /**
     * Weight of a list of boxed numbers: list base plus, per element, one reference and
     * {@code elementWeight}. A {@code null} or empty list weighs {@code 0}.
     */
    private static long listWeight(List<? extends Number> list, long elementWeight) {
        boolean nothingToCount = list == null || list.isEmpty();
        if (nothingToCount) {
            return 0L;
        }
        long perElement = OBJECT_REFERENCE_WEIGHT + elementWeight;
        return LIST_BASE_WEIGHT + (long) list.size() * perElement;
    }

    /**
     * Weight of an {@code Integer -> Long} map: map base plus, per entry, the entry
     * overhead and one boxed long. A {@code null} or empty map weighs {@code 0}.
     */
    private static long numericMapWeight(Map<Integer, Long> map) {
        boolean nothingToCount = map == null || map.isEmpty();
        if (nothingToCount) {
            return 0L;
        }
        long perEntry = MAP_ENTRY_OVERHEAD + LONG_OBJECT_WEIGHT;
        return MAP_BASE_WEIGHT + (long) map.size() * perEntry;
    }

    /**
     * Weight of an {@code Integer -> ByteBuffer} map: map base, the entry overhead per
     * entry, and the weight of every buffer value. A {@code null} or empty map weighs
     * {@code 0}.
     */
    private static long byteBufferMapWeight(Map<Integer, ByteBuffer> map) {
        boolean nothingToCount = map == null || map.isEmpty();
        if (nothingToCount) {
            return 0L;
        }
        long sum = MAP_BASE_WEIGHT + (long) map.size() * MAP_ENTRY_OVERHEAD;
        for (ByteBuffer bound : map.values()) {
            sum += byteBufferWeight(bound);
        }
        return sum;
    }

    /**
     * Weight of a partition tuple: fixed base, a per-slot charge for each position, and
     * the estimated weight of each value read via {@code get(i, Object.class)}.
     */
    private static long partitionWeight(StructLike partition) {
        if (partition == null) {
            return 0L;
        }
        int slots = partition.size();
        long sum = PARTITION_BASE_WEIGHT + (long) slots * PARTITION_VALUE_BASE_WEIGHT;
        for (int pos = 0; pos < slots; pos++) {
            sum += estimateValueWeight(partition.get(pos, Object.class));
        }
        return sum;
    }

    /**
     * Weight of a single partition value, chosen by its runtime type. Types not listed
     * here are charged one object reference; {@code null} is charged nothing.
     */
    private static long estimateValueWeight(Object value) {
        if (value == null) {
            return 0L;
        }
        if (value instanceof CharSequence) {
            return charSequenceWeight((CharSequence) value);
        }
        if (value instanceof byte[]) {
            // Raw arrays reuse the byte-buffer base charge plus their full length.
            byte[] raw = (byte[]) value;
            return BYTE_BUFFER_BASE_WEIGHT + raw.length;
        }
        if (value instanceof ByteBuffer) {
            return byteBufferWeight((ByteBuffer) value);
        }
        if (value instanceof Long || value instanceof Double) {
            return LONG_OBJECT_WEIGHT;
        }
        if (value instanceof Integer || value instanceof Float) {
            return INT_OBJECT_WEIGHT;
        }
        if (value instanceof Short || value instanceof Character) {
            return SMALL_VALUE_WEIGHT;
        }
        if (value instanceof Boolean) {
            return BOOLEAN_VALUE_WEIGHT;
        }
        return OBJECT_REFERENCE_WEIGHT;
    }

    /**
     * Weight of a character sequence: string base plus {@code CHAR_BYTES} per character,
     * or {@code 0} when {@code null}.
     */
    private static long charSequenceWeight(CharSequence value) {
        return value == null ? 0L : STRING_BASE_WEIGHT + (long) value.length() * CHAR_BYTES;
    }

    /**
     * Weight of a string; uses the same formula as {@link #charSequenceWeight(CharSequence)}.
     */
    private static long stringWeight(String value) {
        return charSequenceWeight(value);
    }

    /**
     * Weight of a byte buffer: fixed base plus its {@code remaining()} bytes, or
     * {@code 0} when {@code null}.
     */
    private static long byteBufferWeight(ByteBuffer buffer) {
        return buffer == null ? 0L : BYTE_BUFFER_BASE_WEIGHT + buffer.remaining();
    }

    /** Weight of an optional boxed long: {@code 0} when absent. */
    private static long optionalLongWeight(Long value) {
        if (value == null) {
            return 0L;
        }
        return LONG_OBJECT_WEIGHT;
    }

    /** Weight of an optional boxed int: {@code 0} when absent. */
    private static long optionalIntWeight(Integer value) {
        if (value == null) {
            return 0L;
        }
        return INT_OBJECT_WEIGHT;
    }
}