From 9a6092ddbbd694ee3962b549b333f1acf0abfd2f Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 29 Sep 2022 17:21:28 +0100 Subject: [PATCH 1/9] Create SizeEstimator.java --- .../exec/store/openTSDB/SizeEstimator.java | 192 ++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java diff --git a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java new file mode 100644 index 00000000000..844424d9517 --- /dev/null +++ b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.openTSDB; + +import org.apache.drill.exec.store.openTSDB.schema.OpenTSDBSchemaFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.management.MBeanServer; +import java.lang.management.ManagementFactory; +import java.lang.reflect.Array; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.Collections; +import java.util.IdentityHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.ConcurrentHashMap; + +class SizeEstimator { + + private static final Logger logger = LoggerFactory.getLogger(SizeEstimator.class); + + /** + * The state of an ongoing size estimation. Contains a stack of objects to visit as well as an + * IdentityHashMap of visited objects, and provides utility methods for enqueueing new objects + * to visit. + */ + private static class SearchState { + + private final IdentityHashMap visited; + final LinkedList stack = new LinkedList<>(); + long size = 0L; + + SearchState(IdentityHashMap visited) { + this.visited = visited; + } + + void enqueue(Object obj) { + if (obj != null && !visited.containsKey(obj)) { + visited.put(obj, null); + stack.add(obj); + } + } + + boolean isFinished() { + return stack.isEmpty(); + } + + Object dequeue() { + Object elem = stack.removeLast(); + return elem; + } + } + + /** + * Cached information about each class. We remember two things: the "shell size" of the class + * (size of all non-static fields plus the java.lang.Object size), and any fields that are + * pointers to objects. + */ + private static class ClassInfo { + private final long shellSize; + private final List pointerFields; + + ClassInfo(final long shellSize, final List pointerFields) { + this.shellSize = shellSize; + this.pointerFields = pointerFields; + } + + long getShellSize() { + return shellSize; + } + + List getPointerFields() { + return pointerFields; + } + } + + // Sizes of primitive types + private static final int BYTE_SIZE = 1; + private static final int BOOLEAN_SIZE = 1; + private static final int CHAR_SIZE = 2; + private static final int SHORT_SIZE = 2; + private static final int INT_SIZE = 4; + private static final int LONG_SIZE = 8; + private static final int FLOAT_SIZE = 4; + private static final int DOUBLE_SIZE = 8; + + // Alignment boundary for objects + // TODO: Is this arch dependent ? + private static final int ALIGN_SIZE = 8; + + // A cache of ClassInfo objects for each class + private static final ConcurrentHashMap, ClassInfo> classInfos = new ConcurrentHashMap<>(); + + // Object and pointer sizes are arch dependent + private static boolean is64bit = false; + + // Size of an object reference + // Based on https://wikis.oracle.com/display/HotSpotInternals/CompressedOops + private static boolean isCompressedOops = false; + private static int pointerSize = 4; + + // Minimum size of a java.lang.Object + private static int objectSize = 8; + + static { + // Sets object size, pointer size based on architecture and CompressedOops settings + // from the JVM. + is64bit = System.getProperty("os.arch").contains("64"); + isCompressedOops = getIsCompressedOops(); + + objectSize = !is64bit ? 8 : (!isCompressedOops ? 16 : 12); + pointerSize = (is64bit && !isCompressedOops) ? 8 : 4; + classInfos.clear(); + classInfos.put(Object.class, new ClassInfo(objectSize, Collections.emptyList())); + } + + private static boolean getIsCompressedOops() { + // This is only used by tests to override the detection of compressed oops. The test + // actually uses a system property instead of a SparkConf, so we'll stick with that. + if (System.getProperty("spark.test.useCompressedOops") != null) { + return Boolean.getBoolean("spark.test.useCompressedOops"); + } + + try { + final String hotSpotMBeanName = "com.sun.management:type=HotSpotDiagnostic"; + final MBeanServer server = ManagementFactory.getPlatformMBeanServer(); + + // NOTE: This should throw an exception in non-Sun JVMs + final Class hotSpotMBeanClass = Class.forName("com.sun.management.HotSpotDiagnosticMXBean"); + final Method getVMMethod = hotSpotMBeanClass.getDeclaredMethod("getVMOption", + Class.forName("java.lang.String")); + + final Object bean = ManagementFactory.newPlatformMXBeanProxy(server, + hotSpotMBeanName, hotSpotMBeanClass); + // TODO: We could use reflection on the VMOption returned ? + return getVMMethod.invoke(bean, "UseCompressedOops").toString().contains("true"); + } catch(Exception e) { + // Guess whether they've enabled UseCompressedOops based on whether maxMemory < 32 GB + final boolean guess = Runtime.getRuntime().maxMemory() < (32L*1024*1024*1024); + logger.warn("Failed to check whether UseCompressedOops is set; assuming {}", guess); + return guess; + } + } + + private long primitiveSize(Class cls) { + if (cls == byte.class) { + return BYTE_SIZE; + } else if (cls == boolean.class) { + return BOOLEAN_SIZE; + } else if (cls == char.class) { + return CHAR_SIZE; + } else if (cls == short.class) { + return SHORT_SIZE; + } else if (cls == int.class) { + return INT_SIZE; + } else if (cls == long.class) { + return LONG_SIZE; + } else if (cls == float.class) { + return FLOAT_SIZE; + } else if (cls == double.class) { + return DOUBLE_SIZE; + } else { + throw new IllegalArgumentException( + "Non-primitive class " + cls + " passed to primitiveSize()"); + } + } + + private long alignSize(long size) { + final long rem = size % ALIGN_SIZE; + return (rem == 0) ? size : (size + ALIGN_SIZE - rem); + } +} From 43528c21adec24f109398126a57f1a51806d6ca1 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 29 Sep 2022 21:05:07 +0100 Subject: [PATCH 2/9] wip --- contrib/storage-opentsdb/pom.xml | 5 - .../store/openTSDB/OpenTSDBGroupScan.java | 1 - .../exec/store/openTSDB/SizeEstimator.java | 191 ++++++++++++++++-- 3 files changed, 179 insertions(+), 18 deletions(-) diff --git a/contrib/storage-opentsdb/pom.xml b/contrib/storage-opentsdb/pom.xml index e8bde23195f..cece448922f 100644 --- a/contrib/storage-opentsdb/pom.xml +++ b/contrib/storage-opentsdb/pom.xml @@ -75,11 +75,6 @@ converter-jackson ${retrofit.version} - - com.madhukaraphatak - java-sizeof_2.11 - 0.1 - diff --git a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/OpenTSDBGroupScan.java b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/OpenTSDBGroupScan.java index 680090368b6..8969a2d1b1e 100644 --- a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/OpenTSDBGroupScan.java +++ b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/OpenTSDBGroupScan.java @@ -24,7 +24,6 @@ import com.fasterxml.jackson.annotation.JsonTypeName; import org.apache.drill.shaded.guava.com.google.common.base.Preconditions; import org.apache.drill.shaded.guava.com.google.common.collect.Lists; -import com.madhukaraphatak.sizeof.SizeEstimator; import org.apache.drill.common.exceptions.ExecutionSetupException; import org.apache.drill.common.expression.SchemaPath; import org.apache.drill.exec.physical.base.AbstractGroupScan; diff --git a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java index 844424d9517..7ab8e02dc6f 100644 --- a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java +++ b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java @@ -17,27 +17,26 @@ */ package org.apache.drill.exec.store.openTSDB; -import org.apache.drill.exec.store.openTSDB.schema.OpenTSDBSchemaFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.management.MBeanServer; import java.lang.management.ManagementFactory; -import java.lang.reflect.Array; import java.lang.reflect.Field; import java.lang.reflect.Method; import java.lang.reflect.Modifier; -import java.util.ArrayList; -import java.util.Collections; +import java.util.HashSet; import java.util.IdentityHashMap; import java.util.LinkedList; -import java.util.List; import java.util.Random; import java.util.concurrent.ConcurrentHashMap; class SizeEstimator { private static final Logger logger = LoggerFactory.getLogger(SizeEstimator.class); + // Estimate the size of arrays larger than ARRAY_SIZE_FOR_SAMPLING by sampling. + private static final int ARRAY_SIZE_FOR_SAMPLING = 400; + private static final int ARRAY_SAMPLE_SIZE = 100; // should be lower than ARRAY_SIZE_FOR_SAMPLING /** * The state of an ongoing size estimation. Contains a stack of objects to visit as well as an @@ -78,9 +77,9 @@ Object dequeue() { */ private static class ClassInfo { private final long shellSize; - private final List pointerFields; + private final LinkedList pointerFields; - ClassInfo(final long shellSize, final List pointerFields) { + ClassInfo(final long shellSize, final LinkedList pointerFields) { this.shellSize = shellSize; this.pointerFields = pointerFields; } @@ -89,7 +88,7 @@ long getShellSize() { return shellSize; } - List getPointerFields() { + LinkedList getPointerFields() { return pointerFields; } } @@ -125,13 +124,14 @@ List getPointerFields() { static { // Sets object size, pointer size based on architecture and CompressedOops settings // from the JVM. - is64bit = System.getProperty("os.arch").contains("64"); + final String arch = System.getProperty("os.arch"); + is64bit = arch.contains("64") || arch.contains("s390x"); isCompressedOops = getIsCompressedOops(); objectSize = !is64bit ? 8 : (!isCompressedOops ? 16 : 12); pointerSize = (is64bit && !isCompressedOops) ? 8 : 4; classInfos.clear(); - classInfos.put(Object.class, new ClassInfo(objectSize, Collections.emptyList())); + classInfos.put(Object.class, new ClassInfo(objectSize, new LinkedList<>())); } private static boolean getIsCompressedOops() { @@ -141,6 +141,11 @@ private static boolean getIsCompressedOops() { return Boolean.getBoolean("spark.test.useCompressedOops"); } + final String javaVendor = System.getProperty("java.vendor"); + if (javaVendor.contains("IBM") || javaVendor.contains("OpenJ9")) { + return System.getProperty("java.vm.info").contains("Compressed Ref"); + } + try { final String hotSpotMBeanName = "com.sun.management:type=HotSpotDiagnostic"; final MBeanServer server = ManagementFactory.getPlatformMBeanServer(); @@ -162,7 +167,125 @@ private static boolean getIsCompressedOops() { } } - private long primitiveSize(Class cls) { + /** + * Estimate the number of bytes that the given object takes up on the JVM heap. The estimate + * includes space taken up by objects referenced by the given object, their references, and so on + * and so forth. + * + * This is useful for determining the amount of heap space a broadcast variable will occupy on + * each executor or the amount of space each object will take when caching objects in + * deserialized form. This is not the same as the serialized size of the object, which will + * typically be much smaller. + */ + public static long estimate(final Object obj) { + return estimate(obj, new IdentityHashMap<>()); + } + + private static long estimate(final Object obj, final IdentityHashMap visited) { + final SearchState state = new SearchState(visited); + state.enqueue(obj); + while (!state.isFinished()) { + visitSingleObject(state.dequeue(), state); + } + return state.size; + } + + private static void visitSingleObject(final Object obj, final SearchState state) { + final Class cls = obj.getClass(); + if (cls.isArray()) { + visitArray(obj, cls, state); + } else if (cls.getName().startsWith("scala.reflect")) { + // Many objects in the scala.reflect package reference global reflection objects which, in + // turn, reference many other large global objects. Do nothing in this case. + } else if (obj instanceof ClassLoader || obj instanceof Class) { + // Hadoop JobConfs created in the interpreter have a ClassLoader, which greatly confuses + // the size estimator since it references the whole REPL. Do nothing in this case. In + // general all ClassLoaders and Classes will be shared between objects anyway. + } else { + final Long calculatedSize = knownSize(obj); + if (calculatedSize != null) { + state.size += calculatedSize.longValue(); + } else { + final ClassInfo classInfo = getClassInfo(cls); + state.size += alignSize(classInfo.shellSize); + for (Field field : classInfo.pointerFields) { + try { + state.enqueue(field.get(obj)); + } catch (IllegalAccessException e) { + //skip this field + } + } + } + } + } + + private static void visitArray(final Object array, final Class arrayClass, final SearchState state) { + final long length = arrayLength(array); + final Class elementClass = arrayClass.getComponentType(); + + // Arrays have object header and length field which is an integer + long arrSize = alignSize(objectSize + INT_SIZE); + + if (elementClass.isPrimitive()) { + arrSize += alignSize(length * primitiveSize(elementClass)); + state.size += arrSize; + } else { + arrSize += alignSize(length * pointerSize); + state.size += arrSize; + + if (length <= ARRAY_SIZE_FOR_SAMPLING) { + int arrayIndex = 0; + while (arrayIndex < length) { + state.enqueue(arrayApply(array, arrayIndex)); + arrayIndex += 1; + } + } else { + // Estimate the size of a large array by sampling elements without replacement. + // To exclude the shared objects that the array elements may link, sample twice + // and use the min one to calculate array size. + final Random rand = new Random(42); + final HashSet drawn = new HashSet<>(2 * ARRAY_SAMPLE_SIZE); + final long s1 = sampleArray(array, state, rand, drawn, (int) length); + final long s2 = sampleArray(array, state, rand, drawn, (int) length); + final long size = Math.min(s1, s2); + state.size += Math.max(s1, s2) + + (size * ((length - ARRAY_SAMPLE_SIZE) / ARRAY_SAMPLE_SIZE)); + } + } + } + + private static long sampleArray(final Object array, final SearchState state, final Random rand, + final HashSet drawn, final int length) { + long size = 0L; + for (int i = 0; i <= ARRAY_SAMPLE_SIZE; i++) { + int index = 0; + do { + index = rand.nextInt(length); + } while (drawn.contains(index)); + drawn.add(index); + final Object obj = arrayApply(array, index); + if (obj != null) { + size += SizeEstimator.estimate(obj, state.visited); + } + } + return size; + } + + private static Object arrayApply(final Object obj, final int index) { + if (obj instanceof Object[]) { + return ((Object[])obj)[index]; + } + throw new IllegalArgumentException("illegal input for arrayApply " + obj); + } + + private static int arrayLength(final Object obj) { + if (obj instanceof Object[]) { + return ((Object[])obj).length; + } + throw new IllegalArgumentException("illegal input for arrayLength " + obj); + } + + private static long primitiveSize(Class cls) { if (cls == byte.class) { return BYTE_SIZE; } else if (cls == boolean.class) { @@ -185,8 +308,52 @@ private long primitiveSize(Class cls) { } } - private long alignSize(long size) { + /** + * Get or compute the ClassInfo for a given class. + */ + private static ClassInfo getClassInfo(Class cls) { + // Check whether we've already cached a ClassInfo for this class + final ClassInfo info = classInfos.get(cls); + if (info != null) { + return info; + } + + final ClassInfo parent = getClassInfo(cls.getSuperclass()); + long shellSize = parent.getShellSize(); + LinkedList pointerFields = parent.getPointerFields(); + + for (Field field : cls.getDeclaredFields()) { + if (!Modifier.isStatic(field.getModifiers())) { + final Class fieldClass = field.getType(); + if (fieldClass.isPrimitive()) { + shellSize += primitiveSize(fieldClass); + } else { + field.setAccessible(true); // Enable future get()'s on this field + shellSize += pointerSize; + pointerFields.add(0, field); + } + } + } + + shellSize = alignSize(shellSize); + + // Create and cache a new ClassInfo + final ClassInfo newInfo = new ClassInfo(shellSize, pointerFields); + classInfos.put(cls, newInfo); + return newInfo; + } + + private static long alignSize(final long size) { final long rem = size % ALIGN_SIZE; return (rem == 0) ? size : (size + ALIGN_SIZE - rem); } + + private static Long knownSize(final Object obj) { + try { + final Method method = obj.getClass().getMethod("estimatedSize"); + return (Long) method.invoke(obj); + } catch (Exception e) { + return null; + } + } } From f574b75e0a5434bfeac4269c55d53d97c5d168f4 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 29 Sep 2022 21:36:01 +0100 Subject: [PATCH 3/9] Update SizeEstimator.java --- .../exec/store/openTSDB/SizeEstimator.java | 87 ++++++++++++++++--- 1 file changed, 77 insertions(+), 10 deletions(-) diff --git a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java index 7ab8e02dc6f..39df0c7f663 100644 --- a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java +++ b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java @@ -17,6 +17,7 @@ */ package org.apache.drill.exec.store.openTSDB; +import com.google.common.collect.MapMaker; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -25,12 +26,15 @@ import java.lang.reflect.Field; import java.lang.reflect.Method; import java.lang.reflect.Modifier; +import java.util.ArrayList; import java.util.HashSet; import java.util.IdentityHashMap; import java.util.LinkedList; +import java.util.List; +import java.util.Map; import java.util.Random; -import java.util.concurrent.ConcurrentHashMap; +// based on https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala class SizeEstimator { private static final Logger logger = LoggerFactory.getLogger(SizeEstimator.class); @@ -103,12 +107,18 @@ LinkedList getPointerFields() { private static final int FLOAT_SIZE = 4; private static final int DOUBLE_SIZE = 8; + // Fields can be primitive types, sizes are: 1, 2, 4, 8. Or fields can be pointers. The size of + // a pointer is 4 or 8 depending on the JVM (32-bit or 64-bit) and UseCompressedOops flag. + // The sizes should be in descending order, as we will use that information for fields placement. + private static final int MAX_FIELD_SIZE = 8; + private static final List fieldSizes = new ArrayList<>(); + // Alignment boundary for objects // TODO: Is this arch dependent ? private static final int ALIGN_SIZE = 8; // A cache of ClassInfo objects for each class - private static final ConcurrentHashMap, ClassInfo> classInfos = new ConcurrentHashMap<>(); + private static final Map, ClassInfo> classInfos = new MapMaker().weakKeys().makeMap(); // Object and pointer sizes are arch dependent private static boolean is64bit = false; @@ -122,6 +132,11 @@ LinkedList getPointerFields() { private static int objectSize = 8; static { + fieldSizes.add(8); + fieldSizes.add(4); + fieldSizes.add(2); + fieldSizes.add(1); + // Sets object size, pointer size based on architecture and CompressedOops settings // from the JVM. final String arch = System.getProperty("os.arch"); @@ -285,7 +300,7 @@ private static int arrayLength(final Object obj) { throw new IllegalArgumentException("illegal input for arrayLength " + obj); } - private static long primitiveSize(Class cls) { + private static int primitiveSize(Class cls) { if (cls == byte.class) { return BYTE_SIZE; } else if (cls == boolean.class) { @@ -321,21 +336,63 @@ private static ClassInfo getClassInfo(Class cls) { final ClassInfo parent = getClassInfo(cls.getSuperclass()); long shellSize = parent.getShellSize(); LinkedList pointerFields = parent.getPointerFields(); + final int[] sizeCount = new int[MAX_FIELD_SIZE + 1]; for (Field field : cls.getDeclaredFields()) { if (!Modifier.isStatic(field.getModifiers())) { final Class fieldClass = field.getType(); if (fieldClass.isPrimitive()) { - shellSize += primitiveSize(fieldClass); + sizeCount[primitiveSize(fieldClass)] += 1; } else { - field.setAccessible(true); // Enable future get()'s on this field - shellSize += pointerSize; - pointerFields.add(0, field); + // Note: in Java 9+ this would be better with trySetAccessible and canAccess + try { + field.setAccessible(true); // Enable future get()'s on this field + pointerFields.add(0, field); + } catch(SecurityException se) { + // do nothing + // Java 9+ can throw InaccessibleObjectException but the class is Java 9+-only + } catch (RuntimeException re) { + if (re.getClass().getSimpleName().equals("InaccessibleObjectException")) { + // do nothing + } else { + throw re; + } + } + sizeCount[pointerSize] += 1; } } } - shellSize = alignSize(shellSize); + // Based on the simulated field layout code in Aleksey Shipilev's report: + // http://cr.openjdk.java.net/~shade/papers/2013-shipilev-fieldlayout-latest.pdf + // The code is in Figure 9. + // The simplified idea of field layout consists of 4 parts (see more details in the report): + // + // 1. field alignment: HotSpot lays out the fields aligned by their size. + // 2. object alignment: HotSpot rounds instance size up to 8 bytes + // 3. consistent fields layouts throughout the hierarchy: This means we should layout + // superclass first. And we can use superclass's shellSize as a starting point to layout the + // other fields in this class. + // 4. class alignment: HotSpot rounds field blocks up to HeapOopSize not 4 bytes, confirmed + // with Aleksey. see https://bugs.openjdk.java.net/browse/CODETOOLS-7901322 + // + // The real world field layout is much more complicated. There are three kinds of fields + // order in Java 8. And we don't consider the @contended annotation introduced by Java 8. + // see the HotSpot classloader code, layout_fields method for more details. + // hg.openjdk.java.net/jdk8/jdk8/hotspot/file/tip/src/share/vm/classfile/classFileParser.cpp + long alignedSize = shellSize; + for (Integer size : fieldSizes) { + if (sizeCount[size] > 0) { + final long count = sizeCount[size]; + // If there are internal gaps, smaller field can fit in. + alignedSize = Math.max(alignedSize, alignSizeUp(shellSize, size) + size * count); + shellSize += size * count; + } + } + + // Should choose a larger size to be new shellSize and clearly alignedSize >= shellSize, and + // round up the instance filed blocks + shellSize = alignSizeUp(alignedSize, pointerSize); // Create and cache a new ClassInfo final ClassInfo newInfo = new ClassInfo(shellSize, pointerFields); @@ -344,8 +401,18 @@ private static ClassInfo getClassInfo(Class cls) { } private static long alignSize(final long size) { - final long rem = size % ALIGN_SIZE; - return (rem == 0) ? size : (size + ALIGN_SIZE - rem); + return alignSizeUp(size, ALIGN_SIZE); + } + + /** + * Compute aligned size. The alignSize must be 2^n, otherwise the result will be wrong. + * When alignSize = 2^n, alignSize - 1 = 2^n - 1. The binary representation of (alignSize - 1) + * will only have n trailing 1s(0b00...001..1). ~(alignSize - 1) will be 0b11..110..0. Hence, + * (size + alignSize - 1) & ~(alignSize - 1) will set the last n bits to zeros, which leads to + * multiple of alignSize. + */ + private static long alignSizeUp(final long size, final int alignSize) { + return (size + alignSize - 1) & ~(alignSize - 1); } private static Long knownSize(final Object obj) { From 0c2b32d31554eb09283d05c9b03a552505f33220 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 29 Sep 2022 22:00:32 +0100 Subject: [PATCH 4/9] Update SizeEstimator.java --- .../exec/store/openTSDB/SizeEstimator.java | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java index 39df0c7f663..61859b4c082 100644 --- a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java +++ b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java @@ -17,7 +17,7 @@ */ package org.apache.drill.exec.store.openTSDB; -import com.google.common.collect.MapMaker; +import org.apache.drill.shaded.guava.com.google.common.collect.MapMaker; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -69,8 +69,7 @@ boolean isFinished() { } Object dequeue() { - Object elem = stack.removeLast(); - return elem; + return stack.removeLast(); } } @@ -121,11 +120,8 @@ LinkedList getPointerFields() { private static final Map, ClassInfo> classInfos = new MapMaker().weakKeys().makeMap(); // Object and pointer sizes are arch dependent - private static boolean is64bit = false; + private static final boolean is64bit; - // Size of an object reference - // Based on https://wikis.oracle.com/display/HotSpotInternals/CompressedOops - private static boolean isCompressedOops = false; private static int pointerSize = 4; // Minimum size of a java.lang.Object @@ -141,7 +137,9 @@ LinkedList getPointerFields() { // from the JVM. final String arch = System.getProperty("os.arch"); is64bit = arch.contains("64") || arch.contains("s390x"); - isCompressedOops = getIsCompressedOops(); + // Size of an object reference + // Based on https://wikis.oracle.com/display/HotSpotInternals/CompressedOops + final boolean isCompressedOops = getIsCompressedOops(); objectSize = !is64bit ? 8 : (!isCompressedOops ? 16 : 12); pointerSize = (is64bit && !isCompressedOops) ? 8 : 4; @@ -219,7 +217,7 @@ private static void visitSingleObject(final Object obj, final SearchState state) } else { final Long calculatedSize = knownSize(obj); if (calculatedSize != null) { - state.size += calculatedSize.longValue(); + state.size += calculatedSize; } else { final ClassInfo classInfo = getClassInfo(cls); state.size += alignSize(classInfo.shellSize); @@ -273,7 +271,7 @@ private static long sampleArray(final Object array, final SearchState state, fin final HashSet drawn, final int length) { long size = 0L; for (int i = 0; i <= ARRAY_SAMPLE_SIZE; i++) { - int index = 0; + int index; do { index = rand.nextInt(length); } while (drawn.contains(index)); From 9c8fd98cea216efffef09e5315025fcb1c7a6fb2 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 29 Sep 2022 23:45:22 +0100 Subject: [PATCH 5/9] fix some issues --- .../exec/store/openTSDB/SizeEstimator.java | 32 ++++++++++- .../store/openTSDB/TestDataHolder.java | 2 +- .../store/openTSDB/TestOpenTSDBPlugin.java | 31 +++++----- .../store/openTSDB/TestSizeEstimator.java | 57 +++++++++++++++++++ 4 files changed, 103 insertions(+), 19 deletions(-) rename contrib/storage-opentsdb/src/test/java/org/apache/drill/{ => exec}/store/openTSDB/TestDataHolder.java (99%) rename contrib/storage-opentsdb/src/test/java/org/apache/drill/{ => exec}/store/openTSDB/TestOpenTSDBPlugin.java (84%) create mode 100644 contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java diff --git a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java index 61859b4c082..13a2dd7b67a 100644 --- a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java +++ b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java @@ -50,7 +50,7 @@ class SizeEstimator { private static class SearchState { private final IdentityHashMap visited; - final LinkedList stack = new LinkedList<>(); + private final LinkedList stack = new LinkedList<>(); long size = 0L; SearchState(IdentityHashMap visited) { @@ -224,7 +224,7 @@ private static void visitSingleObject(final Object obj, final SearchState state) for (Field field : classInfo.pointerFields) { try { state.enqueue(field.get(obj)); - } catch (IllegalAccessException e) { + } catch (Exception e) { //skip this field } } @@ -287,6 +287,20 @@ private static long sampleArray(final Object array, final SearchState state, fin private static Object arrayApply(final Object obj, final int index) { if (obj instanceof Object[]) { return ((Object[])obj)[index]; + } else if (obj instanceof byte[]) { + return ((byte[]) obj)[index]; + } else if (obj instanceof int[]) { + return ((int[]) obj)[index]; + } else if (obj instanceof short[]) { + return ((short[])obj)[index]; + } else if (obj instanceof boolean[]) { + return ((boolean[]) obj)[index]; + } else if (obj instanceof float[]) { + return ((float[]) obj)[index]; + } else if (obj instanceof double[]) { + return ((double[]) obj)[index]; + } else if (obj instanceof char[]) { + return ((char[]) obj)[index]; } throw new IllegalArgumentException("illegal input for arrayApply " + obj); } @@ -294,6 +308,20 @@ private static Object arrayApply(final Object obj, final int index) { private static int arrayLength(final Object obj) { if (obj instanceof Object[]) { return ((Object[])obj).length; + } else if (obj instanceof byte[]) { + return ((byte[]) obj).length; + } else if (obj instanceof int[]) { + return ((int[]) obj).length; + } else if (obj instanceof short[]) { + return ((short[])obj).length; + } else if (obj instanceof boolean[]) { + return ((boolean[]) obj).length; + } else if (obj instanceof float[]) { + return ((float[]) obj).length; + } else if (obj instanceof double[]) { + return ((double[])obj).length; + } else if (obj instanceof char[]) { + return ((char[]) obj).length; } throw new IllegalArgumentException("illegal input for arrayLength " + obj); } diff --git a/contrib/storage-opentsdb/src/test/java/org/apache/drill/store/openTSDB/TestDataHolder.java b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestDataHolder.java similarity index 99% rename from contrib/storage-opentsdb/src/test/java/org/apache/drill/store/openTSDB/TestDataHolder.java rename to contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestDataHolder.java index c6e72284509..7e5fef04f96 100644 --- a/contrib/storage-opentsdb/src/test/java/org/apache/drill/store/openTSDB/TestDataHolder.java +++ b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestDataHolder.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.drill.store.openTSDB; +package org.apache.drill.exec.store.openTSDB; public class TestDataHolder { diff --git a/contrib/storage-opentsdb/src/test/java/org/apache/drill/store/openTSDB/TestOpenTSDBPlugin.java b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestOpenTSDBPlugin.java similarity index 84% rename from contrib/storage-opentsdb/src/test/java/org/apache/drill/store/openTSDB/TestOpenTSDBPlugin.java rename to contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestOpenTSDBPlugin.java index 1c8207ae0c5..e021c15ee80 100644 --- a/contrib/storage-opentsdb/src/test/java/org/apache/drill/store/openTSDB/TestOpenTSDBPlugin.java +++ b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestOpenTSDBPlugin.java @@ -15,13 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.drill.store.openTSDB; +package org.apache.drill.exec.store.openTSDB; import com.github.tomakehurst.wiremock.junit.WireMockRule; import org.apache.drill.common.exceptions.UserRemoteException; import org.apache.drill.exec.proto.UserBitShared.QueryType; import org.apache.drill.exec.store.StoragePluginRegistry; -import org.apache.drill.exec.store.openTSDB.OpenTSDBStoragePluginConfig; import org.apache.drill.test.ClientFixture; import org.apache.drill.test.ClusterFixture; import org.apache.drill.test.ClusterTest; @@ -36,20 +35,20 @@ import static com.github.tomakehurst.wiremock.client.WireMock.get; import static com.github.tomakehurst.wiremock.client.WireMock.post; import static com.github.tomakehurst.wiremock.client.WireMock.urlEqualTo; -import static org.apache.drill.store.openTSDB.TestDataHolder.DOWNSAMPLE_REQUEST_WITH_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.DOWNSAMPLE_REQUEST_WTIHOUT_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.END_PARAM_REQUEST_WITH_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.END_PARAM_REQUEST_WTIHOUT_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.POST_REQUEST_WITHOUT_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.POST_REQUEST_WITH_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.REQUEST_TO_NONEXISTENT_METRIC; -import static org.apache.drill.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_GET_TABLE_NAME_REQUEST; -import static org.apache.drill.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_GET_TABLE_REQUEST; -import static org.apache.drill.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_DOWNSAMPLE_REQUEST_WITHOUT_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_DOWNSAMPLE_REQUEST_WITH_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_END_REQUEST_WITHOUT_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_END_REQUEST_WITH_TAGS; -import static org.apache.drill.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_REQUEST_WITH_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.DOWNSAMPLE_REQUEST_WITH_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.DOWNSAMPLE_REQUEST_WTIHOUT_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.END_PARAM_REQUEST_WITH_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.END_PARAM_REQUEST_WTIHOUT_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.POST_REQUEST_WITHOUT_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.POST_REQUEST_WITH_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.REQUEST_TO_NONEXISTENT_METRIC; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_GET_TABLE_NAME_REQUEST; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_GET_TABLE_REQUEST; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_DOWNSAMPLE_REQUEST_WITHOUT_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_DOWNSAMPLE_REQUEST_WITH_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_END_REQUEST_WITHOUT_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_END_REQUEST_WITH_TAGS; +import static org.apache.drill.exec.store.openTSDB.TestDataHolder.SAMPLE_DATA_FOR_POST_REQUEST_WITH_TAGS; import static org.junit.Assert.assertEquals; public class TestOpenTSDBPlugin extends ClusterTest { diff --git a/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java new file mode 100644 index 00000000000..9d027b3c108 --- /dev/null +++ b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.openTSDB; + +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertTrue; + +public class TestSizeEstimator { + + private static class TestMetricDTO { + private final String metric; + private final Map tags; + private final List aggregateTags; + private final Map dps; + + TestMetricDTO(String metric, Map tags, List aggregateTags, Map dps) { + this.metric = metric; + this.tags = tags; + this.aggregateTags = aggregateTags; + this.dps = dps; + } + } + + @Test + public void testMetricDTO() { + Map tags = new HashMap<>(); + Map dps = new HashMap<>(); + tags.put("t1", "v1"); + dps.put("dp1", "dpv1"); + List aggregateTags = new ArrayList<>(); + TestMetricDTO dto = new TestMetricDTO("metric1", tags, aggregateTags, dps); + long size = SizeEstimator.estimate(dto); + assertTrue(size > 500); + assertTrue(size < 1000); + } +} From 75b347370e153c788963db9af12fa29d5867685f Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Fri, 30 Sep 2022 00:05:01 +0100 Subject: [PATCH 6/9] Update SizeEstimator.java --- .../org/apache/drill/exec/store/openTSDB/SizeEstimator.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java index 13a2dd7b67a..b0d05e0154c 100644 --- a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java +++ b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java @@ -293,6 +293,8 @@ private static Object arrayApply(final Object obj, final int index) { return ((int[]) obj)[index]; } else if (obj instanceof short[]) { return ((short[])obj)[index]; + } else if (obj instanceof long[]) { + return ((long[])obj)[index]; } else if (obj instanceof boolean[]) { return ((boolean[]) obj)[index]; } else if (obj instanceof float[]) { @@ -314,6 +316,8 @@ private static int arrayLength(final Object obj) { return ((int[]) obj).length; } else if (obj instanceof short[]) { return ((short[])obj).length; + } else if (obj instanceof long[]) { + return ((long[])obj).length; } else if (obj instanceof boolean[]) { return ((boolean[]) obj).length; } else if (obj instanceof float[]) { From aa99a0f8db2ebe8e11e193c0256335ceb04a7999 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Fri, 30 Sep 2022 15:29:54 +0100 Subject: [PATCH 7/9] extra tests --- .../exec/store/openTSDB/TestSizeEstimator.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java index 9d027b3c108..79bf86e3827 100644 --- a/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java +++ b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; public class TestSizeEstimator { @@ -54,4 +55,20 @@ public void testMetricDTO() { assertTrue(size > 500); assertTrue(size < 1000); } + + @Test + public void testArrays() { + assertEquals(32, SizeEstimator.estimate(new byte[10])); + assertEquals(40, SizeEstimator.estimate(new char[10])); + assertEquals(40, SizeEstimator.estimate(new short[10])); + assertEquals(56, SizeEstimator.estimate(new int[10])); + assertEquals(96, SizeEstimator.estimate(new long[10])); + assertEquals(56, SizeEstimator.estimate(new float[10])); + assertEquals(96, SizeEstimator.estimate(new double[10])); + assertEquals(4016, SizeEstimator.estimate(new int[1000])); + assertEquals(8016, SizeEstimator.estimate(new long[1000])); + assertEquals(56, SizeEstimator.estimate(new String[10])); + assertEquals(56, SizeEstimator.estimate(new Object[10])); + } + } From d3bf5b140557fe9ce49cb505a9280cfa36ef6541 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Wed, 5 Oct 2022 21:00:06 +0100 Subject: [PATCH 8/9] Update SizeEstimator.java --- .../org/apache/drill/exec/store/openTSDB/SizeEstimator.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java index b0d05e0154c..89fd96f8894 100644 --- a/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java +++ b/contrib/storage-opentsdb/src/main/java/org/apache/drill/exec/store/openTSDB/SizeEstimator.java @@ -35,6 +35,7 @@ import java.util.Random; // based on https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +// which itself is based on https://www.infoworld.com/article/2077408/sizeof-for-java.html class SizeEstimator { private static final Logger logger = LoggerFactory.getLogger(SizeEstimator.class); @@ -270,7 +271,7 @@ private static void visitArray(final Object array, final Class arrayClass, fi private static long sampleArray(final Object array, final SearchState state, final Random rand, final HashSet drawn, final int length) { long size = 0L; - for (int i = 0; i <= ARRAY_SAMPLE_SIZE; i++) { + for (int i = 0; i < ARRAY_SAMPLE_SIZE; i++) { int index; do { index = rand.nextInt(length); From 5d3500b27ec3c91bd05a363f2706f60af166c655 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Wed, 5 Oct 2022 22:35:28 +0100 Subject: [PATCH 9/9] Update TestSizeEstimator.java --- .../store/openTSDB/TestSizeEstimator.java | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java index 79bf86e3827..95e5a5b4701 100644 --- a/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java +++ b/contrib/storage-opentsdb/src/test/java/org/apache/drill/exec/store/openTSDB/TestSizeEstimator.java @@ -43,19 +43,6 @@ private static class TestMetricDTO { } } - @Test - public void testMetricDTO() { - Map tags = new HashMap<>(); - Map dps = new HashMap<>(); - tags.put("t1", "v1"); - dps.put("dp1", "dpv1"); - List aggregateTags = new ArrayList<>(); - TestMetricDTO dto = new TestMetricDTO("metric1", tags, aggregateTags, dps); - long size = SizeEstimator.estimate(dto); - assertTrue(size > 500); - assertTrue(size < 1000); - } - @Test public void testArrays() { assertEquals(32, SizeEstimator.estimate(new byte[10])); @@ -71,4 +58,30 @@ public void testArrays() { assertEquals(56, SizeEstimator.estimate(new Object[10])); } + @Test + public void testList() { + ArrayList list = new ArrayList<>(); + for(int i = 0; i < 10; i++) { + list.add(Long.valueOf(i)); + } + assertEquals(320, SizeEstimator.estimate(list)); + for(int i = 0; i < 10; i++) { + list.add(Long.valueOf(i)); + } + assertEquals(368, SizeEstimator.estimate(list)); + } + + @Test + public void testMetricDTO() { + Map tags = new HashMap<>(); + Map dps = new HashMap<>(); + tags.put("t1", "v1"); + dps.put("dp1", "dpv1"); + List aggregateTags = new ArrayList<>(); + TestMetricDTO dto = new TestMetricDTO("metric1", tags, aggregateTags, dps); + long size = SizeEstimator.estimate(dto); + assertTrue("size less then expected: " + size, size > 550); + assertTrue("size greater then expected: " + size, size < 800); + } + }