Skip to content

Commit 345c5d6

Browse files
committed
Add rough memory size tracking with KllItemsSketch
When generating KLL sketches, systems may need to know roughly how much memory a particular sketch uses. For sketches with fixed-width types the answer can be computed efficiently. With the KllItemsSketch this is more difficult, because the sketch can hold variable-width types such as String. This commit adds implementation support to expose the `getTotalItemsNumBytes` method so that external systems can roughly track the memory utilization of a particular sketch. The change accomplishes this by intercepting the code where a new item is added to the items array, or where a new array is generated entirely. This adds a slight overhead, because the sketch now needs to compute the length of its inputs. For fixed-width types the overhead is low. For strings, this requires a call to encode the string as UTF-8 or UTF-16 before adding it to the array. For fixed-width types, the calculation has little effective overhead, as it is a single array-access lookup plus a multiplication by the type width.
1 parent 0bff44b commit 345c5d6

15 files changed

+167
-9
lines changed

src/main/java/org/apache/datasketches/common/ArrayOfBooleansSerDe.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,4 +122,10 @@ public String toString(final Boolean item) {
122122

123123
@Override
124124
public Class<Boolean> getClassOfT() { return Boolean.class; }
125+
126+
@Override
127+
public boolean isFixedWidth()
128+
{
129+
return true;
130+
}
125131
}

src/main/java/org/apache/datasketches/common/ArrayOfDoublesSerDe.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,10 @@ public String toString(final Double item) {
101101

102102
@Override
103103
public Class<Double> getClassOfT() { return Double.class; }
104+
105+
@Override
106+
public boolean isFixedWidth()
107+
{
108+
return true;
109+
}
104110
}

src/main/java/org/apache/datasketches/common/ArrayOfItemsSerDe.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,4 +114,9 @@ public int sizeOf(final T[] items) {
114114
* @return the concrete class of type T
115115
*/
116116
public abstract Class<T> getClassOfT();
117+
118+
/**
119+
* @return true if this class serializes all items to a fixed width, false otherwise.
120+
*/
121+
public abstract boolean isFixedWidth();
117122
}

src/main/java/org/apache/datasketches/common/ArrayOfLongsSerDe.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,4 +100,10 @@ public String toString(final Long item) {
100100

101101
@Override
102102
public Class<Long> getClassOfT() { return Long.class; }
103+
104+
@Override
105+
public boolean isFixedWidth()
106+
{
107+
return true;
108+
}
103109
}

src/main/java/org/apache/datasketches/common/ArrayOfNumbersSerDe.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,4 +240,10 @@ public String toString(final Number item) {
240240

241241
@Override
242242
public Class<Number> getClassOfT() { return Number.class; }
243+
244+
@Override
245+
public boolean isFixedWidth()
246+
{
247+
return false;
248+
}
243249
}

src/main/java/org/apache/datasketches/common/ArrayOfStringsSerDe.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,4 +130,10 @@ public String toString(final String item) {
130130

131131
@Override
132132
public Class<String> getClassOfT() { return String.class; }
133+
134+
@Override
135+
public boolean isFixedWidth()
136+
{
137+
return false;
138+
}
133139
}

src/main/java/org/apache/datasketches/common/ArrayOfUtf16StringsSerDe.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,4 +124,10 @@ public String toString(final String item) {
124124

125125
@Override
126126
public Class<String> getClassOfT() { return String.class; }
127+
128+
@Override
129+
public boolean isFixedWidth()
130+
{
131+
return false;
132+
}
127133
}

src/main/java/org/apache/datasketches/kll/KllDirectCompactItemsSketch.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,12 @@ T[] getTotalItemsArray() {
233233
return capItems;
234234
}
235235

236+
@Override
237+
int getTotalItemsNumBytesInternal()
238+
{
239+
return getRetainedItemsSizeBytes();
240+
}
241+
236242
@Override
237243
WritableMemory getWritableMemory() {
238244
return (WritableMemory)mem;

src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,7 @@ int getSingleItemSizeBytes() {
514514
abstract byte[] getTotalItemsByteArr();
515515

516516
@Override
517-
int getTotalItemsNumBytes() {
517+
public int getTotalItemsNumBytes() {
518518
return levelsArr[getNumLevels()] * Double.BYTES;
519519
}
520520

src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,7 @@ int getSingleItemSizeBytes() {
514514
abstract byte[] getTotalItemsByteArr();
515515

516516
@Override
517-
int getTotalItemsNumBytes() {
517+
public int getTotalItemsNumBytes() {
518518
return levelsArr[getNumLevels()] * Float.BYTES;
519519
}
520520

0 commit comments

Comments
 (0)