diff --git a/distribution/pom.xml b/distribution/pom.xml index ab8e082c9ab8..e41fdde64f53 100644 --- a/distribution/pom.xml +++ b/distribution/pom.xml @@ -450,6 +450,8 @@ org.apache.druid.extensions.contrib:opentelemetry-emitter -c org.apache.druid.extensions:druid-iceberg-extensions + -c + org.apache.druid.extensions.contrib:druid-spectator-histogram diff --git a/docs/assets/spectator-histogram-size-comparison.png b/docs/assets/spectator-histogram-size-comparison.png new file mode 100644 index 000000000000..306f45abd817 Binary files /dev/null and b/docs/assets/spectator-histogram-size-comparison.png differ diff --git a/docs/configuration/extensions.md b/docs/configuration/extensions.md index 5fbb20e74efe..3c150333c291 100644 --- a/docs/configuration/extensions.md +++ b/docs/configuration/extensions.md @@ -100,6 +100,7 @@ All of these community extensions can be downloaded using [pull-deps](../operati |gce-extensions|GCE Extensions|[link](../development/extensions-contrib/gce-extensions.md)| |prometheus-emitter|Exposes [Druid metrics](../operations/metrics.md) for Prometheus server collection (https://prometheus.io/)|[link](../development/extensions-contrib/prometheus.md)| |kubernetes-overlord-extensions|Support for launching tasks in k8s without Middle Managers|[link](../development/extensions-contrib/k8s-jobs.md)| +|druid-spectator-histogram|Support for efficient approximate percentile queries|[link](../development/extensions-contrib/spectator-histogram.md)| ## Promoting community extensions to core extensions diff --git a/docs/development/extensions-contrib/spectator-histogram.md b/docs/development/extensions-contrib/spectator-histogram.md new file mode 100644 index 000000000000..e6d12517e5cf --- /dev/null +++ b/docs/development/extensions-contrib/spectator-histogram.md @@ -0,0 +1,457 @@ +--- +id: spectator-histogram +title: "Spectator Histogram module" +--- + + + +## Summary +This module provides Apache Druid approximate histogram aggregators and percentile +post-aggregators based on Spectator fixed-bucket histograms. + +Consider SpectatorHistogram to compute percentile approximations. This extension has a reduced storage footprint compared to the [DataSketches extension](../extensions-core/datasketches-extension.md), which results in smaller segment sizes, faster loading from deep storage, and lower memory usage. This extension provides fast and accurate queries on large datasets at low storage cost. + +This aggregator only applies when your raw data contains positive long integer values. Do not use this aggregator if you have negative values in your data. + +In the Druid instance shown below, the example Wikipedia dataset is loaded 3 times. +* `wikipedia` contains the dataset ingested as is, without rollup +* `wikipedia_spectator` contains the dataset with a single extra metric column of type `spectatorHistogram` for the `added` column +* `wikipedia_datasketch` contains the dataset with a single extra metric column of type `quantilesDoublesSketch` for the `added` column + +Spectator histograms average just 6 extra bytes per row, while the `quantilesDoublesSketch` +adds 48 bytes per row. This represents an eightfold reduction in additional storage size for spectator histograms. + +![Comparison of datasource sizes in web console](../../assets/spectator-histogram-size-comparison.png) + +As rollup improves, so does the size savings. 
For example, when you ingest the Wikipedia dataset
with day-grain query granularity and remove all dimensions except `countryName`,
the result is a segment with just 106 rows. The base segment has 87 bytes per row.
Compare the following bytes per row for SpectatorHistogram versus DataSketches:
* An additional `spectatorHistogram` column adds 27 bytes per row on average.
* An additional `quantilesDoublesSketch` column adds 255 bytes per row.

SpectatorHistogram reduces the additional storage size by 9.4 times in this example.
Storage gains differ per dataset depending on the variance and rollup of the data.

## Background
[Spectator](https://netflix.github.io/atlas-docs/spectator/) is a simple library
for instrumenting code to record dimensional time series data.
It was built primarily to work with [Atlas](https://netflix.github.io/atlas-docs/),
which Netflix developed to manage dimensional time series data for near
real-time operational insight.

With the [Atlas-Druid](https://github.com/Netflix-Skunkworks/iep-apps/tree/main/atlas-druid)
service, it's possible to use the power of Atlas queries with Druid as the backing
data store, benefiting from high-dimensionality and high-cardinality data.

SpectatorHistogram is designed for efficient parallel aggregation while still
allowing for filtering and grouping by dimensions.
It provides similar functionality to the DataSketches `quantilesDoublesSketch` aggregator, but is
opinionated to maintain higher absolute accuracy at smaller values.
Larger values have lower absolute accuracy; however, relative accuracy is maintained across the range.
See [Bucket boundaries](#histogram-bucket-boundaries) for more information.
SpectatorHistogram is optimized for typical measurements from cloud services and web apps,
such as page load time, transferred bytes, response time, and request latency.

Through some trade-offs, SpectatorHistogram provides a significantly more compact
representation with the same aggregation performance and accuracy as the
DataSketches Quantiles Sketch. Note that results depend on the dataset.
Also see the [limitations](#limitations) of this extension.

## Limitations
* Supports positive long integer values within the range of [0, 2^53). Negative values are
coerced to 0.
* Does not support decimals.
* Does not support Druid SQL queries, only native queries.
* Does not support vectorized queries.
* Generates 276 fixed buckets with increasing bucket widths. In practice, the observed error of computed percentiles ranges from 0.1% to 3%, exclusive. See [Bucket boundaries](#histogram-bucket-boundaries) for the full list of bucket boundaries.

:::tip
If these limitations don't fit your use case, use the [DataSketches extension](../extensions-core/datasketches-extension.md) instead.
:::

## Functionality
The SpectatorHistogram aggregator can generate histograms from raw numeric
values as well as aggregate or combine pre-aggregated histograms generated by
the SpectatorHistogram aggregator itself.
While you can generate histograms on the fly at query time, it is generally more
performant to generate histograms during ingestion and then combine them at
query time. This is especially true where rollup is enabled. It may be misleading or
incorrect to generate histograms from data that has already been rolled up and summed.
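For intuition into what the aggregator stores, the following minimal sketch uses the same Spectator `PercentileBuckets` methods that this extension calls internally. It assumes only the `spectator-api` library on the classpath; the class and variable names are illustrative:

```java
import com.netflix.spectator.api.histogram.PercentileBuckets;

// Illustrative only: bucket a few raw values the way the aggregator does,
// then estimate a percentile from the per-bucket counts.
public class BucketingSketch
{
  public static void main(String[] args)
  {
    // One counter per fixed Spectator bucket.
    long[] counts = new long[PercentileBuckets.length()];

    // "Ingest" raw values by incrementing the count of each value's bucket.
    for (long value : new long[]{10, 30, 40, 40, 40, 50, 50, 100}) {
      counts[PercentileBuckets.indexOf(value)]++;
    }

    // Estimate the median from the bucket counts.
    double[] results = new double[1];
    PercentileBuckets.percentiles(counts, new double[]{50.0}, results);
    System.out.println("Approximate 50th percentile: " + results[0]);
  }
}
```

Because only the bucket counts are retained, histograms from different rows or segments combine by summing counts bucket by bucket, which is what makes the aggregation cheap to parallelize.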
+ +The module provides postAggregators, `percentileSpectatorHistogram` (singular) and +`percentilesSpectatorHistogram` (plural), to compute approximate +percentiles from histograms generated by the SpectatorHistogram aggregator. +Again, these postAggregators can be used to compute percentiles from raw numeric +values via the SpectatorHistogram aggregator or from pre-aggregated histograms. + +> If you're only using the aggregator to compute percentiles from raw numeric values, +then you can use the built-in quantilesDoublesSketch aggregator instead. The performance +and accuracy are comparable. However, the DataSketches aggregator supports negative values, +and you don't need to download an additional extension. + +An aggregated SpectatorHistogram can also be queried using a `longSum` or `doubleSum` +aggregator to retrieve the population of the histogram. This is effectively the count +of the number of values that were aggregated into the histogram. This flexibility can +avoid the need to maintain a separate metric for the count of values. + +For high-frequency measurements, you may need to pre-aggregate data at the client prior +to sending into Druid. For example, if you're measuring individual image render times +on an image-heavy website, you may want to aggregate the render times for a page-view +into a single histogram prior to sending to Druid in real-time. This can reduce the +amount of data that's needed to send from the client across the wire. + +SpectatorHistogram supports ingesting pre-aggregated histograms in real-time and batch. +They can be sent as a JSON map, keyed by the spectator bucket ID and the value is the +count of values. This is the same format as the serialized JSON representation of the +histogram. The keys need not be ordered or contiguous. For example: + +```json +{ "4": 8, "5": 15, "6": 37, "7": 9, "8": 3, "10": 1, "13": 1 } +``` + +## Loading the extension +To use SpectatorHistogram, make sure you [include](../../configuration/extensions.md#loading-extensions) the extension in your config file: + +``` +druid.extensions.loadList=["druid-spectator-histogram"] +``` + +## Aggregators + +The result of the aggregation is a histogram that is built by ingesting numeric values from +the raw data, or from combining pre-aggregated histograms. The result is represented in +JSON format where the keys are the bucket index and the values are the count of entries +in that bucket. + +The buckets are defined as per the Spectator [PercentileBuckets](https://github.com/Netflix/spectator/blob/main/spectator-api/src/main/java/com/netflix/spectator/api/histogram/PercentileBuckets.java) specification. +See [Histogram bucket boundaries](#histogram-bucket-boundaries) for the full list of bucket boundaries. +```js + // The set of buckets is generated by using powers of 4 and incrementing by one-third of the + // previous power of 4 in between as long as the value is less than the next power of 4 minus + // the delta. + // + // Base: 1, 2, 3 + // + // 4 (4^1), delta = 1 (~1/3 of 4) + // 5, 6, 7, ..., 14, + // + // 16 (4^2), delta = 5 (~1/3 of 16) + // 21, 26, 31, ..., 56, + // + // 64 (4^3), delta = 21 (~1/3 of 64) + // ... +``` + +There are multiple aggregator types included, all of which are based on the same +underlying implementation. If you use the Atlas-Druid service, the different types +signal the service on how to handle the resulting data from a query. + +* spectatorHistogramTimer signals that the histogram is representing +a collection of timer values. 
It is recommended to normalize timer values to nanoseconds
at, or prior to, ingestion. If queried via the Atlas-Druid service, timers are
normalized to second resolution at query time as a more natural unit of time
for human consumption.
* spectatorHistogram and spectatorHistogramDistribution are generic histograms that
can represent any measured value without units. No normalization is
required or performed.

### `spectatorHistogram` aggregator
Aliases: `spectatorHistogramDistribution`, `spectatorHistogramTimer`

To aggregate at query time:
```
{
  "type" : "spectatorHistogram",
  "name" : <output_name>,
  "fieldName" : <metric_name>
}
```

| Property | Description | Required? |
|-----------|---------------------------------------------------------------------------------------------------------------|-----------|
| type | This String must be one of "spectatorHistogram", "spectatorHistogramTimer", or "spectatorHistogramDistribution". | yes |
| name | A String for the output (result) name of the aggregation. | yes |
| fieldName | A String for the name of the input field containing raw numeric values or pre-aggregated histograms. | yes |

### `longSum`, `doubleSum`, and `floatSum` aggregators
To get the population size (count of events contributing to the histogram):
```
{
  "type" : "longSum",
  "name" : <output_name>,
  "fieldName" : <metric_name>
}
```

| Property | Description | Required? |
|-----------|----------------------------------------------------------------------------------|-----------|
| type | Must be "longSum", "doubleSum", or "floatSum". | yes |
| name | A String for the output (result) name of the aggregation. | yes |
| fieldName | A String for the name of the input field containing pre-aggregated histograms. | yes |

## Post Aggregators

### Percentile (singular)
This returns a single percentile estimate based on the distribution of the values in the aggregated histogram.

```
{
  "type": "percentileSpectatorHistogram",
  "name": <output_name>,
  "field": {
    "type": "fieldAccess",
    "fieldName": <aggregator_name>
  },
  "percentile": <decimal>
}
```

| Property | Description | Required? |
|------------|----------------------------------------------------------------|-----------|
| type | This String should always be "percentileSpectatorHistogram". | yes |
| name | A String for the output (result) name of the calculation. | yes |
| field | A field reference pointing to the aggregated histogram. | yes |
| percentile | A single decimal percentile between 0.0 and 100.0. | yes |

### Percentiles (multiple)
This returns an array of percentiles corresponding to those requested.

```
{
  "type": "percentilesSpectatorHistogram",
  "name": <output_name>,
  "field": {
    "type": "fieldAccess",
    "fieldName": <aggregator_name>
  },
  "percentiles": [25, 50, 75, 99.5]
}
```

> It's more efficient to request multiple percentiles in a single query
than to request individual percentiles in separate queries. This array-based
helper is provided for convenience and has a marginal performance benefit over
using the singular percentile post-aggregator multiple times within a query.
The expensive part of the query is the aggregation of the histogram;
the post-aggregation calculations all happen on the same aggregated histogram.

The results contain arrays matching the length and order of the requested
array of percentiles.
+ +``` +"percentilesAdded": [ + 0.5504911679884643, // 25th percentile + 4.013975155279504, // 50th percentile + 78.89518317503394, // 75th percentile + 8580.024999999994 // 99.5th percentile +] +``` + +| Property | Description | Required? | +|-------------|--------------------------------------------------------------|-----------| +| type | This String should always be "percentilesSpectatorHistogram" | yes | +| name | A String for the output (result) name of the calculation. | yes | +| field | A field reference pointing to the aggregated histogram. | yes | +| percentiles | Non-empty array of decimal percentiles between 0.0 and 100.0 | yes | + +## Examples + +### Example Ingestion Spec +Example of ingesting the sample Wikipedia dataset with a histogram metric column: +```json +{ + "type": "index_parallel", + "spec": { + "ioConfig": { + "type": "index_parallel", + "inputSource": { + "type": "http", + "uris": ["https://druid.apache.org/data/wikipedia.json.gz"] + }, + "inputFormat": { "type": "json" } + }, + "dataSchema": { + "granularitySpec": { + "segmentGranularity": "day", + "queryGranularity": "minute", + "rollup": true + }, + "dataSource": "wikipedia", + "timestampSpec": { "column": "timestamp", "format": "iso" }, + "dimensionsSpec": { + "dimensions": [ + "isRobot", + "channel", + "flags", + "isUnpatrolled", + "page", + "diffUrl", + "comment", + "isNew", + "isMinor", + "isAnonymous", + "user", + "namespace", + "cityName", + "countryName", + "regionIsoCode", + "metroCode", + "countryIsoCode", + "regionName" + ] + }, + "metricsSpec": [ + { "name": "count", "type": "count" }, + { "name": "sum_added", "type": "longSum", "fieldName": "added" }, + { + "name": "hist_added", + "type": "spectatorHistogram", + "fieldName": "added" + } + ] + }, + "tuningConfig": { + "type": "index_parallel", + "partitionsSpec": { "type": "hashed" }, + "forceGuaranteedRollup": true + } + } +} +``` + +### Example Query +Example query using the sample Wikipedia dataset: +```json +{ + "queryType": "timeseries", + "dataSource": { + "type": "table", + "name": "wikipedia" + }, + "intervals": { + "type": "intervals", + "intervals": [ + "0000-01-01/9999-12-31" + ] + }, + "granularity": { + "type": "all" + }, + "aggregations": [ + { + "type": "spectatorHistogram", + "name": "histogram_added", + "fieldName": "added" + } + ], + "postAggregations": [ + { + "type": "percentileSpectatorHistogram", + "name": "medianAdded", + "field": { + "type": "fieldAccess", + "fieldName": "histogram_added" + }, + "percentile": "50.0" + } + ] +} +``` +Results in +```json +[ + { + "result": { + "histogram_added": { + "0": 11096, "1": 632, "2": 297, "3": 187, "4": 322, "5": 161, + "6": 174, "7": 127, "8": 125, "9": 162, "10": 123, "11": 106, + "12": 95, "13": 104, "14": 95, "15": 588, "16": 540, "17": 690, + "18": 719, "19": 478, "20": 288, "21": 250, "22": 219, "23": 224, + "24": 737, "25": 424, "26": 343, "27": 266, "28": 232, "29": 217, + "30": 171, "31": 164, "32": 161, "33": 530, "34": 339, "35": 236, + "36": 181, "37": 152, "38": 113, "39": 128, "40": 80, "41": 75, + "42": 289, "43": 145, "44": 138, "45": 83, "46": 45, "47": 46, + "48": 64, "49": 65, "50": 71, "51": 421, "52": 525, "53": 59, + "54": 31, "55": 35, "56": 8, "57": 10, "58": 5, "59": 4, "60": 11, + "61": 10, "62": 5, "63": 2, "64": 2, "65": 1, "67": 1, "68": 1, + "69": 1, "70": 1, "71": 1, "78": 2 + }, + "medianAdded": 4.013975155279504 + }, + "timestamp": "2016-06-27T00:00:00.000Z" + } +] +``` + +## Histogram bucket boundaries +The following array lists the upper bounds of 
each bucket index. There are 276 buckets in total. +The first bucket index is 0 and the last bucket index is 275. +The bucket widths increase as the bucket index increases. This leads to a greater absolute error for larger values, but maintains a relative error of rough percentage across the number range. +For example, the maximum error at value 10 is zero since the bucket width is 1 (the difference of `11-10`). For a value of 16,000,000,000, the bucket width is 1,431,655,768 (from `17179869184-15748213416`). This gives an error of up to ~8.9%, from `1,431,655,768/16,000,000,000*100`. In practice, the observed error of computed percentiles is in the range of (0.1%, 3%). +```json +[ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 21, 26, 31, 36, 41, 46, + 51, 56, 64, 85, 106, 127, 148, 169, 190, 211, 232, 256, 341, 426, 511, 596, + 681, 766, 851, 936, 1024, 1365, 1706, 2047, 2388, 2729, 3070, 3411, 3752, + 4096, 5461, 6826, 8191, 9556, 10921, 12286, 13651, 15016, 16384, 21845, + 27306, 32767, 38228, 43689, 49150, 54611, 60072, 65536, 87381, 109226, + 131071, 152916, 174761, 196606, 218451, 240296, 262144, 349525, 436906, + 524287, 611668, 699049, 786430, 873811, 961192, 1048576, 1398101, 1747626, + 2097151, 2446676, 2796201, 3145726, 3495251, 3844776, 4194304, 5592405, + 6990506, 8388607, 9786708, 11184809, 12582910, 13981011, 15379112, 16777216, + 22369621, 27962026, 33554431, 39146836, 44739241, 50331646, 55924051, + 61516456, 67108864, 89478485, 111848106, 134217727, 156587348, 178956969, + 201326590, 223696211, 246065832, 268435456, 357913941, 447392426, 536870911, + 626349396, 715827881, 805306366, 894784851, 984263336, 1073741824, 1431655765, + 1789569706, 2147483647, 2505397588, 2863311529, 3221225470, 3579139411, + 3937053352, 4294967296, 5726623061, 7158278826, 8589934591, 10021590356, + 11453246121, 12884901886, 14316557651, 15748213416, 17179869184, 22906492245, + 28633115306, 34359738367, 40086361428, 45812984489, 51539607550, 57266230611, + 62992853672, 68719476736, 91625968981, 114532461226, 137438953471, + 160345445716, 183251937961, 206158430206, 229064922451, 251971414696, + 274877906944, 366503875925, 458129844906, 549755813887, 641381782868, + 733007751849, 824633720830, 916259689811, 1007885658792, 1099511627776, + 1466015503701, 1832519379626, 2199023255551, 2565527131476, 2932031007401, + 3298534883326, 3665038759251, 4031542635176, 4398046511104, 5864062014805, + 7330077518506, 8796093022207, 10262108525908, 11728124029609, 13194139533310, + 14660155037011, 16126170540712, 17592186044416, 23456248059221, + 29320310074026, 35184372088831, 41048434103636, 46912496118441, + 52776558133246, 58640620148051, 64504682162856, 70368744177664, + 93824992236885, 117281240296106, 140737488355327, 164193736414548, + 187649984473769, 211106232532990, 234562480592211, 258018728651432, + 281474976710656, 375299968947541, 469124961184426, 562949953421311, + 656774945658196, 750599937895081, 844424930131966, 938249922368851, + 1032074914605736, 1125899906842624, 1501199875790165, 1876499844737706, + 2251799813685247, 2627099782632788, 3002399751580329, 3377699720527870, + 3752999689475411, 4128299658422952, 4503599627370496, 6004799503160661, + 7505999378950826, 9007199254740991, 10508399130531156, 12009599006321321, + 13510798882111486, 15011998757901651, 16513198633691816, 18014398509481984, + 24019198012642645, 30023997515803306, 36028797018963967, 42033596522124628, + 48038396025285289, 54043195528445950, 60047995031606611, 66052794534767272, + 72057594037927936, 
96076792050570581, 120095990063213226, 144115188075855871, + 168134386088498516, 192153584101141161, 216172782113783806, 240191980126426451, + 264211178139069096, 288230376151711744, 384307168202282325, 480383960252852906, + 576460752303423487, 672537544353994068, 768614336404564649, 864691128455135230, + 960767920505705811, 1056844712556276392, 1152921504606846976, 1537228672809129301, + 1921535841011411626, 2305843009213693951, 2690150177415976276, 3074457345618258601, + 3458764513820540926, 3843071682022823251, 4227378850225105576, 9223372036854775807 +] +``` diff --git a/extensions-contrib/spectator-histogram/pom.xml b/extensions-contrib/spectator-histogram/pom.xml new file mode 100644 index 000000000000..cf15f4bf0006 --- /dev/null +++ b/extensions-contrib/spectator-histogram/pom.xml @@ -0,0 +1,141 @@ + + + + + + org.apache.druid + druid + 29.0.0-SNAPSHOT + ../../pom.xml + + 4.0.0 + + org.apache.druid.extensions.contrib + druid-spectator-histogram + druid-spectator-histogram + Druid extension for generating histograms based on Netflix's Spectator library + + + + com.netflix.spectator + spectator-api + 1.7.0 + + + com.google.guava + guava + ${guava.version} + provided + + + org.apache.druid + druid-processing + ${project.parent.version} + provided + + + com.google.code.findbugs + jsr305 + provided + + + com.google.inject + guice + provided + + + com.fasterxml.jackson.core + jackson-databind + provided + + + it.unimi.dsi + fastutil + provided + + + com.fasterxml.jackson.core + jackson-core + provided + + + com.fasterxml.jackson.core + jackson-annotations + provided + + + it.unimi.dsi + fastutil-core + provided + + + com.google.errorprone + error_prone_annotations + provided + + + org.apache.druid + druid-sql + ${project.parent.version} + provided + + + org.apache.calcite + calcite-core + provided + + + org.apache.druid + druid-server + provided + ${project.parent.version} + + + + + junit + junit + test + + + org.apache.druid + druid-processing + ${project.parent.version} + test-jar + test + + + org.apache.druid + druid-server + ${project.parent.version} + test-jar + test + + + org.apache.druid + druid-sql + ${project.parent.version} + test-jar + test + + + diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java new file mode 100644 index 000000000000..61f839c0d246 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/NullableOffsetsHeader.java @@ -0,0 +1,387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import com.google.common.base.Preconditions; +import org.apache.druid.io.Channels; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.segment.serde.Serializer; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.apache.druid.segment.writeout.WriteOutBytes; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.nio.LongBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.BitSet; +import java.util.Objects; + +/** + * A header for storing offsets for columns with nullable values. + * Provides fast access to the offset start/end for a given row index, while supporting null values. + * For cases where data is sparse, this can save a lot of space. + * The nulls are stored in a bitset, and the offsets are stored in an int array. + * The cost of the nulls is 1 bit per row, the cost of the non-nulls is 4 bytes per row for the offset. + * In cases where every row is non-null, the bitset is omitted. + * In either case, we need the offsets because the values are variable length. + */ +public class NullableOffsetsHeader implements Serializer +{ + private final WriteOutBytes offsetsWriter; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final BitSet valueBitmap; + private int size = 0; + private final IntBuffer offsetsReader; + private final ByteBuffer bitsetBuffer; + private final int[] cumlCardinality; + private final int cardinality; + + private static final int CUML_COUNT_SIZE = Long.SIZE; + private static final int CUML_COUNT_BYTES = Long.BYTES; + + public static NullableOffsetsHeader read(ByteBuffer buffer) + { + // Size + BitmapLength + ValueBitMap + Offsets + final int size = buffer.getInt(); + final int bitmapLength = buffer.getInt(); + final int offsetPosition = buffer.position() + bitmapLength; + + // Grab the bitset + final ByteBuffer bitsetBuffer = buffer.slice(); + bitsetBuffer.limit(bitmapLength); + + int[] cumlCardinality = null; + int cardinality = 0; + if (bitmapLength >= CUML_COUNT_BYTES) { + // Create a quick lookup of the cumulative count of set bits up to + // a given int index in the bitset. This is used to quickly get to + // near the offset that we want. + // Tradeoff is memory use vs scanning per get() call. + LongBuffer bitBuffer = bitsetBuffer.asLongBuffer(); + cumlCardinality = new int[bitBuffer.limit()]; + int i = 0; + + while (bitBuffer.hasRemaining()) { + long bits = bitBuffer.get(); + cardinality += Long.bitCount(bits); + cumlCardinality[i++] = cardinality; + } + + // Count any bits in the remaining bytes after the end of the 64-bit chunks + // In cases where bitsetBuffer length doesn't directly divide into 64 + // there will be up to 7 bytes remaining, with at least 1 bit set somewhere + // else the bytes would have been omitted. + // We use cardinality to compute where offsets end, so the full count is important. + int baseByteIndex = i * (CUML_COUNT_SIZE / Byte.SIZE); + for (int byteIndex = baseByteIndex; byteIndex < bitsetBuffer.limit(); byteIndex++) { + // Read the bit set for this byte within the 64 bits that need counting. 
+ int bitset = bitsetBuffer.get(byteIndex) & 0xFF; + cardinality += BYTE_CARDINALITY[bitset]; + } + } else if (bitmapLength > 0) { + while (bitsetBuffer.hasRemaining()) { + int bitset = bitsetBuffer.get() & 0xFF; + cardinality += BYTE_CARDINALITY[bitset]; + } + } else if (buffer.hasRemaining()) { + // The header is "full", so the bitmap was omitted. + // We'll have an offset per entry. + cardinality = size; + } + + // Grab the offsets + buffer.position(offsetPosition); + final int offsetsLength = cardinality * Integer.BYTES; + final ByteBuffer offsetsBuffer = buffer.slice(); + offsetsBuffer.limit(offsetsLength); + + // Set the buffer position to after the offsets + // to mark this whole header as "read" + buffer.position(offsetPosition + offsetsLength); + + return new NullableOffsetsHeader(size, bitsetBuffer, cardinality, cumlCardinality, offsetsBuffer); + } + + public static NullableOffsetsHeader create(SegmentWriteOutMedium segmentWriteOutMedium) throws IOException + { + Preconditions.checkNotNull(segmentWriteOutMedium, "segmentWriteOutMedium"); + return new NullableOffsetsHeader(segmentWriteOutMedium); + } + + // Constructor for reading + private NullableOffsetsHeader(int size, ByteBuffer bitset, int cardinality, int[] cumlCardinality, ByteBuffer offsetsBuffer) + { + this.segmentWriteOutMedium = null; + this.offsetsWriter = null; + this.valueBitmap = null; + + this.size = size; + this.offsetsReader = offsetsBuffer.asIntBuffer(); + this.bitsetBuffer = bitset; + this.cumlCardinality = cumlCardinality; + this.cardinality = cardinality; + } + + // Constructor for writing + private NullableOffsetsHeader(SegmentWriteOutMedium segmentWriteOutMedium) throws IOException + { + this.offsetsReader = null; + this.cumlCardinality = null; + this.cardinality = 0; + this.bitsetBuffer = null; + + this.valueBitmap = new BitSet(); + this.segmentWriteOutMedium = segmentWriteOutMedium; + this.offsetsWriter = this.segmentWriteOutMedium.makeWriteOutBytes(); + } + + public int size() + { + return size; + } + + public int getCardinality() + { + return cardinality; + } + + private void checkWriteable() + { + if (valueBitmap == null) { + throw new NullPointerException("Write during deserialization"); + } + } + + private void checkReadable() + { + if (offsetsReader == null) { + throw new NullPointerException("Read during serialization"); + } + } + + public void writeNull() + { + checkWriteable(); + + // Nothing to write, but we need to "store" the null + size++; + } + + public void writeOffset(int offset) throws IOException + { + checkWriteable(); + + int index = size++; + valueBitmap.set(index); + offsetsWriter.writeInt(offset); + } + + @Override + public long getSerializedSize() + { + checkWriteable(); + + // Size + BitmapLength + ValueBitMap + Offsets + int sizeField = Integer.BYTES; + int bitmapLength = Integer.BYTES; + // if all values are set, we omit the bitmap, so bytes taken by the bitmap is zero + // bitset.length returns the highest bit index that's set. + // i.e. the length in bits. Round up to the nearest byte. + int valueBitMap = (size == valueBitmap.cardinality()) ? 
0 : (valueBitmap.length() + 7) / 8; + int offsetSize = valueBitmap.cardinality() * Integer.BYTES; + return sizeField + bitmapLength + valueBitMap + offsetSize; + } + + @Override + public void writeTo(WritableByteChannel channel, @Nullable FileSmoosher smoosher) throws IOException + { + checkWriteable(); + + // Size + BitmapLength + ValueBitMap + Offsets + ByteBuffer headerBytes = ByteBuffer.allocate(Integer.BYTES + Integer.BYTES); + + // Size + headerBytes.putInt(size); + + // BitmapLength + byte[] bytes = null; + + // Omit bitmap if all entries are set + if (size == valueBitmap.cardinality()) { + headerBytes.putInt(0); + } else { + bytes = valueBitmap.toByteArray(); + headerBytes.putInt(bytes.length); + } + + // Write the size and length + headerBytes.flip(); + Channels.writeFully(channel, headerBytes); + + // Write the ValueBitmap + if (bytes != null) { + Channels.writeFully(channel, ByteBuffer.wrap(bytes)); + } + + // Write the Offsets + offsetsWriter.writeTo(channel); + } + + @Nullable + public Offset get(int index) + { + checkReadable(); + + // Return null for any out of range indexes + if (this.cardinality == 0 || index < 0 || index >= this.size) { + return null; + } + + // Find the index to the offset for this row index + int offsetIndex = getOffsetIndex(index); + if (offsetIndex < 0) { + return null; + } + + // Special case for the first entry + if (offsetIndex == 0) { + return new Offset(0, this.offsetsReader.get(0)); + } + + return new Offset(this.offsetsReader.get(offsetIndex - 1), this.offsetsReader.get(offsetIndex)); + } + + // Exposed for testing + int getOffsetIndex(int index) + { + if (this.cardinality == this.size) { + // If "full" return index + return index; + } + + // Bitset omits trailing nulls, so if index is off the end it's a null. + final int bytePos = index / Byte.SIZE; + if (bytePos >= this.bitsetBuffer.limit()) { + return -1; + } + + final int indexByte = this.bitsetBuffer.get(bytePos) & 0xFF; + // Check for null, is our bit is set. + if ((indexByte & (1 << index % Byte.SIZE)) == 0) { + return -1; + } + + // Get the cardinality for the (index/CUML_COUNT_SIZE)th entry. + // Use that to jump to that point in the bitset to add any incremental bit counts + // until we get to index. + // That is then the index position of the offset in the offsets buffer. + final int baseInt = index / CUML_COUNT_SIZE; + int baseByteIndex = baseInt * (CUML_COUNT_SIZE / Byte.SIZE); + int offsetIndex = baseInt == 0 ? 0 : this.cumlCardinality[baseInt - 1]; + + // We always need to count the bits in the byte containing our index. + // So do that here, then go back and fill in the counts for the + // bytes between baseByteIndex and bytePos. + // We need to mask out only the bits up to and including our index + // to avoid counting later bits. + int mask = (1 << index - (bytePos * Byte.SIZE)) - 1; + int byteCardinality = BYTE_CARDINALITY[indexByte & mask]; + offsetIndex += byteCardinality; + + // After getting the cumulative cardinality upto the 64 bit boundary immediately + // preceeding the 64 bits that contains our index, we need to accumulate the + // cardinality up to the byte including our index. + for (int byteIndex = baseByteIndex; byteIndex < bytePos; byteIndex++) { + // Read the bit set for this byte within the 64 bits that need counting. 
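      // Whole bytes strictly before the target byte contribute their full popcount
      // from the BYTE_CARDINALITY table; the target byte itself was masked and counted above.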
+ int bitset = this.bitsetBuffer.get(byteIndex) & 0xFF; + offsetIndex += BYTE_CARDINALITY[bitset]; + } + + return offsetIndex; + } + + public static class Offset + { + private final int start; + private final int end; + + Offset(int start, int end) + { + this.start = start; + this.end = end; + } + + int getStart() + { + return start; + } + + int getEnd() + { + return end; + } + + int getLength() + { + return end - start; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Offset offset = (Offset) o; + return start == offset.start && end == offset.end; + } + + @Override + public int hashCode() + { + return Objects.hash(start, end); + } + } + + // The count of bits in a byte, keyed by the byte value itself + private static final int[] BYTE_CARDINALITY = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 + }; +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java new file mode 100644 index 000000000000..8fa2c5044f8f --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogram.java @@ -0,0 +1,423 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.netflix.spectator.api.histogram.PercentileBuckets; +import it.unimi.dsi.fastutil.shorts.Short2LongMap; +import it.unimi.dsi.fastutil.shorts.Short2LongMaps; +import it.unimi.dsi.fastutil.shorts.Short2LongOpenHashMap; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.jackson.JacksonUtils; +import org.apache.druid.java.util.common.parsers.ParseException; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +// Since queries don't come from SpectatorHistogramAggregator in the case of +// using longSum or doubleSum aggregations. They come from LongSumBufferAggregator. +// Therefore, we extended Number here. +// This will prevent class casting exceptions if trying to query with sum rather +// than explicitly as a SpectatorHistogram +// +// The SpectatorHistogram is a Number. That number is of intValue(), +// which is the count of the number of events in the histogram +// (adding up the counts across all buckets). +// +// There are a few useful aggregators, which as Druid Native Queries use: +// type: "longSum" - Aggregates and returns the number of events in the histogram. +// i.e. the sum of all bucket counts. +// type: "spectatorHistogramDistribution" - Aggregates and returns a map (bucketIndex -> bucketCount) +// representing a SpectatorHistogram. The represented data is a distribution. +// type: "spectatorHistogramTimer" - Aggregates and returns a map (bucketIndex -> bucketCount) +// representing a SpectatorHistogram. The represented data is measuring time. +public class SpectatorHistogram extends Number +{ + private static final int MAX_ENTRY_BYTES = Short.BYTES + Long.BYTES; + private static final int LOW_COUNT_FLAG = 0x0200; + private static final int BYTE_VALUE = 0x8000; + private static final int SHORT_VALUE = 0x4000; + private static final int INT_VALUE = 0xC000; + private static final int VALUE_SIZE_MASK = 0xFC00; + private static final int KEY_MASK = 0x01FF; + + private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); + + // Values are packed into few bytes depending on the size of the counts + // The bucket index falls in the range 0-276, so we need 9 bits for the bucket index. + // Counts can range from 1 to Long.MAX_VALUE, so we need 1 to 64 bits for the value. + // To optimize storage, we use the remaining top 7 bits of the bucket index short to + // encode the storage type for the count value. + // AAbb bbYx xxxx xxxx + // | +-- 9 bits - The bucket index + // +------------- 1 bit - Low-count flag, set if count <= 63 + // ++++ ++-------------- 6 bits - If low-count flag is set, + // The count value, zero extra bytes used. + // If low-count flag is not set, + // The value length indicator as encoded below + // ++------------------- 2 bits - 00 = 8 bytes used for value + // 10 = 1 byte used for value + // 01 = 2 bytes used for value + // 11 = 4 bytes used for value + // + // Example: + // ------------------------------------------------------------------------------------------ + // Consider the histogram: [10, 30, 40x3, 50x2, 100x256] + // That is there is one value of 10, and 3 values of 40, etc. 
As shown in the table below:
  //
  // Bucket Index | Bucket Range | Bucket Count
  //     10       |   [10,11)    |      1
  //     17       |   [26,31)    |      1
  //     19       |   [36,41)    |      3
  //     21       |   [46,51)    |      2
  //     25       |   [85,106)   |      256
  //
  // See com.netflix.spectator.api.histogram.PercentileBuckets
  // for an explanation of how the bucket indexes (10, 17, 19, 21, 25)
  // are assigned to the raw values.
  //
  // Based on the specification above, the histogram is serialized into a
  // byte array to minimize storage size:
  // In Base 10: [64, 25, 1, 0, 6, 10, 6, 17, 14, 19, 10, 21]
  // In Binary: [01000000, 00011001, 00000001, 00000000, 00000110, 00001010,
  //             00000110, 00010001, 00001110, 00010011, 00001010, 00010101]
  //
  // Each group of bits (which varies in length) represents a histogram bucket index and count.
  // 01000000000110010000000100000000
  // 01 - Since the low-count bit is NOT set, the leading 2 bits 01 indicate that the bucket count
  //      value is encoded in 2 bytes.
  // 0000 - Since the low-count bit is NOT set, these bits are unused; the bucket count is
  //        encoded in an additional two bytes.
  // 0 - Low-count bit is NOT set
  // 000011001 - These 9 bits represent the bucket index of 25
  // 0000000100000000 - These 16 bits represent the bucket count of 256
  //
  // 0000011000001010
  // 000001 - Low-count bit IS set, so these 6 bits represent a bucket count of 1
  // 1 - Low-count bit IS set
  // 000001010 - These 9 bits represent the bucket index of 10
  //
  // 0000011000010001
  // 000001 - Bucket count of 1
  // 1 - Low-count bit IS set
  // 000010001 - Bucket index of 17
  //
  // 0000111000010011
  // 000011 - Bucket count of 3
  // 1 - Low-count bit IS set
  // 000010011 - Bucket index of 19
  //
  // 0000101000010101
  // 000010 - Bucket count of 2
  // 1 - Low-count bit IS set
  // 000010101 - Bucket index of 21
  // ------------------------------------------------------------------------------------------
  private Short2LongOpenHashMap backingMap;

  // The sum of counts in the histogram.
  // These are accumulated when an entry is added, or when another histogram is merged into this one.
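  // The Number interface methods (intValue, longValue, and so on) report this total, which is why
  // querying a histogram column with a longSum or doubleSum aggregator returns the event count.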
+ private long sumOfCounts = 0; + + static int getMaxIntermdiateHistogramSize() + { + return PercentileBuckets.length() * MAX_ENTRY_BYTES; + } + + @Nullable + static SpectatorHistogram deserialize(Object serializedHistogram) + { + if (serializedHistogram == null) { + return null; + } + if (serializedHistogram instanceof byte[]) { + return fromByteBuffer(ByteBuffer.wrap((byte[]) serializedHistogram)); + } + if (serializedHistogram instanceof SpectatorHistogram) { + return (SpectatorHistogram) serializedHistogram; + } + if (serializedHistogram instanceof String) { + // Try parse as JSON into HashMap + try { + HashMap map = JSON_MAPPER.readerFor(HashMap.class).readValue((String) serializedHistogram); + SpectatorHistogram histogram = new SpectatorHistogram(); + for (Map.Entry entry : map.entrySet()) { + histogram.add(entry.getKey(), entry.getValue()); + } + return histogram; + } + catch (JsonProcessingException e) { + throw new ParseException((String) serializedHistogram, e, "String cannot be deserialized as JSON to a Spectator Histogram"); + } + } + if (serializedHistogram instanceof HashMap) { + SpectatorHistogram histogram = new SpectatorHistogram(); + for (Map.Entry entry : ((HashMap) serializedHistogram).entrySet()) { + histogram.add(entry.getKey(), (Number) entry.getValue()); + } + return histogram; + } + throw new ParseException( + null, + "Object cannot be deserialized to a Spectator Histogram " + + serializedHistogram.getClass() + ); + } + + @Nullable + static SpectatorHistogram fromByteBuffer(ByteBuffer buffer) + { + if (buffer == null || !buffer.hasRemaining()) { + return null; + } + SpectatorHistogram histogram = new SpectatorHistogram(); + while (buffer.hasRemaining()) { + short key = buffer.getShort(); + short idx = (short) (key & KEY_MASK); + long val; + if ((key & LOW_COUNT_FLAG) == LOW_COUNT_FLAG) { + // Value/count is encoded in the top 6 bits of the short + val = (key & VALUE_SIZE_MASK) >>> 10; + } else { + switch (key & VALUE_SIZE_MASK) { + case BYTE_VALUE: + val = buffer.get() & 0xFF; + break; + + case SHORT_VALUE: + val = buffer.getShort() & 0xFFFF; + break; + + case INT_VALUE: + val = buffer.getInt() & 0xFFFFFFFFL; + break; + + default: + val = buffer.getLong(); + break; + } + } + + histogram.add(idx, val); + } + if (histogram.isEmpty()) { + return null; + } + return histogram; + } + + private Short2LongOpenHashMap writableMap() + { + if (backingMap == null) { + backingMap = new Short2LongOpenHashMap(); + } + return backingMap; + } + + private Short2LongMap readableMap() + { + if (isEmpty()) { + return Short2LongMaps.EMPTY_MAP; + } + return backingMap; + } + + @Nullable + byte[] toBytes() + { + if (isEmpty()) { + return null; + } + ByteBuffer buffer = ByteBuffer.allocate(MAX_ENTRY_BYTES * size()); + for (Short2LongMap.Entry e : Short2LongMaps.fastIterable(readableMap())) { + short key = e.getShortKey(); + long value = e.getLongValue(); + if (value <= 0x3F) { + // Value/count is encoded in the top 6 bits of the key bytes + buffer.putShort((short) ((key | LOW_COUNT_FLAG) | ((int) ((value << 10) & VALUE_SIZE_MASK)))); + } else if (value <= 0xFF) { + buffer.putShort((short) (key | BYTE_VALUE)); + buffer.put((byte) value); + } else if (value <= 0xFFFF) { + buffer.putShort((short) (key | SHORT_VALUE)); + buffer.putShort((short) value); + } else if (value <= 0xFFFFFFFFL) { + buffer.putShort((short) (key | INT_VALUE)); + buffer.putInt((int) value); + } else { + buffer.putShort(key); + buffer.putLong(value); + } + } + return Arrays.copyOf(buffer.array(), buffer.position()); + } + 
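  // Record a single raw value: map it to its Spectator bucket index and increment that bucket's count.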
+ void insert(Number num) + { + this.add(PercentileBuckets.indexOf(num.longValue()), 1L); + } + + void merge(SpectatorHistogram source) + { + if (source == null) { + return; + } + Short2LongOpenHashMap writableMap = writableMap(); + for (Short2LongMap.Entry entry : Short2LongMaps.fastIterable(source.readableMap())) { + writableMap.addTo(entry.getShortKey(), entry.getLongValue()); + this.sumOfCounts += entry.getLongValue(); + } + } + + // Exposed for testing + void add(int bucket, long count) + { + if (bucket >= PercentileBuckets.length() || bucket < 0) { + throw new IAE("Bucket index out of range (0, " + PercentileBuckets.length() + ")"); + } + writableMap().addTo((short) bucket, count); + this.sumOfCounts += count; + } + + private void add(Object key, Number value) + { + if (key instanceof String) { + this.add(Integer.parseInt((String) key), value.longValue()); + return; + } + if (Number.class.isAssignableFrom(key.getClass())) { + this.add(((Number) key).intValue(), value.longValue()); + return; + } + throw new IAE( + "Cannot add " + key.getClass() + "/" + value.getClass() + " to a Spectator Histogram" + ); + } + + // Used for testing + long get(int idx) + { + return readableMap().get((short) idx); + } + + // Accessible for serialization + void serialize(JsonGenerator jsonGenerator, SerializerProvider serializerProvider) throws IOException + { + JacksonUtils.writeObjectUsingSerializerProvider(jsonGenerator, serializerProvider, readableMap()); + } + + public boolean isEmpty() + { + return backingMap == null || backingMap.isEmpty(); + } + + public int size() + { + return readableMap().size(); + } + + public long getSum() + { + return sumOfCounts; + } + + @Override + public String toString() + { + return readableMap().toString(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SpectatorHistogram that = (SpectatorHistogram) o; + return Objects.equals(readableMap(), that.readableMap()); + } + + @Override + public int hashCode() + { + return readableMap().hashCode(); + } + + @Override + public int intValue() + { + return (int) getSum(); + } + + @Override + public long longValue() + { + return getSum(); + } + + @Override + public float floatValue() + { + return getSum(); + } + + @Override + public double doubleValue() + { + return getSum(); + } + + /** + * Compute approximate percentile for the histogram + * @param percentile The percentile to compute + * @return the approximate percentile + */ + public double getPercentileValue(double percentile) + { + double[] pcts = new double[]{percentile}; + return getPercentileValues(pcts)[0]; + } + + /** + * Compute approximate percentiles for the histogram + * @param percentiles The percentiles to compute + * @return an array of approximate percentiles in the order of those provided + */ + public double[] getPercentileValues(double[] percentiles) + { + long[] counts = new long[PercentileBuckets.length()]; + for (Map.Entry e : readableMap().short2LongEntrySet()) { + counts[e.getKey()] = e.getValue(); + } + double[] results = new double[percentiles.length]; + PercentileBuckets.percentiles(counts, percentiles, results); + return results; + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregator.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregator.java new file mode 100644 index 
000000000000..200a3204b7ac --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregator.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.google.errorprone.annotations.concurrent.GuardedBy; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.segment.ColumnValueSelector; + +import javax.annotation.Nullable; + + +/** + * Aggregator to build Spectator style histograms. + */ +public class SpectatorHistogramAggregator implements Aggregator +{ + + private final ColumnValueSelector selector; + + @GuardedBy("this") + private final SpectatorHistogram counts; + + + public SpectatorHistogramAggregator(ColumnValueSelector selector) + { + this.selector = selector; + counts = new SpectatorHistogram(); + } + + @Override + public void aggregate() + { + Object obj = selector.getObject(); + if (obj == null) { + return; + } + if (obj instanceof SpectatorHistogram) { + SpectatorHistogram other = (SpectatorHistogram) obj; + synchronized (this) { + counts.merge(other); + } + } else if (obj instanceof Number) { + synchronized (this) { + counts.insert((Number) obj); + } + } else { + throw new IAE( + "Expected a long or a SpectatorHistogramMap, but received [%s] of type [%s]", + obj, + obj.getClass() + ); + } + } + + @Nullable + @Override + public synchronized Object get() + { + return counts.isEmpty() ? null : counts; + } + + @Override + public synchronized float getFloat() + { + return counts.getSum(); + } + + @Override + public synchronized long getLong() + { + return counts.getSum(); + } + + @Override + public synchronized boolean isNull() + { + return counts.isEmpty(); + } + + @Override + public synchronized void close() + { + + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java new file mode 100644 index 000000000000..235d4781da42 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorFactory.java @@ -0,0 +1,372 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import org.apache.druid.query.aggregation.AggregateCombiner; +import org.apache.druid.query.aggregation.Aggregator; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorFactoryNotMergeableException; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.query.aggregation.ObjectAggregateCombiner; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ValueType; + +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; + +@JsonTypeName(SpectatorHistogramAggregatorFactory.TYPE_NAME) +public class SpectatorHistogramAggregatorFactory extends AggregatorFactory +{ + @Nonnull + private final String name; + + @Nonnull + private final String fieldName; + + @Nonnull + private final byte cacheTypeId; + + public static final String TYPE_NAME = "spectatorHistogram"; + + @JsonCreator + public SpectatorHistogramAggregatorFactory( + @JsonProperty("name") final String name, + @JsonProperty("fieldName") final String fieldName + ) + { + this(name, fieldName, AggregatorUtil.SPECTATOR_HISTOGRAM_CACHE_TYPE_ID); + } + + public SpectatorHistogramAggregatorFactory( + final String name, + final String fieldName, + final byte cacheTypeId + ) + { + this.name = Objects.requireNonNull(name, "Must have a valid, non-null aggregator name"); + this.fieldName = Objects.requireNonNull(fieldName, "Parameter fieldName must be specified"); + this.cacheTypeId = cacheTypeId; + } + + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder( + cacheTypeId + ).appendString(fieldName).build(); + } + + + @Override + public Aggregator factorize(ColumnSelectorFactory metricFactory) + { + return new SpectatorHistogramAggregator(metricFactory.makeColumnValueSelector(fieldName)); + } + + @Override + public BufferAggregator factorizeBuffered(ColumnSelectorFactory metricFactory) + { + return new SpectatorHistogramBufferAggregator(metricFactory.makeColumnValueSelector(fieldName)); + } + + // This is used when writing metrics to segment files to check whether the column is sorted. + // Since there is no sensible way really to compare histograms, compareTo always returns 1. 
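  // More precisely, the comparator below orders non-null values before nulls and otherwise falls
  // back to comparing hashCodes, yielding a stable but semantically arbitrary order.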
+ public static final Comparator COMPARATOR = (o, o1) -> { + if (o == null && o1 == null) { + return 0; + } else if (o != null && o1 == null) { + return -1; + } else if (o == null) { + return 1; + } + return Integer.compare(o.hashCode(), o1.hashCode()); + }; + + @Override + public Comparator getComparator() + { + return COMPARATOR; + } + + @Override + public Object combine(@Nullable Object lhs, @Nullable Object rhs) + { + if (lhs == null) { + return rhs; + } + if (rhs == null) { + return lhs; + } + SpectatorHistogram lhsHisto = (SpectatorHistogram) lhs; + SpectatorHistogram rhsHisto = (SpectatorHistogram) rhs; + lhsHisto.merge(rhsHisto); + return lhsHisto; + } + + @Override + public AggregatorFactory getCombiningFactory() + { + return new SpectatorHistogramAggregatorFactory(name, name); + } + + @Override + public AggregatorFactory getMergingFactory(AggregatorFactory other) throws AggregatorFactoryNotMergeableException + { + if (other.getName().equals(this.getName()) && this.getClass() == other.getClass()) { + return getCombiningFactory(); + } else { + throw new AggregatorFactoryNotMergeableException(this, other); + } + } + + @Override + public List getRequiredColumns() + { + return Collections.singletonList( + new SpectatorHistogramAggregatorFactory( + fieldName, + fieldName + ) + ); + } + + @Override + public Object deserialize(Object serializedHistogram) + { + return SpectatorHistogram.deserialize(serializedHistogram); + } + + @Nullable + @Override + public Object finalizeComputation(@Nullable Object object) + { + return object; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @JsonProperty + public String getFieldName() + { + return fieldName; + } + + @Override + public List requiredFields() + { + return Collections.singletonList(fieldName); + } + + @Override + public String getComplexTypeName() + { + return TYPE_NAME; + } + + @Override + public ValueType getType() + { + return ValueType.COMPLEX; + } + + @Override + public ValueType getFinalizedType() + { + return ValueType.COMPLEX; + } + + @Override + public int getMaxIntermediateSize() + { + return SpectatorHistogram.getMaxIntermdiateHistogramSize(); + } + + @Override + public AggregateCombiner makeAggregateCombiner() + { + return new ObjectAggregateCombiner() + { + private SpectatorHistogram combined = null; + + @Override + public void reset(final ColumnValueSelector selector) + { + combined = null; + fold(selector); + } + + @Override + public void fold(final ColumnValueSelector selector) + { + SpectatorHistogram other = (SpectatorHistogram) selector.getObject(); + if (other == null) { + return; + } + if (combined == null) { + combined = new SpectatorHistogram(); + } + combined.merge(other); + } + + @Nullable + @Override + public SpectatorHistogram getObject() + { + return combined; + } + + @Override + public Class classOfObject() + { + return SpectatorHistogram.class; + } + }; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) { + return true; + } + if (o == null || !getClass().equals(o.getClass())) { + return false; + } + final SpectatorHistogramAggregatorFactory that = (SpectatorHistogramAggregatorFactory) o; + + return Objects.equals(name, that.name) && + Objects.equals(fieldName, that.fieldName); + } + + @Override + public int hashCode() + { + return Objects.hash(name, fieldName); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name=" + name + + ", fieldName=" + fieldName + + "}"; + } + + 
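  // The Timer and Distribution subclasses below share this implementation; they differ only in
  // their registered JSON type name, complex type name, and cache key type ID.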
@JsonTypeName(SpectatorHistogramAggregatorFactory.Timer.TYPE_NAME) + public static class Timer extends SpectatorHistogramAggregatorFactory + { + public static final String TYPE_NAME = "spectatorHistogramTimer"; + + public Timer( + @JsonProperty("name") final String name, + @JsonProperty("fieldName") final String fieldName + ) + { + super(name, fieldName, AggregatorUtil.SPECTATOR_HISTOGRAM_TIMER_CACHE_TYPE_ID); + } + + public Timer(final String name, final String fieldName, final byte cacheTypeId) + { + super(name, fieldName, cacheTypeId); + } + + @Override + public String getComplexTypeName() + { + return TYPE_NAME; + } + + @Override + public AggregatorFactory getCombiningFactory() + { + return new SpectatorHistogramAggregatorFactory.Timer(getName(), getName()); + } + + @Override + public List getRequiredColumns() + { + return Collections.singletonList( + new SpectatorHistogramAggregatorFactory.Timer( + getFieldName(), + getFieldName() + ) + ); + } + } + + @JsonTypeName(SpectatorHistogramAggregatorFactory.Distribution.TYPE_NAME) + public static class Distribution extends SpectatorHistogramAggregatorFactory + { + public static final String TYPE_NAME = "spectatorHistogramDistribution"; + + public Distribution( + @JsonProperty("name") final String name, + @JsonProperty("fieldName") final String fieldName + ) + { + super(name, fieldName, AggregatorUtil.SPECTATOR_HISTOGRAM_DISTRIBUTION_CACHE_TYPE_ID); + } + + public Distribution(final String name, final String fieldName, final byte cacheTypeId) + { + super(name, fieldName, cacheTypeId); + } + + @Override + public String getComplexTypeName() + { + return TYPE_NAME; + } + + @Override + public AggregatorFactory getCombiningFactory() + { + return new SpectatorHistogramAggregatorFactory.Distribution(getName(), getName()); + } + + @Override + public List getRequiredColumns() + { + return Collections.singletonList( + new SpectatorHistogramAggregatorFactory.Distribution( + getFieldName(), + getFieldName() + ) + ); + } + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramBufferAggregator.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramBufferAggregator.java new file mode 100644 index 000000000000..f2a808d44d75 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramBufferAggregator.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import com.google.common.base.Preconditions; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.aggregation.BufferAggregator; +import org.apache.druid.segment.ColumnValueSelector; + +import javax.annotation.Nonnull; +import java.nio.ByteBuffer; +import java.util.IdentityHashMap; + +/** + * Aggregator that builds Spectator Histograms over numeric values read from {@link ByteBuffer} + */ +public class SpectatorHistogramBufferAggregator implements BufferAggregator +{ + + @Nonnull + private final ColumnValueSelector selector; + private final IdentityHashMap<ByteBuffer, Int2ObjectMap<SpectatorHistogram>> histogramCache = new IdentityHashMap<>(); + + public SpectatorHistogramBufferAggregator( + final ColumnValueSelector valueSelector + ) + { + Preconditions.checkNotNull(valueSelector); + this.selector = valueSelector; + } + + @Override + public void init(ByteBuffer buffer, int position) + { + SpectatorHistogram emptyCounts = new SpectatorHistogram(); + addToCache(buffer, position, emptyCounts); + } + + @Override + public void aggregate(ByteBuffer buffer, int position) + { + Object obj = selector.getObject(); + if (obj == null) { + return; + } + SpectatorHistogram counts = histogramCache.get(buffer).get(position); + if (obj instanceof SpectatorHistogram) { + SpectatorHistogram other = (SpectatorHistogram) obj; + counts.merge(other); + } else if (obj instanceof Number) { + counts.insert((Number) obj); + } else { + throw new IAE( + "Expected a SpectatorHistogram or a Number, but received [%s] of type [%s]", + obj, + obj.getClass() + ); + } + } + + @Override + public Object get(final ByteBuffer buffer, final int position) + { + // histogramCache is an IdentityHashMap where the reference of the buffer is used for equality checks. + // So the returned object isn't impacted by changes made to the buffer object by concurrent threads.
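+ // Note also that aggregate() mutates the cached SpectatorHistogram in place, so the lookup + // below returns the live histogram for this (buffer, position) slot rather than a copy.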
+ + SpectatorHistogram spectatorHistogram = histogramCache.get(buffer).get(position); + if (spectatorHistogram.isEmpty()) { + return null; + } + return spectatorHistogram; + } + + @Override + public float getFloat(final ByteBuffer buffer, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public long getLong(final ByteBuffer buffer, final int position) + { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public void close() + { + histogramCache.clear(); + } + + @Override + public void relocate(int oldPosition, int newPosition, ByteBuffer oldBuffer, ByteBuffer newBuffer) + { + SpectatorHistogram histogram = histogramCache.get(oldBuffer).get(oldPosition); + addToCache(newBuffer, newPosition, histogram); + final Int2ObjectMap<SpectatorHistogram> map = histogramCache.get(oldBuffer); + map.remove(oldPosition); + if (map.isEmpty()) { + histogramCache.remove(oldBuffer); + } + } + + private void addToCache(final ByteBuffer buffer, final int position, final SpectatorHistogram histogram) + { + Int2ObjectMap<SpectatorHistogram> map = histogramCache.computeIfAbsent( + buffer, + b -> new Int2ObjectOpenHashMap<>() + ); + map.put(position, histogram); + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramColumnPartSupplier.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramColumnPartSupplier.java new file mode 100644 index 000000000000..4b4b218a037d --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramColumnPartSupplier.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.druid.spectator.histogram; + +import com.google.common.base.Supplier; +import org.apache.druid.segment.column.ComplexColumn; + +public class SpectatorHistogramColumnPartSupplier implements Supplier<ComplexColumn> +{ + private final SpectatorHistogramIndexed complexType; + private final String typeName; + + public SpectatorHistogramColumnPartSupplier(final String typeName, final SpectatorHistogramIndexed complexType) + { + this.complexType = complexType; + this.typeName = typeName; + } + + @Override + public ComplexColumn get() + { + return new SpectatorHistogramIndexBasedComplexColumn(typeName, complexType); + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramComplexMetricSerde.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramComplexMetricSerde.java new file mode 100644 index 000000000000..ffad30dd81c0 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramComplexMetricSerde.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.druid.spectator.histogram; + +import org.apache.druid.data.input.InputRow; +import org.apache.druid.segment.GenericColumnSerializer; +import org.apache.druid.segment.column.ColumnBuilder; +import org.apache.druid.segment.data.ObjectStrategy; +import org.apache.druid.segment.serde.ComplexMetricExtractor; +import org.apache.druid.segment.serde.ComplexMetricSerde; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; + +import java.nio.ByteBuffer; + +public class SpectatorHistogramComplexMetricSerde extends ComplexMetricSerde +{ + private static final SpectatorHistogramObjectStrategy STRATEGY = new SpectatorHistogramObjectStrategy(); + private final String typeName; + + SpectatorHistogramComplexMetricSerde(String type) + { + this.typeName = type; + } + + @Override + public String getTypeName() + { + return typeName; + } + + @Override + public ComplexMetricExtractor getExtractor() + { + return new ComplexMetricExtractor() + { + @Override + public Class extractedClass() + { + return SpectatorHistogram.class; + } + + @Override + public Object extractValue(final InputRow inputRow, final String metricName) + { + final Object object = inputRow.getRaw(metricName); + if (object == null || object instanceof SpectatorHistogram || object instanceof Number) { + return object; + } + if (object instanceof String) { + String objectString = (String) object; + // Ignore empty values + if (objectString.trim().isEmpty()) { + return null; + } + // Treat as long number, if it looks like a number + if (Character.isDigit((objectString).charAt(0))) { + return Long.parseLong((String) object); + } + } + // Delegate all other interpretation to SpectatorHistogram + return SpectatorHistogram.deserialize(object); + } + }; + } + + @Override + public void deserializeColumn(ByteBuffer buffer, ColumnBuilder builder) + { + final SpectatorHistogramIndexed column = SpectatorHistogramIndexed.read( + buffer, + STRATEGY + ); + builder.setComplexColumnSupplier(new SpectatorHistogramColumnPartSupplier(this.typeName, column)); + } + + @Override + public ObjectStrategy getObjectStrategy() + { + return STRATEGY; + } + + @Override + public GenericColumnSerializer getSerializer(SegmentWriteOutMedium segmentWriteOutMedium, String column) + { + return SpectatorHistogramSerializer.create( + segmentWriteOutMedium, + column, + this.getObjectStrategy() + ); + } + +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexBasedComplexColumn.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexBasedComplexColumn.java new file mode 100644 index 000000000000..2e54fcf0d45e --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexBasedComplexColumn.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.column.ComplexColumn; +import org.apache.druid.segment.data.ReadableOffset; + +import javax.annotation.Nullable; + +public class SpectatorHistogramIndexBasedComplexColumn implements ComplexColumn +{ + private final SpectatorHistogramIndexed index; + private final String typeName; + private static final Number ZERO = 0; + + public SpectatorHistogramIndexBasedComplexColumn(String typeName, SpectatorHistogramIndexed index) + { + this.index = index; + this.typeName = typeName; + } + + @Override + public Class getClazz() + { + return index.getClazz(); + } + + @Override + public String getTypeName() + { + return typeName; + } + + @Override + public Object getRowValue(int rowNum) + { + return index.get(rowNum); + } + + @Override + public int getLength() + { + return index.size(); + } + + @Override + public void close() + { + } + + @Override + public ColumnValueSelector makeColumnValueSelector(ReadableOffset offset) + { + // Use ColumnValueSelector directly so that we support being queried as a Number using + // longSum or doubleSum aggregators, the NullableNumericBufferAggregator will call isNull. + // This allows us to behave as a Number or SpectatorHistogram object. + // When queried as a Number, we're returning the count of entries in the histogram. + // As such, we can safely return 0 where the histogram is null. + return new ColumnValueSelector() + { + @Override + public boolean isNull() + { + return getObject() == null; + } + + private Number getOrZero() + { + SpectatorHistogram histogram = getObject(); + return histogram != null ? histogram : ZERO; + } + + @Override + public long getLong() + { + return getOrZero().longValue(); + } + + @Override + public float getFloat() + { + return getOrZero().floatValue(); + } + + @Override + public double getDouble() + { + return getOrZero().doubleValue(); + } + + @Nullable + @Override + public SpectatorHistogram getObject() + { + return (SpectatorHistogram) getRowValue(offset.getOffset()); + } + + @Override + public Class classOfObject() + { + return getClazz(); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("column", SpectatorHistogramIndexBasedComplexColumn.this); + } + }; + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexed.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexed.java new file mode 100644 index 000000000000..54b76bb05f32 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramIndexed.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.data.CloseableIndexed; +import org.apache.druid.segment.data.IndexedIterable; +import org.apache.druid.segment.data.ObjectStrategy; +import org.apache.druid.segment.serde.Serializer; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.Iterator; + +/** + * A generic, flat storage mechanism. Use static SpectatorHistogramSerializer.create to construct. + * Supports efficient storage for sparse columns that contain lots of nulls. + *
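+ * For example, a column where most rows are null stores only the presence bitmap and + * a 4-byte offset per non-null value; the null rows add no per-row payload.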
+ * Storage Format: + * <p> + * byte 1: version (0x1) + * byte 2: reserved flags + * bytes 3-6 => numBytesUsed for header and values + * bytes 7-some => header including count, bitmap of present values and offsets to values. + * bytes (header.serializedSize + 6)-(numBytesUsed + 6): bytes representing the values. If the offset is null, then the value is null. + */ +public class SpectatorHistogramIndexed implements CloseableIndexed<SpectatorHistogram>, Serializer +{ + static final byte VERSION_ONE = 0x1; + static final byte RESERVED_FLAGS = 0x0; + + public static SpectatorHistogramIndexed read(ByteBuffer buffer, ObjectStrategy<SpectatorHistogram> strategy) + { + byte versionFromBuffer = buffer.get(); + + if (VERSION_ONE == versionFromBuffer) { + // Reserved flags, not currently used + buffer.get(); + int sizeOfOffsetsAndValues = buffer.getInt(); + ByteBuffer bufferToUse = buffer.slice(); + bufferToUse.limit(sizeOfOffsetsAndValues); + + buffer.position(buffer.position() + sizeOfOffsetsAndValues); + + return new SpectatorHistogramIndexed( + bufferToUse, + strategy + ); + } + throw new IAE("Unknown version[%d]", (int) versionFromBuffer); + } + + private final ObjectStrategy<SpectatorHistogram> strategy; + private final int size; + private final NullableOffsetsHeader offsetsHeader; + private final ByteBuffer valueBuffer; + + private SpectatorHistogramIndexed( + ByteBuffer buffer, + ObjectStrategy<SpectatorHistogram> strategy + ) + { + this.strategy = strategy; + offsetsHeader = NullableOffsetsHeader.read(buffer); + // Size is count of entries + size = offsetsHeader.size(); + // The rest of the buffer is the values + valueBuffer = buffer.slice(); + } + + /** + * Checks if {@code index} is a valid element index in SpectatorHistogramIndexed. + * Similar to Preconditions.checkElementIndex() except this method throws {@link IAE} with a custom error message. + *
+ * Used here to get existing behavior(same error message and exception) of V1 GenericIndexed. + * + * @param index index identifying an element of an SpectatorHistogramIndexed. + */ + private void checkIndex(int index) + { + if (index < 0) { + throw new IAE("Index[%s] < 0", index); + } + if (index >= size) { + throw new IAE("Index[%d] >= size[%d]", index, size); + } + } + + public Class getClazz() + { + return strategy.getClazz(); + } + + @Override + public int size() + { + return size; + } + + @Nullable + @Override + public SpectatorHistogram get(int index) + { + checkIndex(index); + + NullableOffsetsHeader.Offset offset = offsetsHeader.get(index); + if (offset == null) { + return null; + } + + ByteBuffer copyValueBuffer = valueBuffer.asReadOnlyBuffer(); + copyValueBuffer.position(offset.getStart()); + copyValueBuffer.limit(offset.getStart() + offset.getLength()); + + return strategy.fromByteBuffer(copyValueBuffer, offset.getLength()); + } + + @Override + public int indexOf(@Nullable SpectatorHistogram value) + { + throw new UnsupportedOperationException("Reverse lookup not allowed."); + } + + @Override + public Iterator iterator() + { + return IndexedIterable.create(this).iterator(); + } + + @Override + public long getSerializedSize() + { + throw new UnsupportedOperationException("Serialization not supported here"); + } + + @Override + public void writeTo(WritableByteChannel channel, FileSmoosher smoosher) + { + throw new UnsupportedOperationException("Serialization not supported here"); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("headerBuffer", offsetsHeader); + inspector.visit("firstValueBuffer", valueBuffer); + inspector.visit("strategy", strategy); + } + + @Override + public String toString() + { + return "SpectatorHistogramIndexed[" + "size: " + + size() + + " cardinality: " + + offsetsHeader.getCardinality() + + ']'; + } + + @Override + public void close() + { + // nothing to close + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramJsonSerializer.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramJsonSerializer.java new file mode 100644 index 000000000000..fb0a32b45024 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramJsonSerializer.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; + +import java.io.IOException; + +public class SpectatorHistogramJsonSerializer extends JsonSerializer +{ + @Override + public void serialize( + SpectatorHistogram spectatorHistogram, + JsonGenerator jsonGenerator, + SerializerProvider serializerProvider + ) throws IOException + { + spectatorHistogram.serialize(jsonGenerator, serializerProvider); + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramModule.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramModule.java new file mode 100644 index 000000000000..b12c600d6b42 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramModule.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.databind.Module; +import com.fasterxml.jackson.databind.jsontype.NamedType; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableList; +import com.google.inject.Binder; +import org.apache.druid.initialization.DruidModule; +import org.apache.druid.segment.serde.ComplexMetrics; + +import java.util.List; + +/** + * Module defining various aggregators for Spectator Histograms + */ +public class SpectatorHistogramModule implements DruidModule +{ + @VisibleForTesting + public static void registerSerde() + { + ComplexMetrics.registerSerde( + SpectatorHistogramAggregatorFactory.TYPE_NAME, + new SpectatorHistogramComplexMetricSerde(SpectatorHistogramAggregatorFactory.TYPE_NAME) + ); + ComplexMetrics.registerSerde( + SpectatorHistogramAggregatorFactory.Timer.TYPE_NAME, + new SpectatorHistogramComplexMetricSerde(SpectatorHistogramAggregatorFactory.Timer.TYPE_NAME) + ); + ComplexMetrics.registerSerde( + SpectatorHistogramAggregatorFactory.Distribution.TYPE_NAME, + new SpectatorHistogramComplexMetricSerde(SpectatorHistogramAggregatorFactory.Distribution.TYPE_NAME) + ); + } + + @Override + public List getJacksonModules() + { + return ImmutableList.of( + new SimpleModule( + getClass().getSimpleName() + ).registerSubtypes( + new NamedType( + SpectatorHistogramAggregatorFactory.class, + SpectatorHistogramAggregatorFactory.TYPE_NAME + ), + new NamedType( + SpectatorHistogramAggregatorFactory.Timer.class, + SpectatorHistogramAggregatorFactory.Timer.TYPE_NAME + ), + new NamedType( + SpectatorHistogramAggregatorFactory.Distribution.class, + SpectatorHistogramAggregatorFactory.Distribution.TYPE_NAME + ), + new NamedType( + SpectatorHistogramPercentilePostAggregator.class, + SpectatorHistogramPercentilePostAggregator.TYPE_NAME + ), + new NamedType( + SpectatorHistogramPercentilesPostAggregator.class, + SpectatorHistogramPercentilesPostAggregator.TYPE_NAME + ) + ).addSerializer(SpectatorHistogram.class, new SpectatorHistogramJsonSerializer()) + ); + } + + @Override + public void configure(Binder binder) + { + registerSerde(); + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramObjectStrategy.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramObjectStrategy.java new file mode 100644 index 000000000000..33b59bd6ad6d --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramObjectStrategy.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import org.apache.druid.segment.data.ObjectStrategy; + +import javax.annotation.Nullable; +import java.nio.ByteBuffer; + +public class SpectatorHistogramObjectStrategy implements ObjectStrategy +{ + private static final byte[] EMPTY_BYTES = null; + + @Override + public Class getClazz() + { + return SpectatorHistogram.class; + } + + @Override + public SpectatorHistogram fromByteBuffer(ByteBuffer readOnlyBuffer, int numBytes) + { + if (numBytes == 0) { + return null; + } + return SpectatorHistogram.fromByteBuffer(readOnlyBuffer); + } + + @Override + public byte[] toBytes(@Nullable SpectatorHistogram val) + { + if (val == null) { + return EMPTY_BYTES; + } + return val.toBytes(); + } + + @Override + public int compare(SpectatorHistogram o1, SpectatorHistogram o2) + { + return SpectatorHistogramAggregatorFactory.COMPARATOR.compare(o1, o2); + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramPercentilePostAggregator.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramPercentilePostAggregator.java new file mode 100644 index 000000000000..80854c57d460 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramPercentilePostAggregator.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Doubles; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Comparator; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +public class SpectatorHistogramPercentilePostAggregator implements PostAggregator +{ + + private final String name; + private final PostAggregator field; + + private final double percentile; + + public static final String TYPE_NAME = "percentileSpectatorHistogram"; + + @JsonCreator + public SpectatorHistogramPercentilePostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("percentile") final double percentile + ) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + Preconditions.checkArgument( + percentile >= 0 && percentile <= 100, + "Percentile argument not in range (0, 100)" + ); + this.percentile = percentile; + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.DOUBLE; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @JsonProperty + public double getPercentile() + { + return percentile; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final SpectatorHistogram sketch = (SpectatorHistogram) field.compute(combinedAggregators); + return sketch.getPercentileValue(percentile); + } + + @Override + public Comparator getComparator() + { + return Doubles::compare; + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", fraction=" + percentile + + "}"; + } + + @Override + public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + PostAggregatorIds.SPECTATOR_HISTOGRAM_SKETCH_PERCENTILE_CACHE_TYPE_ID).appendCacheable(field); + builder.appendDouble(percentile); + return builder.build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + SpectatorHistogramPercentilePostAggregator that = (SpectatorHistogramPercentilePostAggregator) o; + return Double.compare(that.percentile, percentile) == 0 && + Objects.equals(name, that.name) && + Objects.equals(field, that.field); + } + + @Override + public int hashCode() + { + return Objects.hash(name, field, percentile); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramPercentilesPostAggregator.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramPercentilesPostAggregator.java new 
file mode 100644 index 000000000000..11ce9e0d9bd4 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramPercentilesPostAggregator.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Doubles; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.PostAggregator; +import org.apache.druid.query.aggregation.post.PostAggregatorIds; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.column.ColumnType; + +import java.util.Arrays; +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +public class SpectatorHistogramPercentilesPostAggregator implements PostAggregator +{ + private final String name; + private final PostAggregator field; + + private final double[] percentiles; + + public static final String TYPE_NAME = "percentilesSpectatorHistogram"; + + @JsonCreator + public SpectatorHistogramPercentilesPostAggregator( + @JsonProperty("name") final String name, + @JsonProperty("field") final PostAggregator field, + @JsonProperty("percentiles") final double[] percentiles + ) + { + this.name = Preconditions.checkNotNull(name, "name is null"); + this.field = Preconditions.checkNotNull(field, "field is null"); + this.percentiles = Preconditions.checkNotNull(percentiles, "array of fractions is null"); + Preconditions.checkArgument(this.percentiles.length >= 1, "Array of percentiles cannot " + + "be empty"); + } + + @Override + @JsonProperty + public String getName() + { + return name; + } + + @Override + public ColumnType getType(ColumnInspector signature) + { + return ColumnType.DOUBLE_ARRAY; + } + + @JsonProperty + public PostAggregator getField() + { + return field; + } + + @JsonProperty + public double[] getPercentiles() + { + return percentiles; + } + + @Override + public Object compute(final Map combinedAggregators) + { + final SpectatorHistogram sketch = (SpectatorHistogram) field.compute(combinedAggregators); + return sketch.getPercentileValues(percentiles); + } + + @Override + public Comparator getComparator() + { + return Doubles::compare; + } + + @Override + public Set getDependentFields() + { + return field.getDependentFields(); + } + + @Override + public String toString() + { + return getClass().getSimpleName() + "{" + + "name='" + name + '\'' + + ", field=" + field + + ", percentiles=" + Arrays.toString(percentiles) + + "}"; + } + + @Override + 
public byte[] getCacheKey() + { + final CacheKeyBuilder builder = new CacheKeyBuilder( + PostAggregatorIds.SPECTATOR_HISTOGRAM_SKETCH_PERCENTILES_CACHE_TYPE_ID).appendCacheable(field); + for (final double value : percentiles) { + builder.appendDouble(value); + } + return builder.build(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final SpectatorHistogramPercentilesPostAggregator that = (SpectatorHistogramPercentilesPostAggregator) o; + if (!name.equals(that.name)) { + return false; + } + if (!Arrays.equals(percentiles, that.percentiles)) { + return false; + } + return field.equals(that.field); + } + + @Override + public int hashCode() + { + return (name.hashCode() * 31 + field.hashCode()) * 31 + Arrays.hashCode(percentiles); + } + + @Override + public PostAggregator decorate(final Map map) + { + return this; + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramSerializer.java b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramSerializer.java new file mode 100644 index 000000000000..2e4608fee788 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/java/org/apache/druid/spectator/histogram/SpectatorHistogramSerializer.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.google.common.primitives.Ints; +import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.GenericColumnSerializer; +import org.apache.druid.segment.data.ColumnCapacityExceededException; +import org.apache.druid.segment.data.ObjectStrategy; +import org.apache.druid.segment.serde.MetaSerdeHelper; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.apache.druid.segment.writeout.WriteOutBytes; + +import java.io.IOException; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramSerializer implements GenericColumnSerializer +{ + private static final MetaSerdeHelper META_SERDE_HELPER = MetaSerdeHelper + .firstWriteByte((SpectatorHistogramSerializer x) -> SpectatorHistogramIndexed.VERSION_ONE) + .writeByte(x -> SpectatorHistogramIndexed.RESERVED_FLAGS) + // numBytesUsed field is header + values (i.e. 
all bytes _after_ this) + .writeInt(x -> Ints.checkedCast(x.offsetsHeader.getSerializedSize() + x.valuesBuffer.size())); + + public static SpectatorHistogramSerializer create( + SegmentWriteOutMedium segmentWriteOutMedium, + String columnName, + ObjectStrategy strategy + ) + { + return new SpectatorHistogramSerializer( + columnName, + segmentWriteOutMedium, + strategy + ); + } + + private final String columnName; + private final SegmentWriteOutMedium segmentWriteOutMedium; + private final ObjectStrategy objectStrategy; + private NullableOffsetsHeader offsetsHeader; + private WriteOutBytes valuesBuffer; + + private int rowCount = 0; + + private SpectatorHistogramSerializer( + String columnName, + SegmentWriteOutMedium segmentWriteOutMedium, + ObjectStrategy strategy + ) + { + this.columnName = columnName; + this.segmentWriteOutMedium = segmentWriteOutMedium; + this.objectStrategy = strategy; + } + + @Override + public void open() throws IOException + { + this.offsetsHeader = NullableOffsetsHeader.create(segmentWriteOutMedium); + this.valuesBuffer = segmentWriteOutMedium.makeWriteOutBytes(); + } + + @Override + public void serialize(ColumnValueSelector selector) throws IOException + { + rowCount++; + if (rowCount < 0) { + throw new ColumnCapacityExceededException(columnName); + } + Object value = selector.getObject(); + if (value == null) { + offsetsHeader.writeNull(); + } else { + objectStrategy.writeTo((SpectatorHistogram) value, valuesBuffer); + offsetsHeader.writeOffset(Ints.checkedCast(valuesBuffer.size())); + } + } + + @Override + public long getSerializedSize() + { + // Meta header, Offsets, Values + return META_SERDE_HELPER.size(this) + offsetsHeader.getSerializedSize() + valuesBuffer.size(); + } + + @Override + public void writeTo(WritableByteChannel channel, FileSmoosher smoosher) throws IOException + { + META_SERDE_HELPER.writeTo(channel, this); + offsetsHeader.writeTo(channel, null); + valuesBuffer.writeTo(channel); + } +} diff --git a/extensions-contrib/spectator-histogram/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule b/extensions-contrib/spectator-histogram/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule new file mode 100644 index 000000000000..f158b84da3f9 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/main/resources/META-INF/services/org.apache.druid.initialization.DruidModule @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
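+# The single entry below registers SpectatorHistogramModule with Druid's module discovery, +# which loads DruidModule implementations via the JDK ServiceLoader mechanism.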
+ +org.apache.druid.spectator.histogram.SpectatorHistogramModule diff --git a/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/NullableOffsetsHeaderTest.java b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/NullableOffsetsHeaderTest.java new file mode 100644 index 000000000000..add0d88efceb --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/NullableOffsetsHeaderTest.java @@ -0,0 +1,441 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.spectator.histogram; + +import com.google.common.collect.ImmutableList; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; +import java.util.Arrays; +import java.util.List; + +public class NullableOffsetsHeaderTest +{ + @Test + public void testShouldAcceptNullWrites() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeNull(); + header.writeNull(); + header.writeNull(); + + Assert.assertEquals("Size should be count of entries", 3, header.size()); + + header = serde(header); + Assert.assertEquals("Size should be count of entries", 3, header.size()); + + Assert.assertNull("Should return null for null entries by index", header.get(0)); + Assert.assertNull("Should return null for null entries by index", header.get(1)); + Assert.assertNull("Should return null for null entries by index", header.get(2)); + } + + @Test + public void testShouldAcceptOffsetWrites() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeOffset(123); + header.writeOffset(456); + + Assert.assertEquals("Size should be count of entries", 2, header.size()); + + header = serde(header); + Assert.assertEquals("Size should be count of entries", 2, header.size()); + + Assert.assertNotNull("Should flag nulls by index", header.get(0)); + Assert.assertNotNull("Should flag nulls by index", header.get(1)); + + Assert.assertEquals("Should return value for entries by index", 0, header.get(0).getStart()); + Assert.assertEquals("Should return value for entries by index", 123, header.get(0).getEnd()); + Assert.assertEquals("Should return value for entries by index", 123, header.get(1).getStart()); + Assert.assertEquals("Should return value for entries by index", 456, header.get(1).getEnd()); + } + + @Test + public void testShouldAcceptMixedWrites() throws 
IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeOffset(123); + header.writeNull(); + header.writeNull(); + header.writeOffset(456); + header.writeOffset(789); + header.writeNull(); + + Assert.assertEquals("Size should be count of entries", 6, header.size()); + + header = serde(header); + Assert.assertEquals("Size should be count of entries", 6, header.size()); + + Assert.assertNotNull("Should flag nulls by index", header.get(0)); + Assert.assertNull("Should flag nulls by index", header.get(1)); + Assert.assertNull("Should flag nulls by index", header.get(2)); + Assert.assertNotNull("Should flag nulls by index", header.get(3)); + Assert.assertNotNull("Should flag nulls by index", header.get(4)); + Assert.assertNull("Should flag nulls by index", header.get(5)); + + Assert.assertEquals("Should return value for entries by index", 0, header.get(0).getStart()); + Assert.assertEquals("Should return value for entries by index", 123, header.get(0).getEnd()); + Assert.assertEquals("Should return value for entries by index", 123, header.get(3).getStart()); + Assert.assertEquals("Should return value for entries by index", 456, header.get(3).getEnd()); + Assert.assertEquals("Should return value for entries by index", 456, header.get(4).getStart()); + Assert.assertEquals("Should return value for entries by index", 789, header.get(4).getEnd()); + } + + @Test + public void testGiveAccessToOffsets() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeOffset(123); + header.writeNull(); + header.writeNull(); + header.writeOffset(456); + header.writeOffset(789); + header.writeNull(); + + header = serde(header); + + Assert.assertNull("Should return null for 6", header.get(6)); + + Assert.assertNull("Should return null for 5", header.get(5)); + + Assert.assertEquals("Offset at 4", 789, header.get(4).getEnd()); + Assert.assertEquals("Offset prior to 4", 456, header.get(4).getStart()); + + Assert.assertEquals("Offset at 3", 456, header.get(3).getEnd()); + Assert.assertEquals("Offset prior to 3", 123, header.get(3).getStart()); + + Assert.assertNull("Should return null for 2", header.get(2)); + + Assert.assertNull("Should return null for 1", header.get(1)); + + Assert.assertEquals("Offset at 0", 123, header.get(0).getEnd()); + Assert.assertEquals("Offset prior to 0", 0, header.get(0).getStart()); + } + + @Test + public void testGiveAccessToSingleOffsetNulls() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeNull(); + header.writeOffset(123); + header.writeNull(); + header.writeNull(); + header.writeNull(); + + header = serde(header); + + Assert.assertEquals("Offset at 1", 123, header.get(1).getEnd()); + Assert.assertEquals("Offset prior to 1", 0, header.get(1).getStart()); + + Assert.assertNull("Nulls for anything not set", header.get(0)); + Assert.assertNull("Nulls for anything not set", header.get(-1)); + Assert.assertNull("Nulls for anything not set", header.get(3)); + Assert.assertNull("Nulls for anything not set", header.get(100)); + } + + @Test + public void testShouldSerializeAndDeserialize() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + header.writeOffset(123); + header.writeNull(); + header.writeNull(); + header.writeOffset(456); + header.writeOffset(789); + 
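+ // Offsets are cumulative positions in the values buffer; each entry's length is the + // gap back to the previous non-null offset, as the expected (start, end) pairs show.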
header.writeNull(); + + // Length + BitmapLength + Bitmap + Offsets + // 4 + 4 + 1 + 12 = 21 bytes + Assert.assertEquals("Serialized size should be minimal", 21, header.getSerializedSize()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final WritableByteChannel channel = Channels.newChannel(baos); + header.writeTo(channel, null); + channel.close(); + + final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + Assert.assertEquals( + "Reported size and actual size should match", + header.getSerializedSize(), + byteBuffer.remaining() + ); + + NullableOffsetsHeader deserialized = NullableOffsetsHeader.read(byteBuffer); + Assert.assertEquals(0, byteBuffer.remaining()); + + Assert.assertEquals("Deserialized should match pre-serialized size", header.size(), deserialized.size()); + + // Nulls should return the previous offset + List expected = Arrays.asList( + new NullableOffsetsHeader.Offset(0, 123), + null, + null, + new NullableOffsetsHeader.Offset(123, 456), + new NullableOffsetsHeader.Offset(456, 789), + null + ); + + for (int i = 0; i < header.size(); i++) { + Assert.assertEquals("Deserialized should match pre-serialized values", expected.get(i), deserialized.get(i)); + } + } + + @Test + public void testShouldSerializeAndDeserializeAllNulls() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + for (int i = 0; i < 10000; i++) { + header.writeNull(); + } + + // Length + BitmapLength + Bitmap + Offsets + // 4 + 4 + 0 + 0 = 8 bytes + Assert.assertEquals("Serialized size should be minimal", 8, header.getSerializedSize()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final WritableByteChannel channel = Channels.newChannel(baos); + header.writeTo(channel, null); + channel.close(); + + final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + Assert.assertEquals( + "Reported size and actual size should match", + header.getSerializedSize(), + byteBuffer.remaining() + ); + + NullableOffsetsHeader deserialized = NullableOffsetsHeader.read(byteBuffer); + Assert.assertEquals(0, byteBuffer.remaining()); + + Assert.assertEquals("Deserialized should match pre-serialized size", header.size(), deserialized.size()); + + for (int i = 0; i < header.size(); i++) { + Assert.assertNull("Deserialized should be null", deserialized.get(i)); + } + } + + @Test + public void testShouldSerializeAndDeserializeAllValues() throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + for (int i = 0; i < 10000; i++) { + header.writeOffset(i + 1); + } + + // Length + BitmapLength + Bitmap + Offsets + // 4 + 4 + 0 + 40000 = 40008 bytes + // Bitmap is omitted if all values are set + Assert.assertEquals("Serialized size should be minimal", 40008, header.getSerializedSize()); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final WritableByteChannel channel = Channels.newChannel(baos); + header.writeTo(channel, null); + channel.close(); + + final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + Assert.assertEquals( + "Reported size and actual size should match", + header.getSerializedSize(), + byteBuffer.remaining() + ); + + NullableOffsetsHeader deserialized = NullableOffsetsHeader.read(byteBuffer); + Assert.assertEquals(0, byteBuffer.remaining()); + + Assert.assertEquals("Deserialized should match pre-serialized size", header.size(), deserialized.size()); + + for (int i = 0; i < header.size(); i++) { 
+ Assert.assertNotNull("Deserialized should be set " + i, deserialized.get(i)); + Assert.assertEquals("Deserialized should match pre-serialized nulls " + i, i + 1, deserialized.get(i).getEnd()); + } + } + + @Test + public void testShouldFindOffsetFromIndexSingleWord() throws IOException + { + // Should return the exact index of the offset to read, or negative if not present + List expectedOffsetIndexes = ImmutableList.of(15, 21, 30, 31); + NullableOffsetsHeader header = createHeaderWithIndexesSet(expectedOffsetIndexes); + Assert.assertEquals("Size should be count of entries", 32, header.size()); + header = serde(header); + + for (int i = 0; i < header.size(); i++) { + int offsetIndex = header.getOffsetIndex(i); + int expected = expectedOffsetIndexes.indexOf(i); + Assert.assertEquals("Offset " + i, expected, offsetIndex); + } + } + + @Test + public void testShouldFindOffsetFromIndexMultipleWords() throws IOException + { + // Should return the exact index of the offset to read, or negative if not present + List expectedOffsetIndexes = ImmutableList.of(15, 21, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 70, 100); + NullableOffsetsHeader header = createHeaderWithIndexesSet(expectedOffsetIndexes); + Assert.assertEquals("Size should be count of entries", 101, header.size()); + header = serde(header); + + for (int i = 0; i < header.size(); i++) { + int offsetIndex = header.getOffsetIndex(i); + int expected = expectedOffsetIndexes.indexOf(i); + Assert.assertEquals("Offset " + i, expected, offsetIndex); + } + } + + @Test + public void testShouldFindOffsetFromIndexFull() throws IOException + { + // For a full header, the bitset is omitted. + // The expected index, is the queried index. + final int size = 500; + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + for (int i = 0; i < size; i++) { + header.writeOffset(i + 1); + } + Assert.assertEquals("Size should be count of entries", size, header.size()); + header = serde(header); + + for (int i = 0; i < size; i++) { + int offsetIndex = header.getOffsetIndex(i); + Assert.assertEquals("Offset " + i, i, offsetIndex); + } + } + + @Test + public void testShouldFindOffsetFromIndexEmpty() throws IOException + { + // For an empty header, the bitset is omitted. + // The expected index, is always -1. 
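+ // (A negative return from getOffsetIndex signals a null row, matching the + // "or negative if not present" contract exercised by the tests above.)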
+ final int size = 500; + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + for (int i = 0; i < size; i++) { + header.writeNull(); + } + Assert.assertEquals("Size should be count of entries", size, header.size()); + header = serde(header); + + for (int i = 0; i < size; i++) { + int offsetIndex = header.getOffsetIndex(i); + Assert.assertEquals("Offset " + i, -1, offsetIndex); + } + } + + @Test + public void testShouldWorkWithBitsSetAfter64bitBoundary() throws IOException + { + List<Integer> expectedOffsetIndexes = ImmutableList.of(0, 1, 2, 3, 4, 256, 257); + NullableOffsetsHeader header = createHeaderWithIndexesSet(expectedOffsetIndexes); + Assert.assertEquals("Size should be count of entries", 258, header.size()); + header = serde(header); + Assert.assertEquals("Size should be count of entries", 258, header.size()); + Assert.assertEquals("Cardinality should be count of non-nulls", expectedOffsetIndexes.size(), header.getCardinality()); + + for (int i = 0; i < header.size(); i++) { + int offsetIndex = header.getOffsetIndex(i); + int expectedOffset = expectedOffsetIndexes.indexOf(i); + Assert.assertEquals("Offset " + i, expectedOffset, offsetIndex); + + NullableOffsetsHeader.Offset offset = header.get(i); + if (expectedOffset < 0) { + Assert.assertNull("Null Offset " + i, offset); + } else { + int expectedOffsetStart = expectedOffset; + int expectedOffsetEnd = expectedOffset + 1; + Assert.assertEquals("Offset Start " + i, expectedOffsetStart, offset.getStart()); + Assert.assertEquals("Offset End " + i, expectedOffsetEnd, offset.getEnd()); + Assert.assertEquals("Offset Length " + i, 1, offset.getLength()); + } + } + } + + @Test + public void testShouldWorkOnLongByteBoundaries() throws IOException + { + for (int x = 1; x < 24; x++) { + int boundary = ((int) Math.pow(2, x)) - 1; + List<Integer> expectedOffsetIndexes = ImmutableList.of(boundary - 1); + NullableOffsetsHeader header = createHeaderWithIndexesSet(expectedOffsetIndexes); + Assert.assertEquals("Size should be count of entries", boundary, header.size()); + header = serde(header); + Assert.assertEquals("Size should be count of entries", boundary, header.size()); + Assert.assertEquals( + "Cardinality should be count of non-nulls", + expectedOffsetIndexes.size(), + header.getCardinality() + ); + + for (int i = 0; i < header.size(); i++) { + int offsetIndex = header.getOffsetIndex(i); + int expectedOffset = expectedOffsetIndexes.indexOf(i); + Assert.assertEquals("Offset " + i, expectedOffset, offsetIndex); + + NullableOffsetsHeader.Offset offset = header.get(i); + if (expectedOffset < 0) { + Assert.assertNull("Null Offset " + i, offset); + } else { + int expectedOffsetStart = expectedOffset; + int expectedOffsetEnd = expectedOffset + 1; + Assert.assertEquals("Offset Start " + i, expectedOffsetStart, offset.getStart()); + Assert.assertEquals("Offset End " + i, expectedOffsetEnd, offset.getEnd()); + Assert.assertEquals("Offset Length " + i, 1, offset.getLength()); + } + } + } + } + + /** + * Test helper to serialize and deserialize a NullableOffsetsHeader + * + * @param in The NullableOffsetsHeader to serialize + * @return The deserialized representation of {@code in}.
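+ * <p>For example, {@code header = serde(header)} round-trips a header through an in-memory channel, as the tests above do before asserting on the deserialized form.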
+ */ + NullableOffsetsHeader serde(NullableOffsetsHeader in) throws IOException + { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final WritableByteChannel channel = Channels.newChannel(baos); + in.writeTo(channel, null); + channel.close(); + + final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + return NullableOffsetsHeader.read(byteBuffer); + } + + /** + * Helper to make a header with the provided indexes set + */ + NullableOffsetsHeader createHeaderWithIndexesSet(List<Integer> indexes) throws IOException + { + NullableOffsetsHeader header = NullableOffsetsHeader.create(new OnHeapMemorySegmentWriteOutMedium()); + int offset = 1; + for (Integer idx : indexes) { + while (header.size() < idx) { + header.writeNull(); + } + header.writeOffset(offset++); + } + return header; + } +} diff --git a/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java new file mode 100644 index 000000000000..1c30cfc05c36 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramAggregatorTest.java @@ -0,0 +1,733 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.druid.spectator.histogram; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.java.util.common.granularity.Granularities; +import org.apache.druid.java.util.common.guava.Sequence; +import org.apache.druid.query.Druids; +import org.apache.druid.query.QueryPlus; +import org.apache.druid.query.QueryRunner; +import org.apache.druid.query.QueryRunnerTestHelper; +import org.apache.druid.query.Result; +import org.apache.druid.query.aggregation.AggregationTestHelper; +import org.apache.druid.query.aggregation.AggregatorFactory; +import org.apache.druid.query.aggregation.AggregatorUtil; +import org.apache.druid.query.groupby.GroupByQueryConfig; +import org.apache.druid.query.groupby.GroupByQueryRunnerTest; +import org.apache.druid.query.groupby.ResultRow; +import org.apache.druid.query.metadata.SegmentMetadataQueryConfig; +import org.apache.druid.query.metadata.SegmentMetadataQueryQueryToolChest; +import org.apache.druid.query.metadata.SegmentMetadataQueryRunnerFactory; +import org.apache.druid.query.metadata.metadata.ColumnAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentAnalysis; +import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery; +import org.apache.druid.query.timeseries.TimeseriesResultValue; +import org.apache.druid.segment.IndexIO; +import org.apache.druid.segment.QueryableIndex; +import org.apache.druid.segment.QueryableIndexSegment; +import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.apache.druid.timeline.SegmentId; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@RunWith(Parameterized.class) +public class SpectatorHistogramAggregatorTest extends InitializedNullHandlingTest +{ + public static final String INPUT_DATA_PARSE_SPEC = String.join( + "\n", + "{", + " \"type\": \"string\",", + " \"parseSpec\": {", + " \"format\": \"tsv\",", + " \"timestampSpec\": {\"column\": \"timestamp\", \"format\": \"yyyyMMddHH\"},", + " \"dimensionsSpec\": {", + " \"dimensions\": [\"product\"],", + " \"dimensionExclusions\": [],", + " \"spatialDimensions\": []", + " },", + " \"columns\": [\"timestamp\", \"product\", \"cost\"]", + " }", + "}" + ); + @Rule + public final TemporaryFolder tempFolder = new TemporaryFolder(); + + private static final SegmentMetadataQueryRunnerFactory METADATA_QR_FACTORY = new SegmentMetadataQueryRunnerFactory( + new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig()), + QueryRunnerTestHelper.NOOP_QUERYWATCHER + ); + private static final Map<String, SpectatorHistogram> EXPECTED_HISTOGRAMS = new HashMap<>(); + + static { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 1L); + EXPECTED_HISTOGRAMS.put("A", histogram); + + histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(30 + 40 + 40 + 40 + 50 + 50), 1L); + EXPECTED_HISTOGRAMS.put("B", histogram); + + histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(50 + 20000), 1L); + EXPECTED_HISTOGRAMS.put("C", histogram); +
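// Note: these sums mirror the longSum rollup in testBuildingHistogramQueryTime. Every row in input_data.tsv shares one timestamp, so rollup collapses each product to a single row and each expected histogram holds one count in the bucket of the summed cost (e.g. 30 + 40 + 40 + 40 + 50 + 50 = 250 for B). +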
} + + private final AggregationTestHelper helper; + private final AggregationTestHelper timeSeriesHelper; + + public SpectatorHistogramAggregatorTest(final GroupByQueryConfig config) + { + SpectatorHistogramModule.registerSerde(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + helper = AggregationTestHelper.createGroupByQueryAggregationTestHelper( + module.getJacksonModules(), config, tempFolder); + timeSeriesHelper = AggregationTestHelper.createTimeseriesQueryAggregationTestHelper( + module.getJacksonModules(), + tempFolder + ); + } + + @Parameterized.Parameters(name = "{0}") + public static Collection<?> constructorFeeder() + { + final List<Object[]> constructors = new ArrayList<>(); + for (GroupByQueryConfig config : GroupByQueryRunnerTest.testConfigs()) { + constructors.add(new Object[]{config}); + } + return constructors; + } + + // this is to test JSON properties and equals + @Test + public void serializeDeserializeFactoryWithFieldName() throws Exception + { + ObjectMapper objectMapper = new DefaultObjectMapper(); + new SpectatorHistogramModule().getJacksonModules().forEach(objectMapper::registerModule); + SpectatorHistogramAggregatorFactory factory = new SpectatorHistogramAggregatorFactory( + "name", + "fieldName", + AggregatorUtil.SPECTATOR_HISTOGRAM_CACHE_TYPE_ID + ); + AggregatorFactory other = objectMapper.readValue( + objectMapper.writeValueAsString(factory), + AggregatorFactory.class + ); + + Assert.assertEquals(factory, other); + } + + @Test + public void testBuildingHistogramQueryTime() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"longSum\", \"name\": \"cost_sum\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [\"product\"],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"cost_histogram\", \"fieldName\": " + + "\"cost_sum\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + List<ResultRow> results = seq.toList(); + assertResultsMatch(results, 0, "A"); + assertResultsMatch(results, 1, "B"); + assertResultsMatch(results, 2, "C"); + } + + @Test + public void testBuildingAndMergingHistograms() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); +
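// One count per raw input row rather than per rolled-up sum: cost 10 once, 30 once, and 40 three times above; 50 three times (two from B, one from C) and 20000 once below. +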
expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testBuildingAndMergingHistogramsTimeseriesQuery() throws Exception + { + Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + Sequence<Result<TimeseriesResultValue>> seq = (Sequence<Result<TimeseriesResultValue>>) rawseq; + List<Result<TimeseriesResultValue>> results = seq.toList(); + Assert.assertEquals(1, results.size()); + SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); + Assert.assertEquals(expected, value); + } + + @Test + public void testBuildingAndMergingGroupbyHistograms() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [\"product\"],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(6, results.size()); + + SpectatorHistogram expectedA = new SpectatorHistogram(); + expectedA.add(PercentileBuckets.indexOf(10), 1L); + Assert.assertEquals(expectedA, results.get(0).get(1)); + + SpectatorHistogram expectedB = new SpectatorHistogram(); + expectedB.add(PercentileBuckets.indexOf(30), 1L); + expectedB.add(PercentileBuckets.indexOf(40), 3L); + expectedB.add(PercentileBuckets.indexOf(50), 2L); + Assert.assertEquals(expectedB, results.get(1).get(1)); + + SpectatorHistogram expectedC = new SpectatorHistogram(); + expectedC.add(PercentileBuckets.indexOf(50), 1L); + expectedC.add(PercentileBuckets.indexOf(20000), 1L); + Assert.assertEquals(expectedC, results.get(2).get(1)); + + Assert.assertNull(results.get(3).get(1)); + Assert.assertNull(results.get(4).get(1)); + Assert.assertNull(results.get(5).get(1)); + } + + @Test + public void testBuildingAndCountingHistograms() throws Exception + { + Sequence<ResultRow> seq =
helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " + + "\"histogram\"},", + " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + // Check longSum + Assert.assertEquals(9L, results.get(0).get(0)); + // Check doubleSum + Assert.assertEquals(9.0, (Double) results.get(0).get(1), 0.001); + } + + @Test + public void testBuildingAndCountingHistogramsWithNullFilter() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"longSum\", \"name\": \"count_histogram\", \"fieldName\": " + + "\"histogram\"},", + " {\"type\": \"doubleSum\", \"name\": \"double_count_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"],", + " \"filter\": {\n", + " \"fields\": [\n", + " {\n", + " \"field\": {\n", + " \"dimension\": \"histogram\",\n", + " \"value\": \"0\",\n", + " \"type\": \"selector\"\n", + " },\n", + " \"type\": \"not\"\n", + " },\n", + " {\n", + " \"field\": {\n", + " \"dimension\": \"histogram\",\n", + " \"value\": \"\",\n", + " \"type\": \"selector\"\n", + " },\n", + " \"type\": \"not\"\n", + " }\n", + " ],\n", + " \"type\": \"and\"\n", + " }", + "}" + ) + ); + + List<ResultRow> results = seq.toList(); + Assert.assertEquals(1, results.size()); + // Check longSum + Assert.assertEquals(9L, results.get(0).get(0)); + // Check doubleSum + Assert.assertEquals(9.0, (Double) results.get(0).get(1), 0.001); + } + + @Test + public void testIngestAsHistogramDistribution() throws Exception + { + Sequence<ResultRow> seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramDistribution\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimensions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\":
[\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testIngestHistogramsTimer() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + Assert.assertEquals(expected, results.get(0).get(0)); + } + + @Test + public void testIngestingPreaggregatedHistograms() throws Exception + { + Object rawseq = timeSeriesHelper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("pre_agg_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"timeseries\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", \"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + Sequence> seq = (Sequence>) rawseq; + List> results = seq.toList(); + Assert.assertEquals(1, results.size()); + SpectatorHistogram value = (SpectatorHistogram) results.get(0).getValue().getMetric("merged_cost_histogram"); + Assert.assertEquals(expected, value); + } + + @Test + public void testMetadataQueryTimer() throws Exception + { + File segmentDir = tempFolder.newFolder(); + helper.createIndex( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": 
\"spectatorHistogramTimer\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + segmentDir, + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + true + ); + + ObjectMapper mapper = (ObjectMapper) TestHelper.makeJsonMapper(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + module.getJacksonModules().forEach(mod -> mapper.registerModule(mod)); + IndexIO indexIO = new IndexIO( + mapper, + new ColumnConfig() {} + ); + + QueryableIndex index = indexIO.loadIndex(segmentDir); + + SegmentId segmentId = SegmentId.dummy("segmentId"); + QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner( + METADATA_QR_FACTORY, + segmentId, + new QueryableIndexSegment(index, segmentId), + null + ); + + SegmentMetadataQuery segmentMetadataQuery = Druids.newSegmentMetadataQueryBuilder() + .dataSource("test_datasource") + .intervals("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .merge(true) + .build(); + List results = runner.run(QueryPlus.wrap(segmentMetadataQuery)).toList(); + System.out.println(results); + Assert.assertEquals(1, results.size()); + Map columns = results.get(0).getColumns(); + Assert.assertNotNull(columns.get("histogram")); + Assert.assertEquals("spectatorHistogramTimer", columns.get("histogram").getType()); + } + + @Test + public void testMetadataQueryDistribution() throws Exception + { + File segmentDir = tempFolder.newFolder(); + helper.createIndex( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogramDistribution\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + segmentDir, + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + true + ); + + ObjectMapper mapper = (ObjectMapper) TestHelper.makeJsonMapper(); + SpectatorHistogramModule module = new SpectatorHistogramModule(); + module.getJacksonModules().forEach(mod -> mapper.registerModule(mod)); + IndexIO indexIO = new IndexIO( + mapper, + new ColumnConfig() { } + ); + + QueryableIndex index = indexIO.loadIndex(segmentDir); + + SegmentId segmentId = SegmentId.dummy("segmentId"); + QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner( + METADATA_QR_FACTORY, + segmentId, + new QueryableIndexSegment(index, segmentId), + null + ); + + SegmentMetadataQuery segmentMetadataQuery = Druids.newSegmentMetadataQueryBuilder() + .dataSource("test_datasource") + .intervals("2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z") + .merge(true) + .build(); + List results = runner.run(QueryPlus.wrap(segmentMetadataQuery)).toList(); + System.out.println(results); + Assert.assertEquals(1, results.size()); + Map columns = results.get(0).getColumns(); + Assert.assertNotNull(columns.get("histogram")); + Assert.assertEquals("spectatorHistogramDistribution", columns.get("histogram").getType()); + } + + @Test + public void testPercentilePostAggregator() throws Exception + { + Sequence seq = helper.createIndexAndRunQueryOnSegment( + new File(this.getClass().getClassLoader().getResource("input_data.tsv").getFile()), + INPUT_DATA_PARSE_SPEC, + String.join( + "\n", + "[", + " {\"type\": \"spectatorHistogram\", \"name\": \"histogram\", \"fieldName\": \"cost\"}", + "]" + ), + 0, // minTimestamp + Granularities.NONE, + 10, // maxRowCount + String.join( + "\n", + "{", + " \"queryType\": \"groupBy\",", + " \"dataSource\": \"test_datasource\",", + " \"granularity\": \"ALL\",", + " \"dimenions\": [],", + " \"aggregations\": [", + " {\"type\": \"spectatorHistogram\", 
\"name\": \"merged_cost_histogram\", \"fieldName\": " + + "\"histogram\"}", + " ],", + " \"postAggregations\": [", + " {\"type\": \"percentileSpectatorHistogram\", \"name\": \"percentileValue\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_cost_histogram\"}" + + ", \"percentile\": \"50.0\"},", + " {\"type\": \"percentilesSpectatorHistogram\", \"name\": \"percentileValues\", \"field\": {\"type\": \"fieldAccess\",\"fieldName\": \"merged_cost_histogram\"}" + + ", \"percentiles\": [25.0, 50.0, 75.0, 99.0]}", + " ],", + " \"intervals\": [\"2016-01-01T00:00:00.000Z/2016-01-31T00:00:00.000Z\"]", + "}" + ) + ); + SpectatorHistogram expected = new SpectatorHistogram(); + expected.add(PercentileBuckets.indexOf(10), 1L); + expected.add(PercentileBuckets.indexOf(30), 1L); + expected.add(PercentileBuckets.indexOf(40), 3L); + expected.add(PercentileBuckets.indexOf(50), 3L); + expected.add(PercentileBuckets.indexOf(20000), 1L); + + List results = seq.toList(); + Assert.assertEquals(1, results.size()); + // Check on Median (true median is 40) + Assert.assertEquals(40.0, (double) results.get(0).get(1), 0.2); + // True percentiles for 25, 50, 75, 99 + double[] expectedPercentiles = new double[]{40.0, 40.0, 50.0, 18404.0}; + double[] resultPercentiles = (double[]) results.get(0).get(2); + + for (int i = 0; i < expectedPercentiles.length; i++) { + double expectedPercentile = expectedPercentiles[i]; + double resultPercentile = resultPercentiles[i]; + double error18pcnt = expectedPercentile * 0.18; + // Should be within 18% + Assert.assertEquals(expectedPercentile, resultPercentile, error18pcnt); + } + } + + private static void assertResultsMatch(List results, int rowNum, String expectedProduct) + { + ResultRow row = results.get(rowNum); + Object product = row.get(0); + Assert.assertTrue("Expected dimension of type String", product instanceof String); + Assert.assertEquals("Product values didn't match", expectedProduct, product); + Object histogram = row.get(1); + Assert.assertTrue( + "Expected histogram metric of type SpectatorHistogramUtils.HistogramMap", + histogram instanceof SpectatorHistogram + ); + Assert.assertEquals("Count values didn't match", EXPECTED_HISTOGRAMS.get(product), histogram); + } + +} diff --git a/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java new file mode 100644 index 000000000000..fb15ac85e4c4 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/test/java/org/apache/druid/spectator/histogram/SpectatorHistogramTest.java @@ -0,0 +1,451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.spectator.histogram; + +import com.netflix.spectator.api.histogram.PercentileBuckets; +import org.apache.druid.java.util.common.IAE; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnValueSelector; +import org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium; +import org.apache.druid.segment.writeout.SegmentWriteOutMedium; +import org.junit.Assert; +import org.junit.Test; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; + +public class SpectatorHistogramTest +{ + @Test + public void testToBytesSmallValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.insert(10); + histogram.insert(30); + histogram.insert(40); + histogram.insert(40); + histogram.insert(40); + histogram.insert(50); + histogram.insert(50); + // Check the full range of bucket IDs still works + long bigValue = PercentileBuckets.get(270); + histogram.insert(bigValue); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number of entries", 8, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = 0; + Assert.assertEquals("Should compact small values within key bytes", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(3L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(2L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(bigValue))); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number of entries", 8, deserialized.getSum()); + } + + @Test + public void testToBytesSmallishValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 64L); + histogram.add(PercentileBuckets.indexOf(30), 127L); + histogram.add(PercentileBuckets.indexOf(40), 111L); + histogram.add(PercentileBuckets.indexOf(50), 99L); + histogram.add(270, 100L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number of entries", 501, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Byte.BYTES; + Assert.assertEquals("Should compact small values to a byte", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(64L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(127L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(111L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(99L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(100L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number of entries", 501, deserialized.getSum()); + } + + @Test + public void
testToBytesMedValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 512L); + histogram.add(PercentileBuckets.indexOf(30), 1024L); + histogram.add(PercentileBuckets.indexOf(40), 2048L); + histogram.add(PercentileBuckets.indexOf(50), 4096L); + histogram.add(270, 8192L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number of entries", 15872, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Short.BYTES; + Assert.assertEquals("Should compact medium values to short", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(512L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(1024L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(2048L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(4096L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(8192L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number of entries", 15872, deserialized.getSum()); + } + + @Test + public void testToBytesLargerValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 100000L); + histogram.add(PercentileBuckets.indexOf(30), 200000L); + histogram.add(PercentileBuckets.indexOf(40), 500000L); + histogram.add(PercentileBuckets.indexOf(50), 10000000L); + histogram.add(270, 50000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number of entries", 60800000, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Integer.BYTES; + Assert.assertEquals("Should compact larger values to integer", 5 * (keySize + valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(100000L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(200000L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(500000L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(10000000L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(50000000L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number of entries", 60800000, deserialized.getSum()); + } + + @Test + public void testToBytesBiggestValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 10000000000L); + histogram.add(PercentileBuckets.indexOf(30), 20000000000L); + histogram.add(PercentileBuckets.indexOf(40), 50000000000L); + histogram.add(PercentileBuckets.indexOf(50), 100000000000L); + histogram.add(270, 5000000000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number of entries", 5180000000000L, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + int valSize = Long.BYTES; + Assert.assertEquals("Should not compact larger values", 5 * (keySize
+ valSize), bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(10000000000L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(20000000000L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(50000000000L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(100000000000L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(5000000000000L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number of entries", 5180000000000L, deserialized.getSum()); + } + + @Test + public void testToBytesMixedValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(PercentileBuckets.indexOf(10), 1L); + histogram.add(PercentileBuckets.indexOf(30), 300L); + histogram.add(PercentileBuckets.indexOf(40), 200000L); + histogram.add(PercentileBuckets.indexOf(50), 100000000000L); + histogram.add(270, 5000000000000L); + + Assert.assertEquals("Should have size matching number of buckets", 5, histogram.size()); + Assert.assertEquals("Should have sum matching number of entries", 5100000200301L, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + Assert.assertEquals("Should not compact larger values", (5 * keySize) + 0 + 2 + 4 + 8 + 8, bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(1L, deserialized.get(PercentileBuckets.indexOf(10))); + Assert.assertEquals(300L, deserialized.get(PercentileBuckets.indexOf(30))); + Assert.assertEquals(200000L, deserialized.get(PercentileBuckets.indexOf(40))); + Assert.assertEquals(100000000000L, deserialized.get(PercentileBuckets.indexOf(50))); + Assert.assertEquals(5000000000000L, deserialized.get(270)); + + Assert.assertEquals("Should have size matching number of buckets", 5, deserialized.size()); + Assert.assertEquals("Should have sum matching number of entries", 5100000200301L, deserialized.getSum()); + } + + @Test + public void testToBytesBoundaryValues() + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(6, 63L); + histogram.add(7, 64L); + histogram.add(8, 255L); + histogram.add(9, 256L); + histogram.add(16, 65535L); + histogram.add(17, 65536L); + histogram.add(32, 4294967295L); + histogram.add(33, 4294967296L); + + Assert.assertEquals("Should have size matching number of buckets", 8, histogram.size()); + Assert.assertEquals("Should have sum matching number of entries", 8590066300L, histogram.getSum()); + + byte[] bytes = histogram.toBytes(); + int keySize = Short.BYTES; + Assert.assertEquals("Should compact", (8 * keySize) + 0 + 1 + 1 + 2 + 2 + 4 + 4 + 8, bytes.length); + + SpectatorHistogram deserialized = SpectatorHistogram.deserialize(bytes); + Assert.assertEquals(63L, deserialized.get(6)); + Assert.assertEquals(64L, deserialized.get(7)); + Assert.assertEquals(255L, deserialized.get(8)); + Assert.assertEquals(256L, deserialized.get(9)); + Assert.assertEquals(65535L, deserialized.get(16)); + Assert.assertEquals(65536L, deserialized.get(17)); + Assert.assertEquals(4294967295L, deserialized.get(32)); + Assert.assertEquals(4294967296L, deserialized.get(33)); + + Assert.assertEquals("Should have size matching number of buckets", 8, deserialized.size()); + Assert.assertEquals("Should have sum matching number of entries", 8590066300L, deserialized.getSum()); + } + + @Test(expected
= IAE.class) + public void testBucketOutOfRangeMax() throws IAE + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(500, 1); + } + + @Test(expected = IAE.class) + public void testBucketOutOfRangeNegative() throws IAE + { + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(-2, 1); + } + + @Test + public void testSerializeAndDeserialize() throws IOException + { + SegmentWriteOutMedium medium = new OnHeapMemorySegmentWriteOutMedium(); + SpectatorHistogramObjectStrategy strategy = new SpectatorHistogramObjectStrategy(); + SpectatorHistogramSerializer serializer = SpectatorHistogramSerializer.create(medium, "test", strategy); + serializer.open(); + + SpectatorHistogram histogram = new SpectatorHistogram(); + histogram.add(6, 63L); + histogram.add(7, 64L); + histogram.add(8, 255L); + histogram.add(9, 256L); + histogram.add(16, 65535L); + histogram.add(17, 65536L); + histogram.add(32, 4294967295L); + histogram.add(33, 4294967296L); + + ColumnValueSelector<SpectatorHistogram> selector = new ColumnValueSelector<SpectatorHistogram>() + { + private int callCount = 0; + + @Override + public boolean isNull() + { + return false; + } + + @Override + public long getLong() + { + return 0; + } + + @Override + public float getFloat() + { + return 0; + } + + @Override + public double getDouble() + { + return 0; + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + + } + + @Override + public SpectatorHistogram getObject() + { + // On every 3rd fetch and after 6, we'll return a null. + // Columns ending with a lot of nulls won't add to the + // size of the segment at all. + ++callCount; + if ((callCount % 3 == 0) || callCount > 6) { + return null; + } + return histogram; + } + + @Override + public Class<? extends SpectatorHistogram> classOfObject() + { + return histogram.getClass(); + } + }; + + int count = 0; + // Serialize lots of nulls at the end to ensure + // we don't waste space on nulls.
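+ // (Context for the loop below: getObject() above returns the histogram only on calls 1, 2, 4 and 5, so just four values are ever written across 125000 rows; the trailing nulls should cost nothing beyond the truncated null bitmap, per the size breakdown that follows.)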
+ for (int i = 0; i < 125000; i++) { + serializer.serialize(selector); + count++; + } + + long serializedSize = serializer.getSerializedSize(); + // Column header = 6 bytes + // Offset header (Size + BitmapLength + ValueBitMap + Offsets) + // size = 4 bytes + // bitmap length = 4 bytes + // bitmap = 1 byte + // offsets * 4 = 16 bytes (no offset for nulls) + // Offset header = 25 bytes + // 4 values = 152 bytes + // each value = 38 bytes + // Total = 6 + 25 + 152 = 183 + Assert.assertEquals("Expect serialized size", 183L, serializedSize); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final WritableByteChannel channel = Channels.newChannel(baos); + serializer.writeTo(channel, null); + channel.close(); + + final ByteBuffer byteBuffer = ByteBuffer.wrap(baos.toByteArray()); + Assert.assertEquals(serializer.getSerializedSize(), byteBuffer.remaining()); + SpectatorHistogramIndexed indexedDeserialized = SpectatorHistogramIndexed.read(byteBuffer, strategy); + Assert.assertEquals(0, byteBuffer.remaining()); + + Assert.assertEquals("Count of entries should match", count, indexedDeserialized.size()); + + for (int i = 0; i < count; i++) { + SpectatorHistogram deserialized = indexedDeserialized.get(i); + if ((i + 1) % 3 == 0 || i >= 6) { + // Expect null + Assert.assertNull(deserialized); + } else { + Assert.assertEquals(63L, deserialized.get(6)); + Assert.assertEquals(64L, deserialized.get(7)); + Assert.assertEquals(255L, deserialized.get(8)); + Assert.assertEquals(256L, deserialized.get(9)); + Assert.assertEquals(65535L, deserialized.get(16)); + Assert.assertEquals(65536L, deserialized.get(17)); + Assert.assertEquals(4294967295L, deserialized.get(32)); + Assert.assertEquals(4294967296L, deserialized.get(33)); + } + } + } + + @Test + public void testPercentileComputation0() + { + SpectatorHistogram h = new SpectatorHistogram(); + h.insert(0); + Assert.assertEquals(0.1, h.getPercentileValue(10.0), 0.01); + Assert.assertEquals(0.5, h.getPercentileValue(50.0), 0.01); + Assert.assertEquals(0.99, h.getPercentileValue(99.0), 0.01); + Assert.assertEquals(1.0, h.getPercentileValue(100.0), 0.01); + } + + @Test + public void testPercentileComputation1_100() + { + SpectatorHistogram h = new SpectatorHistogram(); + for (int i = 0; i < 100; i++) { + h.insert(i); + } + // Precision assigned to half of the bucket width + Assert.assertEquals(10.0, h.getPercentileValue(10.0), 0.5); + Assert.assertEquals(50.0, h.getPercentileValue(50.0), 2.5); + Assert.assertEquals(99.0, h.getPercentileValue(99.0), 10.5); + Assert.assertEquals(100.0, h.getPercentileValue(100.0), 10.5); + } + + @Test + public void testPercentileComputation0_Big() + { + SpectatorHistogram h = new SpectatorHistogram(); + // one very small value, 99 very big values + h.add(0, 1); + h.add(200, 99); + long upperBoundOfBucket0 = PercentileBuckets.get(0); + long upperBoundOfBucket200 = PercentileBuckets.get(200); + long lowerBoundOfBucket200 = PercentileBuckets.get(199); + long widthOfBucket = upperBoundOfBucket200 - lowerBoundOfBucket200; + // P1 should be pulled towards the very low value + // P >1 should be pulled towards the very big value + Assert.assertEquals(upperBoundOfBucket0, h.getPercentileValue(1.0), 0.01); + Assert.assertEquals(lowerBoundOfBucket200, h.getPercentileValue(50.0), widthOfBucket / 2.0); + Assert.assertEquals(upperBoundOfBucket200, h.getPercentileValue(99.0), widthOfBucket / 2.0); + Assert.assertEquals(upperBoundOfBucket200, h.getPercentileValue(100.0), widthOfBucket / 2.0); + } + + @Test + public void 
testMedianOfSequence() + { + int[] nums = new int[]{9, 10, 12, 13, 13, 13, 15, 15, 16, 16, 18, 22, 23, 24, 24, 25}; + SpectatorHistogram h = new SpectatorHistogram(); + + for (int num : nums) { + h.insert(num); + } + + // Expect middle of the "15.5" bucket, which is 18.0 + int index = PercentileBuckets.indexOf(15); + long upperBoundOfFifteenPointFiveBucket = PercentileBuckets.get(index); + long lowerBoundOfFifteenPointFiveBucket = PercentileBuckets.get(index - 1); + long halfBucketWidth = ((upperBoundOfFifteenPointFiveBucket - lowerBoundOfFifteenPointFiveBucket) / 2); + long middleOfFifteenPointFiveBucket = lowerBoundOfFifteenPointFiveBucket + halfBucketWidth; + + Assert.assertEquals(middleOfFifteenPointFiveBucket, h.getPercentileValue(50.0), 0.01); + } +} diff --git a/extensions-contrib/spectator-histogram/src/test/resources/input_data.tsv b/extensions-contrib/spectator-histogram/src/test/resources/input_data.tsv new file mode 100644 index 000000000000..9938f51e26b4 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/test/resources/input_data.tsv @@ -0,0 +1,12 @@ +2016010101 A 10 +2016010101 B 30 +2016010101 B 40 +2016010101 B 40 +2016010101 B 40 +2016010101 B 50 +2016010101 B 50 +2016010101 C 50 +2016010101 C 20000 +2016010101 D +2016010101 E +2016010101 F \ No newline at end of file diff --git a/extensions-contrib/spectator-histogram/src/test/resources/pre_agg_data.tsv b/extensions-contrib/spectator-histogram/src/test/resources/pre_agg_data.tsv new file mode 100644 index 000000000000..6a16d6b1c591 --- /dev/null +++ b/extensions-contrib/spectator-histogram/src/test/resources/pre_agg_data.tsv @@ -0,0 +1,6 @@ +2016010101 A {"10":1} +2016010101 B {"17":1, "19":3, "21":2} +2016010101 C {"60":1, "21":1} +2016010101 D {} +2016010101 E {} +2016010101 F {} diff --git a/pom.xml b/pom.xml index 81cb00bb0cf5..6149c5866db4 100644 --- a/pom.xml +++ b/pom.xml @@ -229,6 +229,7 @@ extensions-contrib/opentelemetry-emitter extensions-contrib/kubernetes-overlord-extensions extensions-contrib/druid-iceberg-extensions + extensions-contrib/spectator-histogram distribution diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java index 4f82bdcfe69d..93cf75857c30 100755 --- a/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/AggregatorUtil.java @@ -129,6 +129,11 @@ public class AggregatorUtil // TDigest sketch aggregators public static final byte TDIGEST_BUILD_SKETCH_CACHE_TYPE_ID = 0x38; + // Spectator histogram aggregators + public static final byte SPECTATOR_HISTOGRAM_CACHE_TYPE_ID = 0x39; + public static final byte SPECTATOR_HISTOGRAM_DISTRIBUTION_CACHE_TYPE_ID = 0x3A; + public static final byte SPECTATOR_HISTOGRAM_TIMER_CACHE_TYPE_ID = 0x3B; + public static final byte MEAN_CACHE_TYPE_ID = 0x41; // ANY aggregator diff --git a/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java b/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java index f65208bd9069..ed4bbfdc82b5 100644 --- a/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java +++ b/processing/src/main/java/org/apache/druid/query/aggregation/post/PostAggregatorIds.java @@ -66,4 +66,6 @@ public class PostAggregatorIds public static final byte KLL_FLOATS_SKETCH_TO_QUANTILE_CACHE_TYPE_ID = 42; public static final byte 
KLL_FLOATS_SKETCH_TO_QUANTILES_CACHE_TYPE_ID = 43; public static final byte KLL_FLOATS_SKETCH_TO_STRING_CACHE_TYPE_ID = 44; + public static final byte SPECTATOR_HISTOGRAM_SKETCH_PERCENTILE_CACHE_TYPE_ID = 45; + public static final byte SPECTATOR_HISTOGRAM_SKETCH_PERCENTILES_CACHE_TYPE_ID = 46; } diff --git a/website/.spelling b/website/.spelling index 7561bcec965d..175774e4ac24 100644 --- a/website/.spelling +++ b/website/.spelling @@ -430,6 +430,7 @@ pluggable podSpec postgres postgresql +pre-aggregate pre-aggregated pre-aggregates pre-aggregating @@ -948,6 +949,7 @@ prometheus Pushgateway flushPeriod postAggregator +postAggregators quantileFromTDigestSketch quantilesFromTDigestSketch tDigestSketch @@ -2373,3 +2375,12 @@ markUnused markUsed segmentId aggregateMultipleValues + +- ../docs/development/extensions-contrib/spectator-histogram.md +SpectatorHistogram +PercentileBuckets +spectatorHistogram +spectatorHistogramTimer +spectatorHistogramDistribution +percentileSpectatorHistogram +percentilesSpectatorHistogram