Hello. @jon-wei
I am confused with perform a druid histogram query.
The request body of histogram query like this :
"queryType": "timeseries",
"dataSource": {
"type": "table",
"name": "BRS_PAGE_HOUR"
},
"intervals": {
"type": "intervals",
"intervals": [
"2018-11-14T11:42:00/2019-01-14T11:42:00"
]
},
"descending": false,
"virtualColumns": [],
"filter": {
"type": "and",
"fields": [
{
"type": "selector",
"dimension": "browser_application_id",
"value": "2",
"extractionFn": null
},
{
"type": "selector",
"dimension": "host",
"value": "10.111.243.10:7001",
"extractionFn": null
},
{
"type": "selector",
"dimension": "uri",
"value": "/PMS/framework/popWindow.html",
"extractionFn": null
},
{
"type": "bound",
"dimension": "count",
"lower": "0",
"upper": null,
"lowerStrict": true,
"upperStrict": false,
"extractionFn": null,
"ordering": {
"type": "lexicographic"
}
}
]
},
"granularity": {
"type": "all"
},
"aggregations": [
{
"type": "longSum",
"name": "count_total",
"fieldName": "count",
"expression": null
},
{
"type": "approxHistogramFold",
"name": "first_paint_time_histogram_total",
"fieldName": "first_paint_time_histogram",
"resolution": 50,
"numBuckets": 9,
"lowerLimit": 0,
"upperLimit": "Infinity"
}
],
"postAggregations": [
{
"type": "customBuckets",
"name": "performance_histogram",
"fieldName": "first_paint_time_histogram_total",
"breaks": [
0,
200,
400,
600,
800,
1000,
1200,
1400,
"Infinity"
]
},
{ "type" : "min", "name" : "min_value", "fieldName" : "first_paint_time_histogram_total"},
{ "type" : "max", "name" : "max_value", "fieldName" : "first_paint_time_histogram_total"}
],
"context": {
"skipEmptyBuckets": "true"
}
}
The first query result like this :
[ {
"timestamp" : "2018-12-14T02:00:00.000Z",
"result" : {
"performance_histogram" : {
"breaks" : [ 0.0, 200.0, 400.0, 600.0, 800.0, 1000.0, 1200.0, 1400.0, "Infinity" ],
"counts" : [ 339.6748046875, 96.61546325683594, 70.65382385253906, 44.69218063354492, 18.730541229248047, 0.9215729236602783, 0.43618109822273254, 9.275420188903809 ]
},
"min_value" : 0.0,
"count_total" : 581,
"first_paint_time_histogram_total" : {
"breaks" : [ -4443.75, 0.0, 4443.75, 8887.5, 13331.25, 17775.0, 22218.75, 26662.5, 31106.25, 35550.0 ],
"counts" : [ 0.0, 576.6718139648438, 1.3282045125961304, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0 ]
},
"max_value" : 35550.0
}
} ]
The second query result like this :
[ {
"timestamp" : "2018-12-14T02:00:00.000Z",
"result" : {
"performance_histogram" : {
"breaks" : [ 0.0, 200.0, 400.0, 600.0, 800.0, 1000.0, 1200.0, 1400.0, "Infinity" ],
"counts" : [ 550.0, 4.094977378845215, 6.259572505950928, 2.6454498767852783, 1.0, 1.0, 0.0, 9.0 ]
},
"min_value" : 0.0,
"count_total" : 581,
"first_paint_time_histogram_total" : {
"breaks" : [ -4443.75, 0.0, 4443.75, 8887.5, 13331.25, 17775.0, 22218.75, 26662.5, 31106.25, 35550.0 ],
"counts" : [ 7.0, 570.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0 ]
},
"max_value" : 35550.0
}
} ]
Then i increase the resolution to 500 and the result always like this :
[ {
"timestamp" : "2018-12-14T02:00:00.000Z",
"result" : {
"performance_histogram" : {
"breaks" : [ 0.0, 200.0, 400.0, 600.0, 800.0, 1000.0, 1200.0, 1400.0, "Infinity" ],
"counts" : [ 318.0, 79.0, 14.0, 2.0, 1.0, 1.0, 0.0, 9.0 ]
},
"min_value" : 0.0,
"count_total" : 581,
"first_paint_time_histogram_total" : {
"breaks" : [ -4443.75, 0.0, 4443.75, 8887.5, 13331.25, 17775.0, 22218.75, 26662.5, 31106.25, 35550.0 ],
"counts" : [ 157.0, 420.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0 ]
},
"max_value" : 35550.0
}
} ]
Even though i have known results can vary for the same query from docs, but i am confused why the first bucket of first_paint_time_histogram_total was [negative number, 0] and its count also>0. The min value of the column was 0. And the [0.0, 200.0] bucket which is the first bucket of custom buckets looks like not include count of the first bucket of first_paint_time_histogram_total.
My ingestion spce file define the histogram column like this:
...
{
"name": "first_paint_time_histogram",
"fieldName": "first_paint_time",
"type": "approxHistogramFold",
"resolution": 50,
"numBuckets": 7,
"lowerLimit": 0
},
...
And the data of the column like this:
mysql> select first_paint_time from BRS_PAGE_HOUR where __time>='2019-01-13 00:00:00' and browser_application_id='9' limit 100;
+------------------+
| first_paint_time |
+------------------+
| 325 |
| 147 |
| 653 |
| 189 |
| 147 |
| 422 |
| 413 |
| 17 |
| 238 |
| 423 |
| 150 |
| 449 |
| 149 |
| 345 |
| 1661 |
| 2600 |
| 427 |
| 209 |
| 1622 |
| 0 |
| 0 |
| 12 |
| 153 |
| 144 |
| 572 |
| 112 |
| 48 |
| 2831 |
| 493 |
| 577 |
| 164 |
| 360 |
| 234 |
| 368 |
| 408 |
| 108 |
| 241 |
| 2903 |
| 2923 |
| 1921 |
| 343 |
| 12363 |
| 616 |
| 356 |
| 564 |
| 266 |
| 309 |
| 418 |
| 284 |
| 247 |
| 1858 |
| 1488 |
| 15 |
| 1710 |
| 185 |
| 128 |
| 267 |
| 379 |
| 526 |
| 121 |
| 490 |
| 540 |
| 132 |
| 251 |
| 119 |
| 505 |
| 200 |
| 3715 |
| 347 |
| 33 |
| 18 |
| 1140 |
| 371 |
| 232 |
| 300 |
| 304 |
| 189 |
| 373 |
| 297 |
| 302 |
| 257 |
| 401 |
| 2058 |
| 389 |
| 432 |
| 359 |
| 822 |
| 409 |
| 2992 |
| 1045 |
| 386 |
| 1469 |
| 6715 |
| 1046 |
| 389 |
| 1052 |
| 196 |
| 1050 |
| 390 |
| 454 |
+------------------+
100 rows in set (0.07 sec)
Do you have any advices ? Thank you so much.
Hello. @jon-wei
I am confused with perform a druid histogram query.
The request body of histogram query like this :
The first query result like this :
The second query result like this :
Then i increase the resolution to 500 and the result always like this :
Even though i have known results can vary for the same query from docs, but i am confused why the first bucket of first_paint_time_histogram_total was [negative number, 0] and its count also>0. The min value of the column was 0. And the [0.0, 200.0] bucket which is the first bucket of custom buckets looks like not include count of the first bucket of first_paint_time_histogram_total.
My ingestion spce file define the histogram column like this:
And the data of the column like this:
Do you have any advices ? Thank you so much.