Skip to content

Inconsistencies in the result of the quantile aggregator #6099

@jacktomcat

Description

@jacktomcat

I am using the quantile aggregator, but the query results are often inconsistent from one run to the next. The Druid version is 0.12.1.

Here is my query:

{
  "queryType": "groupBy",
  "dataSource": {
    "type": "table",
    "name": "SVR_ACTION_DATA"
  },
  "intervals": {
    "type": "LegacySegmentSpec",
    "intervals": [
      "2018-04-30T00:00:00.000Z/2018-05-31T00:00:00.000Z"
    ]
  },
  "virtualColumns": [],
  "filter": {
    "type": "and",
    "fields": [
      {
        "type": "selector",
        "dimension": "biz_system_id",
        "value": "1",
        "extractionFn": null
      },
      {
        "type": "selector",
        "dimension": "data_type",
        "value": "APP",
        "extractionFn": null
      }
    ]
  },
  "granularity": "ALL",
  "dimensions": [],
  "context" : {
    "skipEmptyBuckets": "true"
  },
  "aggregations": [
    {
      "type": "longSum",
      "name": "respTime",
      "fieldName": "resp_time",
      "expression": null
    },
    {
      "type": "longSum",
      "name": "successCount",
      "fieldName": "success_count",
      "expression": null
    },
    {
      "type": "approxHistogramFold",
      "name": "resp_time_his",
      "fieldName": "resp_time_his",
      "resolution": 600,
      "numBuckets": 7,
      "lowerLimit": "-Infinity",
      "upperLimit": "Infinity"
    }
  ],
  "postAggregations": [
    {
      "type": "arithmetic",
      "name": "response",
      "fn": "/",
      "fields": [
        {
          "type": "fieldAccess",
          "name": "0-fieldaccess",
          "fieldName": "respTime"
        },
        {
          "type": "fieldAccess",
          "name": "1-fieldaccess",
          "fieldName": "successCount"
        }
      ],
      "ordering": null
    },
    {
      "type": "quantiles",
      "name": "respTimeHis",
      "fieldName": "resp_time_his",
      "probabilities": [
        0.5,0.6,0.7,0.8
      ]
    }
  ],
  "having": null,
  "limitSpec": {
    "type": "default",
    "columns": [],
    "limit": 2147483647
  },
  "descending": false
}

The following is the result of running it:

[
    {
        "version": "v1",
        "timestamp": "2018-04-30T00:00:00.000Z",
        "event": {
            "successCount": 1364670887,
            "resp_time_his": {
                "breaks": [
                    -124.83333587646484,
                    0,
                    124.83333587646484,
                    249.6666717529297,
                    374.5,
                    499.3333435058594,
                    624.1666870117188,
                    749
                ],
                "counts": [
                    0,
                    5772020,
                    5751136,
                    5755229,
                    5747462.5,
                    5748575,
                    5775551
                ]
            },
            "response": 9.481008641169906,
            "respTime": 12938456472,
            "respTimeHis": {
                "probabilities": [
                    0.5,
                    0.6,
                    0.7,
                    0.8
                ],
                "quantiles": [
                    373.30453,
                    448.30597,
                    523.41125,
                    598.38446
                ],
                "min": 0,
                "max": 749
            }
        }
    }
]

But the values in the quantiles array are inconsistent across runs of the same query, for example:

quantiles: [373.30453, 448.30597, 523.41125, 598.38446 ]
quantiles: [373.64453, 449.40597, 509.42345, 573.23112 ]
.....

What is the cause of this? @fjy @gianm @xvrl @nishantmonu51 @drcrallen

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions