Skip to content

Re-indexing a DruidInputSource with explicit dimensions includes all dimensions from original datasource #9592

@suneet-s

Description

@suneet-s

Affected Version

0.18

Description

If you re-index a Druid datasource using the Druid input source and explicitly list the columns in the dimensionsSpec, the re-index job still ingests every column from the original datasource. To exclude a column, you have to add it manually to the dimensionExclusions field of the dimensionsSpec.

The following ingestion spec, which is used in the integration tests, reproduces the issue. The field "robot" is included in the re-indexed datasource even though it is not explicitly specified in the ingestion spec.

{
    "type": "index",
    "spec": {
        "ioConfig": {
            "type": "index",
            "inputSource": {
                "type": "druid",
                "dataSource": "%%DATASOURCE%%",
                "interval": "2013-08-31/2013-09-01"
            }
        },
        "tuningConfig": {
            "type": "index",
            "partitionsSpec": {
                "type": "dynamic"
            }
        },
        "dataSchema": {
            "dataSource": "%%REINDEX_DATASOURCE%%",
            "granularitySpec": {
                "type": "uniform",
                "queryGranularity": "SECOND",
                "segmentGranularity": "DAY"
            },
            "timestampSpec": {
                "column": "__time",
                "format": "iso"
            },
            "dimensionsSpec": {
                "dimensions": [
                    "page",
                    {"type": "string", "name": "language", "createBitmapIndex": false},
                    "user",
                    "unpatrolled",
                    "newPage",
                    "anonymous",
                    "namespace",
                    "country",
                    "region",
                    "city"
                ]
            },
            "transformSpec": {
                "transforms": [
                    {
                        "type": "expression",
                        "name": "newPage",
                        "expression": "page"
                    },
                    {
                        "type": "expression",
                        "name": "one-plus-triple-added",
                        "expression": "\"triple-added\" + 1"
                    },
                    {
                        "type": "expression",
                        "name": "double-deleted",
                        "expression": "deleted * 2"
                    }
                ]
            },
            "metricsSpec": [
                {
                    "type": "doubleSum",
                    "name": "added",
                    "fieldName": "added"
                },
                {
                    "type": "doubleSum",
                    "name": "triple-added",
                    "fieldName": "triple-added"
                },
                {
                    "type": "doubleSum",
                    "name": "one-plus-triple-added",
                    "fieldName": "one-plus-triple-added"
                },
                {
                    "type": "doubleSum",
                    "name": "deleted",
                    "fieldName": "deleted"
                },
                {
                    "type": "doubleSum",
                    "name": "double-deleted",
                    "fieldName": "double-deleted"
                },
                {
                    "type": "doubleSum",
                    "name": "delta",
                    "fieldName": "delta"
                }
            ]
        }
    }
}

Metadata

Metadata

Assignees

No one assigned

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions