From 7ed7cf9ae9b53f944ed6aabd111a8ae2e286680e Mon Sep 17 00:00:00 2001 From: jtuglu1 Date: Thu, 9 Oct 2025 12:49:20 -0700 Subject: [PATCH] Add sessionToken support for S3InputSource (#18609) Allows Druid to use an AWS_SESSION_TOKEN (and skip assuming a role to fetch a token) if provided in the spec. No explicit assumptions are made with the input (e.g. that adding sessionToken and assumeRoleAre are mutually exclusive). This support falls in line with how other engines use things like external catalogs, etc. to vend temporary credentials to access S3, while leaving things like the exact ARN opaque to the caller. --- docs/ingestion/input-sources.md | 5 +- .../model/table/S3InputSourceDefn.java | 9 +- .../druid/data/input/s3/S3InputSource.java | 23 ++- .../data/input/s3/S3InputSourceConfig.java | 25 ++- .../model/table/S3InputSourceDefnTest.java | 3 +- .../input/s3/S3InputSourceConfigTest.java | 6 +- .../data/input/s3/S3InputSourceTest.java | 151 +++++++++++++++++- .../msq/util/MSQTaskQueryMakerUtils.java | 2 +- .../msq/util/MSQTaskQueryMakerUtilsTest.java | 4 +- website/.spelling | 1 + 10 files changed, 206 insertions(+), 23 deletions(-) diff --git a/docs/ingestion/input-sources.md b/docs/ingestion/input-sources.md index 3066ba7386d9..49cf90cdbf58 100644 --- a/docs/ingestion/input-sources.md +++ b/docs/ingestion/input-sources.md @@ -204,8 +204,9 @@ Properties Object: |Property|Description|Default|Required| |--------|-----------|-------|---------| -|accessKeyId|The [Password Provider](../operations/password-provider.md) or plain text string of this S3 input source access key|None|yes if secretAccessKey is given| -|secretAccessKey|The [Password Provider](../operations/password-provider.md) or plain text string of this S3 input source secret key|None|yes if accessKeyId is given| +|accessKeyId|The [Password Provider](../operations/password-provider.md) or plain text string of this S3 input source access key|None|Yes, if `secretAccessKey` or `sessionToken` is given.| +|secretAccessKey|The [Password Provider](../operations/password-provider.md) or plain text string of this S3 input source secret key|None|Yes, if `accessKeyId` or `sessionToken` is given.| +|sessionToken|The [Password Provider](../operations/password-provider.md) or plain text string of this S3 input source session token|None|no| |assumeRoleArn|AWS ARN of the role to assume [see](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_request.html). **assumeRoleArn** can be used either with the ingestion spec AWS credentials or with the default S3 credentials|None|no| |assumeRoleExternalId|A unique identifier that might be required when you assume a role in another account [see](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_temp_request.html)|None|no| diff --git a/extensions-core/s3-extensions/src/main/java/org/apache/druid/catalog/model/table/S3InputSourceDefn.java b/extensions-core/s3-extensions/src/main/java/org/apache/druid/catalog/model/table/S3InputSourceDefn.java index 358862d03327..ca84236d14a7 100644 --- a/extensions-core/s3-extensions/src/main/java/org/apache/druid/catalog/model/table/S3InputSourceDefn.java +++ b/extensions-core/s3-extensions/src/main/java/org/apache/druid/catalog/model/table/S3InputSourceDefn.java @@ -79,6 +79,7 @@ public class S3InputSourceDefn extends FormattedInputSourceDefn public static final String ACCESS_KEY_ID_PARAMETER = "accessKeyId"; public static final String SECRET_ACCESS_KEY_PARAMETER = "secretAccessKey"; public static final String ASSUME_ROLE_ARN_PARAMETER = "assumeRoleArn"; + public static final String SESSION_TOKEN_PARAMETER = "sessionToken"; /** * The {@code objectGlob} property exists in S3, but is not documented. The corresponding @@ -102,7 +103,8 @@ public class S3InputSourceDefn extends FormattedInputSourceDefn private static final List SECURITY_PARAMS = Arrays.asList( new Parameter(ACCESS_KEY_ID_PARAMETER, ParameterType.VARCHAR, true), new Parameter(SECRET_ACCESS_KEY_PARAMETER, ParameterType.VARCHAR, true), - new Parameter(ASSUME_ROLE_ARN_PARAMETER, ParameterType.VARCHAR, true) + new Parameter(ASSUME_ROLE_ARN_PARAMETER, ParameterType.VARCHAR, true), + new Parameter(SESSION_TOKEN_PARAMETER, ParameterType.VARCHAR, true) ); // Field names in the S3InputSource @@ -114,6 +116,7 @@ public class S3InputSourceDefn extends FormattedInputSourceDefn private static final String ACCESS_KEY_ID_FIELD = "accessKeyId"; private static final String SECRET_ACCESS_KEY_FIELD = "secretAccessKey"; private static final String ASSUME_ROLE_ARN_FIELD = "assumeRoleArn"; + private static final String SESSION_TOKEN_FIELD = "sessionToken"; @Override public String typeValue() @@ -250,6 +253,7 @@ private void applySecurityParams(Map jsonMap, Map properties = new HashMap<>(); if (accessKeyId != null) { @@ -258,6 +262,9 @@ private void applySecurityParams(Map jsonMap, Map iterator = reader.read(); + + while (iterator.hasNext()) { + InputRow nextRow = iterator.next(); + Assert.assertEquals(NOW, nextRow.getTimestamp()); + Assert.assertEquals("hello", nextRow.getDimension("dim1").get(0)); + Assert.assertEquals("world", nextRow.getDimension("dim2").get(0)); + } + + EasyMock.verify(S3_CLIENT); + EasyMock.verify(SERVER_SIDE_ENCRYPTING_AMAZON_S3_BUILDER); + } + @Test public void testGetTypes() { diff --git a/multi-stage-query/src/main/java/org/apache/druid/msq/util/MSQTaskQueryMakerUtils.java b/multi-stage-query/src/main/java/org/apache/druid/msq/util/MSQTaskQueryMakerUtils.java index 2e9e67a7534e..955bc3483c16 100644 --- a/multi-stage-query/src/main/java/org/apache/druid/msq/util/MSQTaskQueryMakerUtils.java +++ b/multi-stage-query/src/main/java/org/apache/druid/msq/util/MSQTaskQueryMakerUtils.java @@ -40,7 +40,7 @@ public class MSQTaskQueryMakerUtils { - public static final Set SENSISTIVE_JSON_KEYS = ImmutableSet.of("accessKeyId", "secretAccessKey"); + public static final Set SENSISTIVE_JSON_KEYS = ImmutableSet.of("accessKeyId", "secretAccessKey", "sessionToken"); public static final Set SENSITIVE_KEYS_REGEX_PATTERNS = SENSISTIVE_JSON_KEYS.stream() .map(sensitiveKey -> Pattern.compile( diff --git a/multi-stage-query/src/test/java/org/apache/druid/msq/util/MSQTaskQueryMakerUtilsTest.java b/multi-stage-query/src/test/java/org/apache/druid/msq/util/MSQTaskQueryMakerUtilsTest.java index bb0bdcd34d11..8febf4bfdc23 100644 --- a/multi-stage-query/src/test/java/org/apache/druid/msq/util/MSQTaskQueryMakerUtilsTest.java +++ b/multi-stage-query/src/test/java/org/apache/druid/msq/util/MSQTaskQueryMakerUtilsTest.java @@ -57,7 +57,7 @@ public void maskSensitiveJsonKeys() + "OVERWRITE ALL\\n" + "WITH ext AS " + "(SELECT *\\nFROM TABLE(\\n " - + "EXTERN(\\n '{\\\"type\\\":\\\"s3\\\",\\\"prefixes\\\":[\\\"s3://prefix\\\"],\\\"properties\\\":{\\\"accessKeyId\\\":{\\\"type\\\":\\\"default\\\",\\\"password\\\":\\\"secret_pass\\\"},\\\"secretAccessKey\\\":{\\\"type\\\":\\\"default\\\",\\\"password\\\":\\\"secret_pass\\\"}}}',\\n" + + "EXTERN(\\n '{\\\"type\\\":\\\"s3\\\",\\\"prefixes\\\":[\\\"s3://prefix\\\"],\\\"properties\\\":{\\\"accessKeyId\\\":{\\\"type\\\":\\\"default\\\",\\\"password\\\":\\\"secret_pass\\\"},\\\"secretAccessKey\\\":{\\\"type\\\":\\\"default\\\",\\\"password\\\":\\\"secret_pass\\\"},\\\"sessionToken\\\":{\\\"type\\\":\\\"default\\\",\\\"password\\\":\\\"secret_pass\\\"}}}',\\n" + "'{\\\"type\\\":\\\"json\\\"}',\\n" + "'[{\\\"name\\\":\\\"time\\\",\\\"type\\\":\\\"string\\\"},{\\\"name\\\":\\\"name\\\",\\\"type\\\":\\\"string\\\"}]'\\n )\\n))\\n" + "SELECT\\n TIME_PARSE(\\\"time\\\") AS __time,\\n name,\\n country " @@ -113,7 +113,7 @@ public void maskSensitiveJsonKeys() + "OVERWRITE ALL\\n" + "WITH ext AS " + "(SELECT *\\nFROM TABLE(\\n " - + "EXTERN(\\n '{\\\"type\\\":\\\"s3\\\",\\\"prefixes\\\":[\\\"s3://prefix\\\"],\\\"properties\\\":{\\\"accessKeyId\\\":,\\\"secretAccessKey\\\":}}',\\n" + + "EXTERN(\\n '{\\\"type\\\":\\\"s3\\\",\\\"prefixes\\\":[\\\"s3://prefix\\\"],\\\"properties\\\":{\\\"accessKeyId\\\":,\\\"secretAccessKey\\\":,\\\"sessionToken\\\":}}',\\n" + "'{\\\"type\\\":\\\"json\\\"}',\\n" + "'[{\\\"name\\\":\\\"time\\\",\\\"type\\\":\\\"string\\\"},{\\\"name\\\":\\\"name\\\",\\\"type\\\":\\\"string\\\"}]'\\n )\\n))\\n" + "SELECT\\n TIME_PARSE(\\\"time\\\") AS __time,\\n name,\\n country " diff --git a/website/.spelling b/website/.spelling index 767ef4249aba..82711ff84421 100644 --- a/website/.spelling +++ b/website/.spelling @@ -1481,6 +1481,7 @@ httpAuthenticationPassword accessKeyId secretAccessKey accessKeyId +sessionToken httpAuthenticationPassword countryName appendToExisting