From 5a04206332c94eb308e0147239db56a0d7b0cb42 Mon Sep 17 00:00:00 2001 From: Liam Huffman Date: Mon, 23 Jun 2025 13:17:27 -0700 Subject: [PATCH 1/6] feat(bigquery): Add support for custom timezones and timestamps --- .../bigquery/ExternalTableDefinition.java | 139 ++++++++++++++ .../cloud/bigquery/LoadJobConfiguration.java | 173 +++++++++++++++++- .../bigquery/ExternalTableDefinitionTest.java | 28 +++ .../bigquery/LoadJobConfigurationTest.java | 21 +++ 4 files changed, 360 insertions(+), 1 deletion(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java index ea4cceead..15c128136 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java @@ -215,6 +215,45 @@ public Builder setMaxStaleness(String maxStaleness) { abstract Builder setMaxStalenessInner(String maxStaleness); + /** + * Time zone used when parsing timestamp values that do not have specific time zone information + * (e.g. 2024-04-20 12:34:56). The expected format is an IANA timezone string (e.g. + * America/Los_Angeles). + */ + public abstract Builder setTimeZone(String timeZone); + + /** Format used to parse DATE values. Supports C-style and SQL-style values. */ + public abstract Builder setDateFormat(String dateFormat); + + /** Format used to parse DATETIME values. Supports C-style and SQL-style values. */ + public abstract Builder setDatetimeFormat(String datetimeFormat); + + /** Format used to parse TIME values. Supports C-style and SQL-style values. */ + public abstract Builder setTimeFormat(String timeFormat); + + /** Format used to parse TIMESTAMP values. Supports C-style and SQL-style values. 
*/ + public abstract Builder setTimestampFormat(String timestampFormat); + + /** + * Controls the strategy used to match loaded columns to the schema. If not set, a sensible + * default is chosen based on how the schema is provided. If autodetect is used, then columns + * are matched by name. Otherwise, columns are matched by position. This is done to keep the + * behavior backward-compatible. Acceptable values are: POSITION - matches by position. This + * assumes that the columns are ordered the same way as the schema. NAME - matches by name. This + * reads the header row as column names and reorders columns to match the field names in the + * schema. + */ + public abstract Builder setSourceColumnMatch(String sourceColumnMatch); + + /** + * A list of strings represented as SQL NULL value in a CSV file. null_marker and null_markers + * can't be set at the same time. If null_marker is set, null_markers has to be not set. If + * null_markers is set, null_marker has to be not set. If both null_marker and null_markers are + * set at the same time, a user error would be thrown. Any strings listed in null_markers, + * including empty string would be interpreted as SQL NULL. This applies to all column types. + */ + public abstract Builder setNullMarkers(List nullMarkers); + /** Creates an {@code ExternalTableDefinition} object. */ @Override public abstract ExternalTableDefinition build(); @@ -373,6 +412,37 @@ public HivePartitioningOptions getHivePartitioningOptions() { return getHivePartitioningOptionsInner(); } + /** + * Returns the time zone used when parsing timestamp values that don't have specific time zone + * information. + */ + @Nullable + public abstract String getTimeZone(); + + /** Returns the format used to parse DATE values. */ + @Nullable + public abstract String getDateFormat(); + + /** Returns the format used to parse DATETIME values. */ + @Nullable + public abstract String getDatetimeFormat(); + + /** Returns the format used to parse TIME values. 
*/ + @Nullable + public abstract String getTimeFormat(); + + /** Returns the format used to parse TIMESTAMP values. */ + @Nullable + public abstract String getTimestampFormat(); + + /** Returns the strategy used to match loaded columns to the schema, either POSITION or NAME. */ + @Nullable + public abstract String getSourceColumnMatch(); + + /** Returns a list of strings represented as SQL NULL value in a CSV file. */ + @Nullable + public abstract List getNullMarkers(); + @Nullable abstract HivePartitioningOptions getHivePartitioningOptionsInner(); @@ -454,6 +524,27 @@ com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataC if (getMetadataCacheMode() != null) { externalConfigurationPb.setMetadataCacheMode(getMetadataCacheMode()); } + if (getTimeZone() != null) { + externalConfigurationPb.setTimeZone(getTimeZone()); + } + if (getDateFormat() != null) { + externalConfigurationPb.setDateFormat(getDateFormat()); + } + if (getDatetimeFormat() != null) { + externalConfigurationPb.setDatetimeFormat(getDatetimeFormat()); + } + if (getTimeFormat() != null) { + externalConfigurationPb.setTimeFormat(getTimeFormat()); + } + if (getTimestampFormat() != null) { + externalConfigurationPb.setTimestampFormat(getTimestampFormat()); + } + if (getSourceColumnMatch() != null) { + externalConfigurationPb.getCsvOptions().setSourceColumnMatch(getSourceColumnMatch()); + } + if (getNullMarkers() != null) { + externalConfigurationPb.getCsvOptions().setNullMarkers(getNullMarkers()); + } return externalConfigurationPb; } @@ -654,6 +745,30 @@ static ExternalTableDefinition fromPb(Table tablePb) { if (tablePb.getMaxStaleness() != null) { builder.setMaxStaleness(tablePb.getMaxStaleness()); } + if (externalDataConfiguration.getTimeZone() != null) { + builder.setTimeZone(externalDataConfiguration.getTimeZone()); + } + if (externalDataConfiguration.getDateFormat() != null) { + builder.setDateFormat(externalDataConfiguration.getDateFormat()); + } + if 
(externalDataConfiguration.getDatetimeFormat() != null) { + builder.setDatetimeFormat(externalDataConfiguration.getDatetimeFormat()); + } + if (externalDataConfiguration.getTimeFormat() != null) { + builder.setTimeFormat(externalDataConfiguration.getTimeFormat()); + } + if (externalDataConfiguration.getTimestampFormat() != null) { + builder.setTimestampFormat(externalDataConfiguration.getTimestampFormat()); + } + if (externalDataConfiguration.getCsvOptions() != null) { + if (externalDataConfiguration.getCsvOptions().getSourceColumnMatch() != null) { + builder.setSourceColumnMatch( + externalDataConfiguration.getCsvOptions().getSourceColumnMatch()); + } + if (externalDataConfiguration.getCsvOptions().getNullMarkers() != null) { + builder.setNullMarkers(externalDataConfiguration.getCsvOptions().getNullMarkers()); + } + } } return builder.build(); } @@ -724,6 +839,30 @@ static ExternalTableDefinition fromExternalDataConfiguration( if (externalDataConfiguration.getMetadataCacheMode() != null) { builder.setMetadataCacheMode(externalDataConfiguration.getMetadataCacheMode()); } + if (externalDataConfiguration.getTimeZone() != null) { + builder.setTimeZone(externalDataConfiguration.getTimeZone()); + } + if (externalDataConfiguration.getDateFormat() != null) { + builder.setDateFormat(externalDataConfiguration.getDateFormat()); + } + if (externalDataConfiguration.getDatetimeFormat() != null) { + builder.setDatetimeFormat(externalDataConfiguration.getDatetimeFormat()); + } + if (externalDataConfiguration.getTimeFormat() != null) { + builder.setTimeFormat(externalDataConfiguration.getTimeFormat()); + } + if (externalDataConfiguration.getTimestampFormat() != null) { + builder.setTimestampFormat(externalDataConfiguration.getTimestampFormat()); + } + if (externalDataConfiguration.getCsvOptions() != null) { + if (externalDataConfiguration.getCsvOptions().getSourceColumnMatch() != null) { + builder.setSourceColumnMatch( + 
externalDataConfiguration.getCsvOptions().getSourceColumnMatch()); + } + if (externalDataConfiguration.getCsvOptions().getNullMarkers() != null) { + builder.setNullMarkers(externalDataConfiguration.getCsvOptions().getNullMarkers()); + } + } return builder.build(); } diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java index 5d1755459..88252d88c 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java @@ -63,6 +63,13 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load private final List connectionProperties; private final Boolean createSession; private final String reservation; + private final String timeZone; + private final String dateFormat; + private final String datetimeFormat; + private final String timeFormat; + private final String timestampFormat; + private final String sourceColumnMatch; + private final List nullMarkers; public static final class Builder extends JobConfiguration.Builder implements LoadConfiguration.Builder { @@ -95,6 +102,13 @@ public static final class Builder extends JobConfiguration.Builder connectionProperties; private Boolean createSession; private String reservation; + private String timeZone; + private String dateFormat; + private String datetimeFormat; + private String timeFormat; + private String timestampFormat; + private String sourceColumnMatch; + private List nullMarkers; private Builder() { super(Type.LOAD); @@ -129,6 +143,13 @@ private Builder(LoadJobConfiguration loadConfiguration) { this.connectionProperties = loadConfiguration.connectionProperties; this.createSession = loadConfiguration.createSession; this.reservation = loadConfiguration.reservation; + this.timeZone = loadConfiguration.timeZone; + this.dateFormat = 
loadConfiguration.dateFormat; + this.datetimeFormat = loadConfiguration.datetimeFormat; + this.timeFormat = loadConfiguration.timeFormat; + this.timestampFormat = loadConfiguration.timestampFormat; + this.sourceColumnMatch = loadConfiguration.sourceColumnMatch; + this.nullMarkers = loadConfiguration.nullMarkers; } private Builder(com.google.api.services.bigquery.model.JobConfiguration configurationPb) { @@ -238,6 +259,27 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur if (configurationPb.getReservation() != null) { this.reservation = configurationPb.getReservation(); } + if (loadConfigurationPb.getTimeZone() != null) { + this.timeZone = loadConfigurationPb.getTimeZone(); + } + if (loadConfigurationPb.getDateFormat() != null) { + this.dateFormat = loadConfigurationPb.getDateFormat(); + } + if (loadConfigurationPb.getDatetimeFormat() != null) { + this.datetimeFormat = loadConfigurationPb.getDatetimeFormat(); + } + if (loadConfigurationPb.getTimeFormat() != null) { + this.timeFormat = loadConfigurationPb.getTimeFormat(); + } + if (loadConfigurationPb.getTimestampFormat() != null) { + this.timestampFormat = loadConfigurationPb.getTimestampFormat(); + } + if (loadConfigurationPb.getSourceColumnMatch() != null) { + this.sourceColumnMatch = loadConfigurationPb.getSourceColumnMatch(); + } + if (loadConfigurationPb.getNullMarkers() != null) { + this.nullMarkers = loadConfigurationPb.getNullMarkers(); + } } @Override @@ -449,6 +491,62 @@ public Builder setReservation(String reservation) { return this; } + /** + * [Experimental] Default time zone that will apply when parsing timestamp values that have no + * specific time zone. + */ + public Builder setTimeZone(String timeZone) { + this.timeZone = timeZone; + return this; + } + + /** Date format used for parsing DATE values. */ + public Builder setDateFormat(String dateFormat) { + this.dateFormat = dateFormat; + return this; + } + + /** Format used for parsing DATETIME values. 
*/ + public Builder setDatetimeFormat(String datetimeFormat) { + this.datetimeFormat = datetimeFormat; + return this; + } + + /** Format used for parsing TIME values. */ + public Builder setTimeFormat(String timeFormat) { + this.timeFormat = timeFormat; + return this; + } + + /** Format used for parsing TIMESTAMP values. */ + public Builder setTimestampFormat(String timestampFormat) { + this.timestampFormat = timestampFormat; + return this; + } + + /** + * Controls the strategy used to match loaded columns to the schema. If not set, a sensible + * default is chosen based on how the schema is provided. If autodetect is used, then columns + * are matched by name. Otherwise, columns are matched by position. This is done to keep the + * behavior backward-compatible. + */ + public Builder setSourceColumnMatch(String sourceColumnMatch) { + this.sourceColumnMatch = sourceColumnMatch; + return this; + } + + /** + * A list of strings represented as SQL NULL value in a CSV file. null_marker and null_markers + * can't be set at the same time. If null_marker is set, null_markers has to be not set. If + * null_markers is set, null_marker has to be not set. If both null_marker and null_markers are + * set at the same time, a user error would be thrown. Any strings listed in null_markers, + * including empty string would be interpreted as SQL NULL. This applies to all column types. 
+ */ + public Builder setNullMarkers(List nullMarkers) { + this.nullMarkers = nullMarkers; + return this; + } + @Override public LoadJobConfiguration build() { return new LoadJobConfiguration(this); @@ -483,6 +581,13 @@ private LoadJobConfiguration(Builder builder) { this.connectionProperties = builder.connectionProperties; this.createSession = builder.createSession; this.reservation = builder.reservation; + this.timeZone = builder.timeZone; + this.dateFormat = builder.dateFormat; + this.datetimeFormat = builder.datetimeFormat; + this.timeFormat = builder.timeFormat; + this.timestampFormat = builder.timestampFormat; + this.sourceColumnMatch = builder.sourceColumnMatch; + this.nullMarkers = builder.nullMarkers; } @Override @@ -634,6 +739,44 @@ public String getReservation() { return reservation; } + /** + * Returns the time zone used when parsing timestamp values that don't have specific time zone + * information. + */ + public String getTimeZone() { + return timeZone; + } + + /** Returns the format used to parse DATE values. */ + public String getDateFormat() { + return dateFormat; + } + + /** Returns the format used to parse DATETIME values. */ + public String getDatetimeFormat() { + return datetimeFormat; + } + + /** Returns the format used to parse TIME values. */ + public String getTimeFormat() { + return timeFormat; + } + + /** Returns the format used to parse TIMESTAMP values. */ + public String getTimestampFormat() { + return timestampFormat; + } + + /** Returns the strategy used to match loaded columns to the schema, either POSITION or NAME. */ + public String getSourceColumnMatch() { + return sourceColumnMatch; + } + + /** Returns a list of strings represented as SQL NULL value in a CSV file. 
*/ + public List getNullMarkers() { + return nullMarkers; + } + @Override public Builder toBuilder() { return new Builder(this); @@ -667,7 +810,14 @@ ToStringHelper toStringHelper() { .add("referenceFileSchemaUri", referenceFileSchemaUri) .add("connectionProperties", connectionProperties) .add("createSession", createSession) - .add("reservation", reservation); + .add("reservation", reservation) + .add("timeZone", timeZone) + .add("dateFormat", dateFormat) + .add("datetimeFormat", datetimeFormat) + .add("timeFormat", timeFormat) + .add("timestampFormat", timestampFormat) + .add("sourceColumnMatch", sourceColumnMatch) + .add("nullMarkers", nullMarkers); } @Override @@ -789,6 +939,27 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() { if (reservation != null) { jobConfiguration.setReservation(reservation); } + if (timeZone != null) { + loadConfigurationPb.setTimeZone(timeZone); + } + if (dateFormat != null) { + loadConfigurationPb.setDateFormat(dateFormat); + } + if (datetimeFormat != null) { + loadConfigurationPb.setDatetimeFormat(datetimeFormat); + } + if (timeFormat != null) { + loadConfigurationPb.setTimeFormat(timeFormat); + } + if (timestampFormat != null) { + loadConfigurationPb.setTimestampFormat(timestampFormat); + } + if (sourceColumnMatch != null) { + loadConfigurationPb.setSourceColumnMatch(sourceColumnMatch); + } + if (nullMarkers != null) { + loadConfigurationPb.setNullMarkers(nullMarkers); + } jobConfiguration.setLoad(loadConfigurationPb); return jobConfiguration; diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java index 93a4425df..68c7e8459 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java @@ -61,6 +61,13 @@ public class 
ExternalTableDefinitionTest { private static final String OBJECT_METADATA = "SIMPLE"; private static final String METADATA_CACHE_MODE = "AUTOMATIC"; private static final String MAX_STALENESS = "INTERVAL 15 MINUTE"; + private static final String TIME_ZONE = "America/Los_Angeles"; + private static final String DATE_FORMAT = "YYYY-MM-DD"; + private static final String DATETIME_FORMAT = "YYYY-MM-DD HH:MI:SS"; + private static final String TIME_FORMAT = "HH:MI:SS"; + private static final String TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MI:SS"; + private static final String SOURCE_COLUMN_MATCH = "POSITION"; + private static final List NULL_MARKERS = ImmutableList.of("SQL NULL", "SQL NULL"); private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION = ExternalTableDefinition.newBuilder(SOURCE_URIS, TABLE_SCHEMA, CSV_OPTIONS) .setFileSetSpecType("FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH") @@ -74,6 +81,13 @@ public class ExternalTableDefinitionTest { .setObjectMetadata(OBJECT_METADATA) .setMetadataCacheMode(METADATA_CACHE_MODE) .setMaxStaleness(MAX_STALENESS) + .setTimeZone(TIME_ZONE) + .setDateFormat(DATE_FORMAT) + .setDatetimeFormat(DATETIME_FORMAT) + .setTimeFormat(TIME_FORMAT) + .setTimestampFormat(TIMESTAMP_FORMAT) + .setSourceColumnMatch(SOURCE_COLUMN_MATCH) + .setNullMarkers(NULL_MARKERS) .build(); private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION_AVRO = @@ -131,6 +145,13 @@ public void testBuilder() { assertEquals(DECIMAL_TARGET_TYPES, EXTERNAL_TABLE_DEFINITION.getDecimalTargetTypes()); assertEquals(AUTODETECT, EXTERNAL_TABLE_DEFINITION.getAutodetect()); assertEquals(HIVE_PARTITIONING_OPTIONS, EXTERNAL_TABLE_DEFINITION.getHivePartitioningOptions()); + assertEquals(TIME_ZONE, EXTERNAL_TABLE_DEFINITION.getTimeZone()); + assertEquals(DATE_FORMAT, EXTERNAL_TABLE_DEFINITION.getDateFormat()); + assertEquals(DATETIME_FORMAT, EXTERNAL_TABLE_DEFINITION.getDatetimeFormat()); + assertEquals(TIME_FORMAT, EXTERNAL_TABLE_DEFINITION.getTimeFormat()); + 
assertEquals(TIMESTAMP_FORMAT, EXTERNAL_TABLE_DEFINITION.getTimestampFormat()); + assertEquals(SOURCE_COLUMN_MATCH, EXTERNAL_TABLE_DEFINITION.getSourceColumnMatch()); + assertEquals(NULL_MARKERS, EXTERNAL_TABLE_DEFINITION.getNullMarkers()); assertNotEquals(EXTERNAL_TABLE_DEFINITION, TableDefinition.Type.EXTERNAL); } @@ -174,5 +195,12 @@ private void compareExternalTableDefinition( assertEquals(expected.getObjectMetadata(), value.getObjectMetadata()); assertEquals(expected.getMetadataCacheMode(), value.getMetadataCacheMode()); assertEquals(expected.getMaxStaleness(), value.getMaxStaleness()); + assertEquals(expected.getTimeZone(), value.getTimeZone()); + assertEquals(expected.getDateFormat(), value.getDateFormat()); + assertEquals(expected.getDatetimeFormat(), value.getDatetimeFormat()); + assertEquals(expected.getTimeFormat(), value.getTimeFormat()); + assertEquals(expected.getTimestampFormat(), value.getTimestampFormat()); + assertEquals(expected.getSourceColumnMatch(), value.getSourceColumnMatch()); + assertEquals(expected.getNullMarkers(), value.getNullMarkers()); } } diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java index 6d181a328..633acaa15 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java @@ -77,6 +77,13 @@ public class LoadJobConfigurationTest { private static final String KEY = "session_id"; private static final String VALUE = "session_id_1234567890"; + private static final String TIME_ZONE = "America/Los_Angeles"; + private static final String DATE_FORMAT = "YYYY-MM-DD"; + private static final String DATETIME_FORMAT = "YYYY-MM-DD HH:MI:SS"; + private static final String TIME_FORMAT = "HH:MI:SS"; + private static final String TIMESTAMP_FORMAT = "YYYY-MM-DD 
HH:MI:SS"; + private static final String SOURCE_COLUMN_MATCH = "POSITION"; + private static final List NULL_MARKERS = ImmutableList.of("SQL NULL", "SQL NULL"); private static final ConnectionProperty CONNECTION_PROPERTY = ConnectionProperty.newBuilder().setKey(KEY).setValue(VALUE).build(); private static final List CONNECTION_PROPERTIES = @@ -110,6 +117,13 @@ public class LoadJobConfigurationTest { .setConnectionProperties(CONNECTION_PROPERTIES) .setCreateSession(CREATE_SESSION) .setReservation(RESERVATION) + .setTimeZone(TIME_ZONE) + .setDateFormat(DATE_FORMAT) + .setDatetimeFormat(DATETIME_FORMAT) + .setTimeFormat(TIME_FORMAT) + .setTimestampFormat(TIMESTAMP_FORMAT) + .setSourceColumnMatch(SOURCE_COLUMN_MATCH) + .setNullMarkers(NULL_MARKERS) .build(); private static final DatastoreBackupOptions BACKUP_OPTIONS = @@ -271,5 +285,12 @@ private void compareLoadJobConfiguration( assertEquals(expected.getConnectionProperties(), value.getConnectionProperties()); assertEquals(expected.getCreateSession(), value.getCreateSession()); assertEquals(expected.getReservation(), value.getReservation()); + assertEquals(expected.getTimeZone(), value.getTimeZone()); + assertEquals(expected.getDateFormat(), value.getDateFormat()); + assertEquals(expected.getDatetimeFormat(), value.getDatetimeFormat()); + assertEquals(expected.getTimeFormat(), value.getTimeFormat()); + assertEquals(expected.getTimestampFormat(), value.getTimestampFormat()); + assertEquals(expected.getSourceColumnMatch(), value.getSourceColumnMatch()); + assertEquals(expected.getNullMarkers(), value.getNullMarkers()); } } From 2593cfe20bb8223f792fa59953102c634dfaa1f4 Mon Sep 17 00:00:00 2001 From: Liam Huffman Date: Mon, 23 Jun 2025 13:33:24 -0700 Subject: [PATCH 2/6] update ignored-diff --- .../clirr-ignored-differences.xml | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/google-cloud-bigquery/clirr-ignored-differences.xml b/google-cloud-bigquery/clirr-ignored-differences.xml index 
37e349413..208c41fd7 100644 --- a/google-cloud-bigquery/clirr-ignored-differences.xml +++ b/google-cloud-bigquery/clirr-ignored-differences.xml @@ -2,6 +2,76 @@ + + 7013 + com/google/cloud/bigquery/ExternalTableDefinition* + *TimeZone(*) + + + 7013 + com/google/cloud/bigquery/ExternalTableDefinition* + *DataFormat(*) + + + 7013 + com/google/cloud/bigquery/ExternalTableDefinition* + *DatetimeFormat(*) + + + 7013 + com/google/cloud/bigquery/ExternalTableDefinition* + *TimeFormat(*) + + + 7013 + com/google/cloud/bigquery/ExternalTableDefinition* + *TimestampFormat(*) + + + 7013 + com/google/cloud/bigquery/ExternalTableDefinition* + *SourceColumnMatch(*) + + + 7013 + com/google/cloud/bigquery/ExternalTableDefinition* + *NullMarkers(*) + + + 7013 + com/google/cloud/bigquery/LoadJobConfiguration* + *TimeZone(*) + + + 7013 + com/google/cloud/bigquery/LoadJobConfiguration* + *DataFormat(*) + + + 7013 + com/google/cloud/bigquery/LoadJobConfiguration* + *DatetimeFormat(*) + + + 7013 + com/google/cloud/bigquery/LoadJobConfiguration* + *TimeFormat(*) + + + 7013 + com/google/cloud/bigquery/LoadJobConfiguration* + *TimestampFormat(*) + + + 7013 + com/google/cloud/bigquery/LoadJobConfiguration* + *SourceColumnMatch(*) + + + 7013 + com/google/cloud/bigquery/LoadJobConfiguration* + *NullMarkers(*) + 7004 com/google/cloud/bigquery/spi/v2/BigQueryRpc From 099b71ece2315d0026f7cba2c499a27e38d73597 Mon Sep 17 00:00:00 2001 From: Liam Huffman Date: Mon, 23 Jun 2025 13:35:51 -0700 Subject: [PATCH 3/6] fix data -> date typo --- google-cloud-bigquery/clirr-ignored-differences.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google-cloud-bigquery/clirr-ignored-differences.xml b/google-cloud-bigquery/clirr-ignored-differences.xml index 208c41fd7..4e7a75dcc 100644 --- a/google-cloud-bigquery/clirr-ignored-differences.xml +++ b/google-cloud-bigquery/clirr-ignored-differences.xml @@ -10,7 +10,7 @@ 7013 com/google/cloud/bigquery/ExternalTableDefinition* - *DataFormat(*) + 
*DateFormat(*) 7013 @@ -45,7 +45,7 @@ 7013 com/google/cloud/bigquery/LoadJobConfiguration* - *DataFormat(*) + *DateFormat(*) 7013 From 681af66551a4f5a13522ec2c7d6492d35bd966b5 Mon Sep 17 00:00:00 2001 From: Liam Huffman Date: Tue, 24 Jun 2025 11:32:34 -0700 Subject: [PATCH 4/6] Add enums for SourceColumnMatch --- .../bigquery/ExternalTableDefinition.java | 30 +++++++++++++++---- .../cloud/bigquery/LoadJobConfiguration.java | 30 +++++++++++++++---- .../bigquery/ExternalTableDefinitionTest.java | 3 +- .../bigquery/LoadJobConfigurationTest.java | 3 +- 4 files changed, 53 insertions(+), 13 deletions(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java index 15c128136..1f179aeaa 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java @@ -57,6 +57,22 @@ public ExternalDataConfiguration apply(ExternalTableDefinition tableInfo) { private static final long serialVersionUID = -5951580238459622025L; + public enum SourceColumnMatch { + POSITION("POSITION"), + NAME("NAME"); + + private final String option; + + SourceColumnMatch(String option) { + this.option = option; + } + + @Override + public String toString() { + return option; + } + } + @AutoValue.Builder public abstract static class Builder extends TableDefinition.Builder { @@ -243,7 +259,7 @@ public Builder setMaxStaleness(String maxStaleness) { * reads the header row as column names and reorders columns to match the field names in the * schema. */ - public abstract Builder setSourceColumnMatch(String sourceColumnMatch); + public abstract Builder setSourceColumnMatch(SourceColumnMatch sourceColumnMatch); /** * A list of strings represented as SQL NULL value in a CSV file. 
null_marker and null_markers @@ -437,7 +453,7 @@ public HivePartitioningOptions getHivePartitioningOptions() { /** Returns the strategy used to match loaded columns to the schema, either POSITION or NAME. */ @Nullable - public abstract String getSourceColumnMatch(); + public abstract SourceColumnMatch getSourceColumnMatch(); /** Returns a list of strings represented as SQL NULL value in a CSV file. */ @Nullable @@ -540,7 +556,9 @@ com.google.api.services.bigquery.model.ExternalDataConfiguration toExternalDataC externalConfigurationPb.setTimestampFormat(getTimestampFormat()); } if (getSourceColumnMatch() != null) { - externalConfigurationPb.getCsvOptions().setSourceColumnMatch(getSourceColumnMatch()); + externalConfigurationPb + .getCsvOptions() + .setSourceColumnMatch(getSourceColumnMatch().toString()); } if (getNullMarkers() != null) { externalConfigurationPb.getCsvOptions().setNullMarkers(getNullMarkers()); @@ -763,7 +781,8 @@ static ExternalTableDefinition fromPb(Table tablePb) { if (externalDataConfiguration.getCsvOptions() != null) { if (externalDataConfiguration.getCsvOptions().getSourceColumnMatch() != null) { builder.setSourceColumnMatch( - externalDataConfiguration.getCsvOptions().getSourceColumnMatch()); + SourceColumnMatch.valueOf( + externalDataConfiguration.getCsvOptions().getSourceColumnMatch())); } if (externalDataConfiguration.getCsvOptions().getNullMarkers() != null) { builder.setNullMarkers(externalDataConfiguration.getCsvOptions().getNullMarkers()); @@ -857,7 +876,8 @@ static ExternalTableDefinition fromExternalDataConfiguration( if (externalDataConfiguration.getCsvOptions() != null) { if (externalDataConfiguration.getCsvOptions().getSourceColumnMatch() != null) { builder.setSourceColumnMatch( - externalDataConfiguration.getCsvOptions().getSourceColumnMatch()); + SourceColumnMatch.valueOf( + externalDataConfiguration.getCsvOptions().getSourceColumnMatch())); } if (externalDataConfiguration.getCsvOptions().getNullMarkers() != null) { 
builder.setNullMarkers(externalDataConfiguration.getCsvOptions().getNullMarkers()); diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java index 88252d88c..f3b67d89b 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java @@ -68,9 +68,26 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load private final String datetimeFormat; private final String timeFormat; private final String timestampFormat; - private final String sourceColumnMatch; + private final SourceColumnMatch sourceColumnMatch; private final List nullMarkers; + public enum SourceColumnMatch { + SOURCE_COLUMN_MATCH_UNSPECIFIED("SOURCE_COLUMN_MATCH_UNSPECIFIED"), + POSITION("POSITION"), + NAME("NAME"); + + private final String option; + + SourceColumnMatch(String option) { + this.option = option; + } + + @Override + public String toString() { + return option; + } + } + public static final class Builder extends JobConfiguration.Builder implements LoadConfiguration.Builder { @@ -107,7 +124,7 @@ public static final class Builder extends JobConfiguration.Builder nullMarkers; private Builder() { @@ -275,7 +292,8 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur this.timestampFormat = loadConfigurationPb.getTimestampFormat(); } if (loadConfigurationPb.getSourceColumnMatch() != null) { - this.sourceColumnMatch = loadConfigurationPb.getSourceColumnMatch(); + this.sourceColumnMatch = + SourceColumnMatch.valueOf(loadConfigurationPb.getSourceColumnMatch()); } if (loadConfigurationPb.getNullMarkers() != null) { this.nullMarkers = loadConfigurationPb.getNullMarkers(); @@ -530,7 +548,7 @@ public Builder setTimestampFormat(String timestampFormat) { * are matched by name. 
Otherwise, columns are matched by position. This is done to keep the * behavior backward-compatible. */ - public Builder setSourceColumnMatch(String sourceColumnMatch) { + public Builder setSourceColumnMatch(SourceColumnMatch sourceColumnMatch) { this.sourceColumnMatch = sourceColumnMatch; return this; } @@ -768,7 +786,7 @@ public String getTimestampFormat() { } /** Returns the strategy used to match loaded columns to the schema, either POSITION or NAME. */ - public String getSourceColumnMatch() { + public SourceColumnMatch getSourceColumnMatch() { return sourceColumnMatch; } @@ -955,7 +973,7 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() { loadConfigurationPb.setTimestampFormat(timestampFormat); } if (sourceColumnMatch != null) { - loadConfigurationPb.setSourceColumnMatch(sourceColumnMatch); + loadConfigurationPb.setSourceColumnMatch(sourceColumnMatch.toString()); } if (nullMarkers != null) { loadConfigurationPb.setNullMarkers(nullMarkers); diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java index 68c7e8459..d7ca0b333 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; +import com.google.cloud.bigquery.ExternalTableDefinition.SourceColumnMatch; import com.google.common.collect.ImmutableList; import java.util.List; import org.junit.Test; @@ -66,7 +67,7 @@ public class ExternalTableDefinitionTest { private static final String DATETIME_FORMAT = "YYYY-MM-DD HH:MI:SS"; private static final String TIME_FORMAT = "HH:MI:SS"; private static final String TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MI:SS"; - private static final String 
SOURCE_COLUMN_MATCH = "POSITION"; + private static final SourceColumnMatch SOURCE_COLUMN_MATCH = SourceColumnMatch.POSITION; private static final List<String> NULL_MARKERS = ImmutableList.of("SQL NULL", "SQL NULL"); private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION = ExternalTableDefinition.newBuilder(SOURCE_URIS, TABLE_SCHEMA, CSV_OPTIONS) diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java index 633acaa15..34cdda490 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java @@ -21,6 +21,7 @@ import com.google.cloud.bigquery.JobInfo.CreateDisposition; import com.google.cloud.bigquery.JobInfo.SchemaUpdateOption; import com.google.cloud.bigquery.JobInfo.WriteDisposition; +import com.google.cloud.bigquery.LoadJobConfiguration.SourceColumnMatch; import com.google.cloud.bigquery.TimePartitioning.Type; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -82,7 +83,7 @@ public class LoadJobConfigurationTest { private static final String DATETIME_FORMAT = "YYYY-MM-DD HH:MI:SS"; private static final String TIME_FORMAT = "HH:MI:SS"; private static final String TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MI:SS"; - private static final String SOURCE_COLUMN_MATCH = "POSITION"; + private static final SourceColumnMatch SOURCE_COLUMN_MATCH = SourceColumnMatch.POSITION; private static final List<String> NULL_MARKERS = ImmutableList.of("SQL NULL", "SQL NULL"); private static final ConnectionProperty CONNECTION_PROPERTY = ConnectionProperty.newBuilder().setKey(KEY).setValue(VALUE).build(); From 9efd587937a2bf8f691edfe03a3b876308e7415f Mon Sep 17 00:00:00 2001 From: Liam Huffman Date: Tue, 24 Jun 2025 11:36:26 -0700 Subject: [PATCH 5/6] Change null markers
test names --- .../com/google/cloud/bigquery/ExternalTableDefinitionTest.java | 2 +- .../com/google/cloud/bigquery/LoadJobConfigurationTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java index d7ca0b333..cb7578c75 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/ExternalTableDefinitionTest.java @@ -68,7 +68,7 @@ public class ExternalTableDefinitionTest { private static final String TIME_FORMAT = "HH:MI:SS"; private static final String TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MI:SS"; private static final SourceColumnMatch SOURCE_COLUMN_MATCH = SourceColumnMatch.POSITION; - private static final List<String> NULL_MARKERS = ImmutableList.of("SQL NULL", "SQL NULL"); + private static final List<String> NULL_MARKERS = ImmutableList.of("SQL NULL", "TEST_MARKER"); private static final ExternalTableDefinition EXTERNAL_TABLE_DEFINITION = ExternalTableDefinition.newBuilder(SOURCE_URIS, TABLE_SCHEMA, CSV_OPTIONS) .setFileSetSpecType("FILE_SET_SPEC_TYPE_FILE_SYSTEM_MATCH") diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java index 34cdda490..b1a2f1af8 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java @@ -84,7 +84,7 @@ public class LoadJobConfigurationTest { private static final String TIME_FORMAT = "HH:MI:SS"; private static final String TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MI:SS"; private static final SourceColumnMatch SOURCE_COLUMN_MATCH = SourceColumnMatch.POSITION; -
private static final List<String> NULL_MARKERS = ImmutableList.of("SQL NULL", "SQL NULL"); + private static final List<String> NULL_MARKERS = ImmutableList.of("SQL NULL", "TEST MARKER"); private static final ConnectionProperty CONNECTION_PROPERTY = ConnectionProperty.newBuilder().setKey(KEY).setValue(VALUE).build(); private static final List<ConnectionProperty> CONNECTION_PROPERTIES = From ae64196d289aa19a564d7a86e034eeaa1ad76b64 Mon Sep 17 00:00:00 2001 From: Liam Huffman Date: Mon, 30 Jun 2025 14:24:20 -0700 Subject: [PATCH 6/6] change enums to StringEnumValue --- .../bigquery/ExternalTableDefinition.java | 45 +++++++++++++---- .../cloud/bigquery/LoadJobConfiguration.java | 48 +++++++++++++++---- 2 files changed, 74 insertions(+), 19 deletions(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java index 1f179aeaa..c1859c3f9 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/ExternalTableDefinition.java @@ -19,9 +19,12 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Strings.isNullOrEmpty; +import com.google.api.core.ApiFunction; import com.google.api.services.bigquery.model.ExternalDataConfiguration; import com.google.api.services.bigquery.model.Table; import com.google.auto.value.AutoValue; +import com.google.cloud.StringEnumType; +import com.google.cloud.StringEnumValue; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; import java.util.List; @@ -57,19 +60,43 @@ public ExternalDataConfiguration apply(ExternalTableDefinition tableInfo) { private static final long serialVersionUID = -5951580238459622025L; - public enum SourceColumnMatch { - POSITION("POSITION"), - NAME("NAME"); + public static final class SourceColumnMatch extends
StringEnumValue { + private static final long serialVersionUID = 818920627219751207L; + private static final ApiFunction<String, SourceColumnMatch> CONSTRUCTOR = + new ApiFunction<String, SourceColumnMatch>() { + @Override + public SourceColumnMatch apply(String constant) { + return new SourceColumnMatch(constant); + } + }; - private final String option; + private static final StringEnumType<SourceColumnMatch> type = + new StringEnumType<SourceColumnMatch>(SourceColumnMatch.class, CONSTRUCTOR); - SourceColumnMatch(String option) { - this.option = option; + public static final SourceColumnMatch POSITION = type.createAndRegister("POSITION"); + + public static final SourceColumnMatch NAME = type.createAndRegister("NAME"); + + private SourceColumnMatch(String constant) { + super(constant); } - @Override - public String toString() { - return option; + /** + * Get the SourceColumnMatch for the given String constant, and throw an exception if the + * constant is not recognized. + */ + public static SourceColumnMatch valueOfStrict(String constant) { + return type.valueOfStrict(constant); + } + + /** Get the SourceColumnMatch for the given String constant, and allow unrecognized values. */ + public static SourceColumnMatch valueOf(String constant) { + return type.valueOf(constant); + } + + /** Return the known values for SourceColumnMatch.
*/ + public static SourceColumnMatch[] values() { + return type.values(); } } diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java index f3b67d89b..0d1eb7245 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java @@ -18,7 +18,10 @@ import static com.google.common.base.Preconditions.checkNotNull; +import com.google.api.core.ApiFunction; import com.google.api.services.bigquery.model.JobConfigurationLoad; +import com.google.cloud.StringEnumType; +import com.google.cloud.StringEnumValue; import com.google.common.base.MoreObjects.ToStringHelper; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; @@ -71,20 +74,45 @@ public final class LoadJobConfiguration extends JobConfiguration implements Load private final SourceColumnMatch sourceColumnMatch; private final List<String> nullMarkers; - public enum SourceColumnMatch { - SOURCE_COLUMN_MATCH_UNSPECIFIED("SOURCE_COLUMN_MATCH_UNSPECIFIED"), - POSITION("POSITION"), - NAME("NAME"); + public static final class SourceColumnMatch extends StringEnumValue { + private static final long serialVersionUID = 818920627219751207L; + private static final ApiFunction<String, SourceColumnMatch> CONSTRUCTOR = + new ApiFunction<String, SourceColumnMatch>() { + @Override + public SourceColumnMatch apply(String constant) { + return new SourceColumnMatch(constant); + } + }; - private final String option; + private static final StringEnumType<SourceColumnMatch> type = + new StringEnumType<SourceColumnMatch>(SourceColumnMatch.class, CONSTRUCTOR); - SourceColumnMatch(String option) { - this.option = option; + public static final SourceColumnMatch SOURCE_COLUMN_MATCH_UNSPECIFIED = + type.createAndRegister("SOURCE_COLUMN_MATCH_UNSPECIFIED"); + public static final SourceColumnMatch POSITION = type.createAndRegister("POSITION"); + + public
static final SourceColumnMatch NAME = type.createAndRegister("NAME"); + + private SourceColumnMatch(String constant) { + super(constant); } - @Override - public String toString() { - return option; + /** + * Get the SourceColumnMatch for the given String constant, and throw an exception if the + * constant is not recognized. + */ + public static SourceColumnMatch valueOfStrict(String constant) { + return type.valueOfStrict(constant); + } + + /** Get the SourceColumnMatch for the given String constant, and allow unrecognized values. */ + public static SourceColumnMatch valueOf(String constant) { + return type.valueOf(constant); + } + + /** Return the known values for SourceColumnMatch. */ + public static SourceColumnMatch[] values() { + return type.values(); } }