From 7529c2ca1cd40926136e679bc3d861d74bc6576d Mon Sep 17 00:00:00 2001 From: PhongChuong Date: Fri, 4 Jul 2025 11:38:41 -0400 Subject: [PATCH 1/4] fix: load jobs preserve ascii control characters configuration --- .../cloud/bigquery/LoadJobConfiguration.java | 3 ++ .../bigquery/LoadJobConfigurationTest.java | 1 + .../cloud/bigquery/it/ITBigQueryTest.java | 32 +++++++++++++++++-- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java index 0d1eb7245..da843a035 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java @@ -228,6 +228,8 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur CsvOptions.newBuilder() .setEncoding(loadConfigurationPb.getEncoding()) .setFieldDelimiter(loadConfigurationPb.getFieldDelimiter()) + .setPreserveAsciiControlCharacters( + loadConfigurationPb.getPreserveAsciiControlCharacters()) .setQuote(loadConfigurationPb.getQuote()); if (loadConfigurationPb.getAllowJaggedRows() != null) { builder.setAllowJaggedRows(loadConfigurationPb.getAllowJaggedRows()); @@ -907,6 +909,7 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() { .setAllowJaggedRows(csvOptions.allowJaggedRows()) .setAllowQuotedNewlines(csvOptions.allowQuotedNewLines()) .setEncoding(csvOptions.getEncoding()) + .setPreserveAsciiControlCharacters(csvOptions.getPreserveAsciiControlCharacters()) .setQuote(csvOptions.getQuote()); if (csvOptions.getSkipLeadingRows() != null) { // todo(mziccard) remove checked cast or comment when #1044 is closed diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java index b1a2f1af8..d987eb28e 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/LoadJobConfigurationTest.java @@ -38,6 +38,7 @@ public class LoadJobConfigurationTest { .setAllowJaggedRows(true) .setAllowQuotedNewLines(false) .setEncoding(StandardCharsets.UTF_8) + .setPreserveAsciiControlCharacters(true) .build(); private static final TableId TABLE_ID = TableId.of("dataset", "table"); private static final CreateDisposition CREATE_DISPOSITION = CreateDisposition.CREATE_IF_NEEDED; diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java index f188e7946..1d0d4f95b 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java @@ -613,6 +613,7 @@ public class ITBigQueryTest { private static final String LOAD_FILE_LARGE = "load_large.csv"; private static final String LOAD_FILE_FLEXIBLE_COLUMN_NAME = "load_flexible_column_name.csv"; + private static final String LOAD_FILE_NULL = "load_null.csv"; private static final String JSON_LOAD_FILE = "load.json"; private static final String JSON_LOAD_FILE_BQ_RESULTSET = "load_bq_resultset.json"; private static final String JSON_LOAD_FILE_SIMPLE = "load_simple.json"; @@ -628,6 +629,7 @@ public class ITBigQueryTest { private static final TableId TABLE_ID_FASTQUERY_BQ_RESULTSET = TableId.of(DATASET, "fastquery_testing_bq_resultset"); private static final String CSV_CONTENT = "StringValue1\nStringValue2\n"; + private static final String CSV_CONTENT_NULL = "String\0Value1\n"; private static final String CSV_CONTENT_FLEXIBLE_COLUMN = "name,&ersand\nrow_name,1"; private static final String JSON_CONTENT = @@ -1080,6 +1082,11 @@ public static void beforeClass() throws InterruptedException, IOException { storage.create( BlobInfo.newBuilder(BUCKET, LOAD_FILE).setContentType("text/plain").build(), CSV_CONTENT.getBytes(StandardCharsets.UTF_8)); + storage.create( + BlobInfo.newBuilder(BUCKET, LOAD_FILE_NULL) + .setContentType("text/plain") + .build(), + CSV_CONTENT_NULL.getBytes(StandardCharsets.UTF_8)); storage.create( BlobInfo.newBuilder(BUCKET, LOAD_FILE_FLEXIBLE_COLUMN_NAME) .setContentType("text/plain") @@ -6600,9 +6607,9 @@ public void testLocation() throws Exception { } @Test - public void testPreserveAsciiControlCharacters() + public void testWriteChannelPreserveAsciiControlCharacters() throws InterruptedException, IOException, TimeoutException { - String destinationTableName = "test_preserve_ascii_control_characters"; + String destinationTableName = "test_write_channel_preserve_ascii_control_characters"; TableId tableId = TableId.of(DATASET, destinationTableName); WriteChannelConfiguration configuration = WriteChannelConfiguration.newBuilder(tableId) @@ -6625,6 +6632,27 @@ public void testPreserveAsciiControlCharacters() assertTrue(bigquery.delete(tableId)); } + @Test + public void testLoadJobPreserveAsciiControlCharacters() + throws InterruptedException { + String destinationTableName = "test_load_job_preserve_ascii_control_characters"; + TableId destinationTable = TableId.of(DATASET, destinationTableName); + + try { + LoadJobConfiguration configuration = + LoadJobConfiguration.newBuilder(destinationTable, "gs://" + BUCKET + "/" + LOAD_FILE_NULL) + .setFormatOptions( + CsvOptions.newBuilder().setPreserveAsciiControlCharacters(true).build()) + .setSchema(SIMPLE_SCHEMA) + .build(); + Job remoteLoadJob = bigquery.create(JobInfo.of(configuration)); + remoteLoadJob = remoteLoadJob.waitFor(); + assertNull(remoteLoadJob.getStatus().getError()); + } finally { + assertTrue(bigquery.delete(destinationTable)); + } + } + @Test public void testReferenceFileSchemaUriForAvro() { try { From d59ee75e69dd4776372e38a7f6a777b40abfa0d2 Mon Sep 17 00:00:00 2001 From: PhongChuong Date: Fri, 4 Jul 2025 11:42:13 -0400 Subject: [PATCH 2/4] fix lint --- .../java/com/google/cloud/bigquery/it/ITBigQueryTest.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java index 1d0d4f95b..c9f6296cc 100644 --- a/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java +++ b/google-cloud-bigquery/src/test/java/com/google/cloud/bigquery/it/ITBigQueryTest.java @@ -1083,9 +1083,7 @@ public static void beforeClass() throws InterruptedException, IOException { BlobInfo.newBuilder(BUCKET, LOAD_FILE).setContentType("text/plain").build(), CSV_CONTENT.getBytes(StandardCharsets.UTF_8)); storage.create( - BlobInfo.newBuilder(BUCKET, LOAD_FILE_NULL) - .setContentType("text/plain") - .build(), + BlobInfo.newBuilder(BUCKET, LOAD_FILE_NULL).setContentType("text/plain").build(), CSV_CONTENT_NULL.getBytes(StandardCharsets.UTF_8)); storage.create( BlobInfo.newBuilder(BUCKET, LOAD_FILE_FLEXIBLE_COLUMN_NAME) @@ -6633,8 +6631,7 @@ public void testWriteChannelPreserveAsciiControlCharacters() } @Test - public void testLoadJobPreserveAsciiControlCharacters() - throws InterruptedException { + public void testLoadJobPreserveAsciiControlCharacters() throws InterruptedException { String destinationTableName = "test_load_job_preserve_ascii_control_characters"; TableId destinationTable = TableId.of(DATASET, destinationTableName); From f8172e88bf3aa5bc456e0e1bcbb87a6f74b2e24f Mon Sep 17 00:00:00 2001 From: PhongChuong Date: Fri, 4 Jul 2025 13:24:09 -0400 Subject: [PATCH 3/4] fix NPE --- .../com/google/cloud/bigquery/LoadJobConfiguration.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java index da843a035..ff67e1a0b 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java @@ -228,9 +228,11 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur CsvOptions.newBuilder() .setEncoding(loadConfigurationPb.getEncoding()) .setFieldDelimiter(loadConfigurationPb.getFieldDelimiter()) - .setPreserveAsciiControlCharacters( - loadConfigurationPb.getPreserveAsciiControlCharacters()) .setQuote(loadConfigurationPb.getQuote()); + if (loadConfigurationPb.getPreserveAsciiControlCharacters() != null) { + builder.setPreserveAsciiControlCharacters( + loadConfigurationPb.getPreserveAsciiControlCharacters()); + } if (loadConfigurationPb.getAllowJaggedRows() != null) { builder.setAllowJaggedRows(loadConfigurationPb.getAllowJaggedRows()); } From 85720ac0257667f0b49ab8e0341b4e8f47a395f9 Mon Sep 17 00:00:00 2001 From: PhongChuong Date: Fri, 4 Jul 2025 13:26:33 -0400 Subject: [PATCH 4/4] fix NPE --- .../java/com/google/cloud/bigquery/LoadJobConfiguration.java | 1 + 1 file changed, 1 insertion(+) diff --git a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java index ff67e1a0b..381942cd0 100644 --- a/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java +++ b/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/LoadJobConfiguration.java @@ -219,6 +219,7 @@ private Builder(com.google.api.services.bigquery.model.JobConfiguration configur this.nullMarker = loadConfigurationPb.getNullMarker(); } if (loadConfigurationPb.getAllowJaggedRows() != null + || loadConfigurationPb.getPreserveAsciiControlCharacters() != null || loadConfigurationPb.getAllowQuotedNewlines() != null || loadConfigurationPb.getEncoding() != null || loadConfigurationPb.getFieldDelimiter() != null