diff --git a/sdks/java/testing/tpcds/build.gradle b/sdks/java/testing/tpcds/build.gradle index a33f24c4f1b6..6fd19ce972be 100644 --- a/sdks/java/testing/tpcds/build.gradle +++ b/sdks/java/testing/tpcds/build.gradle @@ -54,6 +54,7 @@ configurations { } dependencies { + compile library.java.avro compile library.java.vendored_guava_26_0_jre compile library.java.vendored_calcite_1_20_0 compile library.java.commons_csv @@ -62,6 +63,7 @@ dependencies { compile "com.alibaba:fastjson:1.2.69" compile project(":sdks:java:extensions:sql") compile project(":sdks:java:extensions:sql:zetasql") + compile project(":sdks:java:io:parquet") compile project(path: ":runners:google-cloud-dataflow-java") compile project(path: ":sdks:java:core", configuration: "shadow") testRuntimeClasspath library.java.slf4j_jdk14 diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/SqlTransformRunner.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/SqlTransformRunner.java index b6a21506ade4..ad1714fe0f0d 100644 --- a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/SqlTransformRunner.java +++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/SqlTransformRunner.java @@ -17,6 +17,7 @@ */ package org.apache.beam.sdk.tpcds; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -25,12 +26,12 @@ import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions; +import org.apache.avro.generic.GenericRecord; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.extensions.sql.SqlTransform; -import org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions; import org.apache.beam.sdk.extensions.sql.meta.provider.text.TextTable; import org.apache.beam.sdk.io.TextIO; +import org.apache.beam.sdk.io.parquet.ParquetIO; import 
org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.SchemaCoder; @@ -40,6 +41,8 @@ import org.apache.beam.sdk.values.Row; import org.apache.beam.sdk.values.TupleTag; import org.apache.beam.sdk.values.TypeDescriptors; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.base.Charsets; +import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.io.Resources; import org.apache.commons.csv.CSVFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -87,27 +90,77 @@ private static PCollectionTuple getTables( // Only when queryString contains tableName, the table is relevant to this query and will be // added. This can avoid reading unnecessary data files. + // TODO: Simple but not reliable way since table name can be any substring in a query and can + // give false positives if (queryString.contains(tableName)) { - // This is location path where the data are stored - String filePattern = - tpcdsOptions.getDataDirectory() + "/" + dataSize + "/" + tableName + ".dat"; - - PCollection table = - new TextTable( - tableSchema.getValue(), - filePattern, - new CsvToRow(tableSchema.getValue(), csvFormat), - new RowToCsv(csvFormat)) - .buildIOReader(pipeline.begin()) - .setCoder(SchemaCoder.of(tableSchema.getValue())) - .setName(tableSchema.getKey()); - - tables = tables.and(new TupleTag<>(tableName), table); + switch (tpcdsOptions.getSourceType()) { + case CSV: + { + PCollection table = + getTableCSV(pipeline, csvFormat, tpcdsOptions, dataSize, tableSchema, tableName); + tables = tables.and(new TupleTag<>(tableName), table); + break; + } + case PARQUET: + { + PCollection table = + getTableParquet(pipeline, tpcdsOptions, dataSize, tableName); + tables = tables.and(new TupleTag<>(tableName), table); + break; + } + default: + throw new IllegalStateException( + "Unexpected source type: " + tpcdsOptions.getSourceType()); + } } } return tables; } + private static PCollection 
getTableParquet( + Pipeline pipeline, TpcdsOptions tpcdsOptions, String dataSize, String tableName) + throws IOException { + org.apache.avro.Schema schema = getAvroSchema(tableName); + + String filepattern = + tpcdsOptions.getDataDirectory() + "/" + dataSize + "/" + tableName + "/*.parquet"; + + return pipeline.apply( + "Read " + tableName + " (parquet)", + ParquetIO.read(schema) + .from(filepattern) + .withSplit() + // TODO: add .withProjection() + .withBeamSchemas(true)); + } + + private static PCollection getTableCSV( + Pipeline pipeline, + CSVFormat csvFormat, + TpcdsOptions tpcdsOptions, + String dataSize, + Map.Entry tableSchema, + String tableName) { + // This is location path where the data are stored + String filePattern = + tpcdsOptions.getDataDirectory() + "/" + dataSize + "/" + tableName + ".dat"; + + return new TextTable( + tableSchema.getValue(), + filePattern, + new CsvToRow(tableSchema.getValue(), csvFormat), + new RowToCsv(csvFormat)) + .buildIOReader(pipeline.begin()) + .setCoder(SchemaCoder.of(tableSchema.getValue())) + .setName(tableSchema.getKey()); + } + + private static org.apache.avro.Schema getAvroSchema(String tableName) throws IOException { + String path = "schemas_avro/" + tableName + ".json"; + return new org.apache.avro.Schema.Parser() + .parse(Resources.toString(Resources.getResource(path), Charsets.UTF_8)); + } + /** * Print the summary table after all jobs are finished. * @@ -160,28 +213,18 @@ public static void runUsingSqlTransform(String[] args) throws Exception { Pipeline[] pipelines = new Pipeline[queryNames.length]; CSVFormat csvFormat = CSVFormat.MYSQL.withDelimiter('|').withNullString(""); - // Execute all queries, transform the each result into a PCollection, write them into + // Execute all queries, transform each result into a PCollection, write them into // the txt file and store in a GCP directory. for (int i = 0; i < queryNames.length; i++) { // For each query, get a copy of pipelineOptions from command line arguments. 
TpcdsOptions tpcdsOptionsCopy = PipelineOptionsFactory.fromArgs(args).withValidation().as(TpcdsOptions.class); - // Cast tpcdsOptions as a BeamSqlPipelineOptions object to read and set queryPlanner (the - // default one is Calcite, can change to ZetaSQL). - BeamSqlPipelineOptions beamSqlPipelineOptionsCopy = - tpcdsOptionsCopy.as(BeamSqlPipelineOptions.class); - - // Finally, cast BeamSqlPipelineOptions as a DataflowPipelineOptions object to read and set - // other required pipeline optionsparameters . - DataflowPipelineOptions dataflowPipelineOptionsCopy = - beamSqlPipelineOptionsCopy.as(DataflowPipelineOptions.class); - // Set a unique job name using the time stamp so that multiple different pipelines can run // together. - dataflowPipelineOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis()); + tpcdsOptionsCopy.setJobName(queryNames[i] + "result" + System.currentTimeMillis()); - pipelines[i] = Pipeline.create(dataflowPipelineOptionsCopy); + pipelines[i] = Pipeline.create(tpcdsOptionsCopy); String queryString = QueryReader.readQuery(queryNames[i]); PCollectionTuple tables = getTables(pipelines[i], csvFormat, queryNames[i]); diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptions.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptions.java index 8e8b3e6325a6..30159991db7a 100644 --- a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptions.java +++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsOptions.java @@ -17,13 +17,13 @@ */ package org.apache.beam.sdk.tpcds; +import org.apache.beam.sdk.extensions.sql.impl.BeamSqlPipelineOptions; import org.apache.beam.sdk.options.Default; import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.Validation; /** Options used to configure TPC-DS test. 
*/ -public interface TpcdsOptions extends PipelineOptions { +public interface TpcdsOptions extends BeamSqlPipelineOptions { @Description( "The size of TPC-DS data to run query on, user input should contain the unit, such as '1G', '10G'") @Validation.Required @@ -55,4 +55,10 @@ public interface TpcdsOptions extends PipelineOptions { String getResultsDirectory(); void setResultsDirectory(String path); + + @Description("Where the data comes from.") + @Default.Enum("CSV") + TpcdsUtils.SourceType getSourceType(); + + void setSourceType(TpcdsUtils.SourceType sourceType); } diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsUtils.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsUtils.java new file mode 100644 index 000000000000..8ef0f4723731 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsUtils.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.sdk.tpcds; + +/** Odds and ends used throughout queries and driver. */ +public class TpcdsUtils { + + /** Possible sources for table data. */ + public enum SourceType { + /** Read table data from CSV files. 
*/ + CSV, + /** Read table data from Parquet files. */ + PARQUET + } +} diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/call_center.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/call_center.json new file mode 100644 index 000000000000..eeaf81db764d --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/call_center.json @@ -0,0 +1,260 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "cc_call_center_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cc_call_center_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_rec_start_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "cc_rec_end_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "cc_closed_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cc_open_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cc_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_class", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_employees", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cc_sq_ft", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cc_hours", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_manager", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_mkt_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cc_mkt_class", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_mkt_desc", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_market_manager", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": 
"cc_division", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cc_division_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_company", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cc_company_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_street_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_street_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_street_type", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_suite_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_city", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_county", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_state", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_zip", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_country", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cc_gmt_offset", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cc_tax_percentage", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/catalog_page.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/catalog_page.json new file mode 100644 index 000000000000..8b93471a2948 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/catalog_page.json @@ -0,0 +1,78 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "cp_catalog_page_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cp_catalog_page_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cp_start_date_sk", + "type": 
[ + "null", + "int" + ], + "default": null + }, + { + "name": "cp_end_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cp_department", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cp_catalog_number", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cp_catalog_page_number", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cp_description", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cp_type", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/catalog_returns.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/catalog_returns.json new file mode 100644 index 000000000000..525e97a6630b --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/catalog_returns.json @@ -0,0 +1,222 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "cr_returned_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_returned_time_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_item_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_refunded_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_refunded_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_refunded_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_refunded_addr_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_returning_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_returning_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_returning_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + 
"name": "cr_returning_addr_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_call_center_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_catalog_page_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_ship_mode_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_warehouse_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_reason_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_order_number", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": "cr_return_quantity", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_return_amount", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_return_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_return_amt_inc_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_fee", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_return_ship_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_refunded_cash", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_reversed_charge", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_store_credit", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cr_net_loss", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/catalog_sales.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/catalog_sales.json new file mode 100644 index 000000000000..ec86768c9424 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/catalog_sales.json @@ -0,0 +1,278 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": 
"cs_sold_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_sold_time_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ship_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_bill_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_bill_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_bill_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_bill_addr_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ship_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ship_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ship_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ship_addr_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_call_center_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_catalog_page_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ship_mode_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_warehouse_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_item_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_promo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_order_number", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": "cs_quantity", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_wholesale_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_list_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_sales_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + 
"name": "cs_ext_discount_amt", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ext_sales_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ext_wholesale_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ext_list_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ext_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_coupon_amt", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_ext_ship_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_net_paid", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_net_paid_inc_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_net_paid_inc_ship", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_net_paid_inc_ship_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cs_net_profit", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/customer.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/customer.json new file mode 100644 index 000000000000..3736c7a2b63f --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/customer.json @@ -0,0 +1,150 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "c_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "c_customer_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "c_current_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "c_current_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "c_current_addr_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": 
"c_first_shipto_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "c_first_sales_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "c_salutation", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "c_first_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "c_last_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "c_preferred_cust_flag", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "c_birth_day", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "c_birth_month", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "c_birth_year", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "c_birth_country", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "c_login", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "c_email_address", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "c_last_review_date", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/customer_address.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/customer_address.json new file mode 100644 index 000000000000..1100dec1e7ea --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/customer_address.json @@ -0,0 +1,110 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "ca_address_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ca_address_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ca_street_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ca_street_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { 
+ "name": "ca_street_type", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ca_suite_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ca_city", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ca_county", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ca_state", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ca_zip", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ca_country", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ca_gmt_offset", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ca_location_type", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/customer_demographics.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/customer_demographics.json new file mode 100644 index 000000000000..c65348892744 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/customer_demographics.json @@ -0,0 +1,78 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "cd_demo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cd_gender", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cd_marital_status", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cd_education_status", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cd_purchase_estimate", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cd_credit_rating", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "cd_dep_count", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "cd_dep_employed_count", + "type": [ + "null", + "int" + ], + "default": null + }, + 
{ + "name": "cd_dep_college_count", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/date_dim.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/date_dim.json new file mode 100644 index 000000000000..6ee514124955 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/date_dim.json @@ -0,0 +1,233 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "d_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_date_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "d_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "d_month_seq", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_week_seq", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_quarter_seq", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_year", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_dow", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_moy", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_dom", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_qoy", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_fy_year", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_fy_quarter_seq", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_fy_week_seq", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_day_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "d_quarter_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "d_holiday", + "type": [ + "null", + "string" + ], + "default": 
null + }, + { + "name": "d_weekend", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "d_following_holiday", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "d_first_dom", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_last_dom", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_same_day_ly", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_same_day_lq", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "d_current_day", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "d_current_week", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "d_current_month", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "d_current_quarter", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "d_current_year", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/household_demographics.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/household_demographics.json new file mode 100644 index 000000000000..04f32dd0483a --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/household_demographics.json @@ -0,0 +1,46 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "hd_demo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "hd_income_band_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "hd_buy_potential", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "hd_dep_count", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "hd_vehicle_count", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git 
a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/income_band.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/income_band.json new file mode 100644 index 000000000000..dc711b081f0a --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/income_band.json @@ -0,0 +1,30 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "ib_income_band_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ib_lower_bound", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ib_upper_bound", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/inventory.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/inventory.json new file mode 100644 index 000000000000..675d7bad4729 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/inventory.json @@ -0,0 +1,38 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "inv_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "inv_item_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "inv_warehouse_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "inv_quantity_on_hand", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/item.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/item.json new file mode 100644 index 000000000000..e9e642ea9202 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/item.json @@ -0,0 +1,188 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "i_item_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "i_item_id", + "type": [ + "null", + "string" + ], + "default": null + 
}, + { + "name": "i_rec_start_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "i_rec_end_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "i_item_desc", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_current_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "i_wholesale_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "i_brand_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "i_brand", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_class_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "i_class", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_category_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "i_category", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_manufact_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "i_manufact", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_size", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_formulation", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_color", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_units", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_container", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "i_manager_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "i_product_name", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/promotion.json 
b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/promotion.json new file mode 100644 index 000000000000..0b1d1ad7d4cd --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/promotion.json @@ -0,0 +1,158 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "p_promo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "p_promo_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_start_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "p_end_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "p_item_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "p_cost", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": "p_response_target", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "p_promo_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_channel_dmail", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_channel_email", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_channel_catalog", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_channel_tv", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_channel_radio", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_channel_press", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_channel_event", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_channel_demo", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_channel_details", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_purpose", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "p_discount_active", + "type": [ + "null", + 
"string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/reason.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/reason.json new file mode 100644 index 000000000000..376baf1d1234 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/reason.json @@ -0,0 +1,30 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "r_reason_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "r_reason_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "r_reason_desc", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/ship_mode.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/ship_mode.json new file mode 100644 index 000000000000..2dc27aa99cef --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/ship_mode.json @@ -0,0 +1,54 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "sm_ship_mode_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sm_ship_mode_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "sm_type", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "sm_code", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "sm_carrier", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "sm_contract", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/store.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/store.json new file mode 100644 index 000000000000..69cccde1c370 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/store.json @@ -0,0 
+1,244 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "s_store_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "s_store_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_rec_start_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "s_rec_end_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "s_closed_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "s_store_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_number_employees", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "s_floor_space", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "s_hours", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_manager", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_market_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "s_geography_class", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_market_desc", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_market_manager", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_division_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "s_division_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_company_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "s_company_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_street_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_street_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": 
"s_street_type", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_suite_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_city", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_county", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_state", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_zip", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_country", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "s_gmt_offset", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "s_tax_precentage", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/store_returns.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/store_returns.json new file mode 100644 index 000000000000..a004d22c960c --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/store_returns.json @@ -0,0 +1,166 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "sr_returned_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_return_time_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_item_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_addr_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_store_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_reason_sk", + "type": [ + "null", + "int" + ], + "default": 
null + }, + { + "name": "sr_ticket_number", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": "sr_return_quantity", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_return_amt", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_return_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_return_amt_inc_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_fee", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_return_ship_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_refunded_cash", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_reversed_charge", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_store_credit", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "sr_net_loss", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/store_sales.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/store_sales.json new file mode 100644 index 000000000000..97e92ebd7daf --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/store_sales.json @@ -0,0 +1,190 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "ss_sold_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_sold_time_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_item_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_addr_sk", + 
"type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_store_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_promo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_ticket_number", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": "ss_quantity", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_wholesale_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_list_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_sales_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_ext_discount_amt", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_ext_sales_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_ext_wholesale_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_ext_list_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_ext_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_coupon_amt", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_net_paid", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_net_paid_inc_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ss_net_profit", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/time_dim.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/time_dim.json new file mode 100644 index 000000000000..fc69edda2f55 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/time_dim.json @@ -0,0 +1,86 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "t_time_sk", + "type": [ + "null", + "int" + ], + "default": null 
+ }, + { + "name": "t_time_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "t_time", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "t_hour", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "t_minute", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "t_second", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "t_am_pm", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "t_shift", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "t_sub_shift", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "t_meal_time", + "type": [ + "null", + "string" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/warehouse.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/warehouse.json new file mode 100644 index 000000000000..872b45d36502 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/warehouse.json @@ -0,0 +1,118 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "w_warehouse_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "w_warehouse_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_warehouse_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_warehouse_sq_ft", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "w_street_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_street_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_street_type", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_suite_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_city", + "type": [ + "null", + "string" + ], + 
"default": null + }, + { + "name": "w_county", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_state", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_zip", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_country", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "w_gmt_offset", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_page.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_page.json new file mode 100644 index 000000000000..dcd90565efc3 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_page.json @@ -0,0 +1,124 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "wp_web_page_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wp_web_page_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "wp_rec_start_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "wp_rec_end_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "wp_creation_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wp_access_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wp_autogen_flag", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "wp_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wp_url", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "wp_type", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "wp_char_count", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wp_link_count", + "type": [ + 
"null", + "int" + ], + "default": null + }, + { + "name": "wp_image_count", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wp_max_ad_count", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_returns.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_returns.json new file mode 100644 index 000000000000..4579457617f0 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_returns.json @@ -0,0 +1,198 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "wr_returned_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_returned_time_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_item_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_refunded_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_refunded_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_refunded_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_refunded_addr_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_returning_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_returning_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_returning_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_returning_addr_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_web_page_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_reason_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_order_number", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": 
"wr_return_quantity", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_return_amt", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_return_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_return_amt_inc_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_fee", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_return_ship_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_refunded_cash", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_reversed_charge", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_account_credit", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "wr_net_loss", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_sales.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_sales.json new file mode 100644 index 000000000000..9b87b764540e --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_sales.json @@ -0,0 +1,278 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "ws_sold_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_sold_time_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ship_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_item_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_bill_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_bill_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_bill_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_bill_addr_sk", + "type": [ + 
"null", + "int" + ], + "default": null + }, + { + "name": "ws_ship_customer_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ship_cdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ship_hdemo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ship_addr_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_web_page_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_web_site_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ship_mode_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_warehouse_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_promo_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_order_number", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": "ws_quantity", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_wholesale_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_list_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_sales_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ext_discount_amt", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ext_sales_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ext_wholesale_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ext_list_price", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ext_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_coupon_amt", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_ext_ship_cost", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_net_paid", + "type": 
[ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_net_paid_inc_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_net_paid_inc_ship", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_net_paid_inc_ship_tax", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "ws_net_profit", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file diff --git a/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_site.json b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_site.json new file mode 100644 index 000000000000..a15c002caa37 --- /dev/null +++ b/sdks/java/testing/tpcds/src/main/resources/schemas_avro/web_site.json @@ -0,0 +1,220 @@ +{ + "type": "record", + "name": "spark_schema", + "fields": [ + { + "name": "web_site_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "web_site_id", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_rec_start_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "web_rec_end_date", + "type": [ + "null", + { + "type": "int", + "logicalType": "date" + } + ], + "default": null + }, + { + "name": "web_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_open_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "web_close_date_sk", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "web_class", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_manager", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_mkt_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "web_mkt_class", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_mkt_desc", + "type": [ + "null", + "string" + ], + "default": 
null + }, + { + "name": "web_market_manager", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_company_id", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "web_company_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_street_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_street_name", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_street_type", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_suite_number", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_city", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_county", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_state", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_zip", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_country", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "web_gmt_offset", + "type": [ + "null", + "int" + ], + "default": null + }, + { + "name": "web_tax_percentage", + "type": [ + "null", + "int" + ], + "default": null + } + ] +} \ No newline at end of file