Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
86ad4e2
Added very raw basic version of SpannerIO. No unit tests. Must sk…
gamolina Mar 3, 2017
564e714
Simplified SpannerIO getting compilation error on OutputT
gamolina Mar 6, 2017
49beb06
Compilation fixed and basic Sink functionality working.
gamolina Mar 20, 2017
90754bb
Fixed mutation batch limit calculation.
gamolina Mar 25, 2017
caa6b58
Minor tweak of batch logic.
gamolina Mar 26, 2017
be81ec7
Checkstyle plugin cleanup. Existing unit tests passing.
gamolina Mar 27, 2017
69b0eb7
Code review changes 4/6/17
gamolina Apr 6, 2017
ba7e7ee
Try to resolve merge conflict.
gamolina Apr 6, 2017
f583eae
Code review related cleanup.
gamolina Apr 7, 2017
77cfbb6
Resolve pom file merge conflict.
gamolina Apr 7, 2017
dc97e1e
More review cleanup, etc.
gamolina Apr 7, 2017
0c7cfd4
Added package-info file.
gamolina Apr 8, 2017
0f176fc
Added spanner csv loader example.
gamolina Apr 19, 2017
c6bef41
Added Spanner CSV loader example
gamolina Apr 19, 2017
f4118f7
Code cosmetics and documentation update
Apr 21, 2017
68854f5
Simplified Spanner example
Apr 22, 2017
1b367c3
Merge remote-tracking branch 'beam/master' into spanner
Apr 22, 2017
335021f
Restore coders
Apr 22, 2017
b1daa91
Bump Spanner version
Apr 22, 2017
d8db650
Format the example code
Apr 22, 2017
7d1336f
Typo
Apr 24, 2017
5defd21
Updated google-spanner-client. Removed custom coders, Mutation is ser…
Apr 25, 2017
084d68e
Some cleanup
Apr 25, 2017
5c95afb
Merge pull request #1 from mairbek/spanner
guymolinari Apr 25, 2017
2b64605
Merge branch 'master' of https://github.com/apache/beam
gamolina Apr 25, 2017
839fe05
Minor cleanup of checkstyle issues on SpannerIO, undeclared dependencies
gamolina Apr 26, 2017
500a22a
Minor checkstyle issue with SpannerCSVLoader example.
gamolina Apr 27, 2017
66ccbcd
Minor changes from Mairbek
gamolina Apr 27, 2017
202f6e1
Parameterize new dependencies in pom file.
gamolina May 1, 2017
71b6295
Fixed common protos version
gamolina May 1, 2017
9656e22
Attempt to resolve merge conflict
gamolina May 15, 2017
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not put this in the global examples – we're trying to keep this independent of specific proprietary clouds. Instead, we'd probably like to use it as part of an integration test in the sdks/java/io/google-cloud-platform-java module.

* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.examples.spanner;

import com.google.cloud.spanner.Database;
import com.google.cloud.spanner.DatabaseAdminClient;
import com.google.cloud.spanner.Mutation;
import com.google.cloud.spanner.Operation;
import com.google.cloud.spanner.Spanner;
import com.google.cloud.spanner.SpannerException;
import com.google.cloud.spanner.SpannerOptions;
import com.google.spanner.admin.database.v1.CreateDatabaseMetadata;
import java.util.Collections;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.io.gcp.spanner.SpannerIO;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.Validation;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;



/**
 * Example bulk loader that imports a simple four-column CSV file into a Cloud Spanner table.
 *
 * <p>Every input line is expected to have the form {@code Key,Name,Email,Age}. The pipeline
 * reads the input as text, turns each well-formed line into an insert-or-update
 * {@link Mutation} and writes the mutations to the configured Spanner instance and database.
 *
 * <p>When {@code --createDatabase=true} is passed, the target database is dropped and
 * re-created with a matching sample table before the pipeline runs.
 */
public class SpannerCSVLoader {

  /**
   * Command options specification.
   *
   * <p>The interface is public because it appears in the signature of the public
   * {@link #createDatabase(Options)} method and because it is handed to
   * {@link PipelineOptionsFactory}, which builds an implementation of it at runtime.
   */
  public interface Options extends PipelineOptions {
    @Description("Create a sample database")
    @Default.Boolean(false)
    boolean isCreateDatabase();
    void setCreateDatabase(boolean createDatabase);

    @Description("File to read from ")
    @Validation.Required
    String getInput();
    void setInput(String value);

    @Description("Instance ID to write to in Spanner")
    @Validation.Required
    String getInstanceId();
    void setInstanceId(String value);

    @Description("Database ID to write to in Spanner")
    @Validation.Required
    String getDatabaseId();
    void setDatabaseId(String value);

    @Description("Table name")
    @Validation.Required
    String getTable();
    void setTable(String value);
  }


  /**
   * Constructs and executes the processing pipeline based upon command options.
   *
   * @param args command-line arguments, parsed and validated into {@link Options}
   * @throws Exception if option parsing, database creation or pipeline execution fails
   */
  public static void main(String[] args) throws Exception {
    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

    // Honour the --createDatabase flag; without this call the option had no effect and
    // createDatabase() was never reachable.
    if (options.isCreateDatabase()) {
      createDatabase(options);
    }

    Pipeline p = Pipeline.create(options);
    PCollection<String> lines = p.apply(TextIO.Read.from(options.getInput()));
    PCollection<Mutation> mutations = lines
        .apply(ParDo.of(new NaiveParseCsvFn(options.getTable())));
    mutations
        .apply(SpannerIO.writeTo(options.getInstanceId(), options.getDatabaseId()));
    p.run().waitUntilFinish();
  }

  /**
   * Drops the configured database (if the drop succeeds) and re-creates it with a single
   * sample table whose columns match what {@link NaiveParseCsvFn} writes.
   *
   * <p><b>Destructive:</b> any existing data in the target database is lost.
   *
   * @param options supplies the instance id, database id and table name
   */
  public static void createDatabase(Options options) {
    Spanner client = SpannerOptions.getDefaultInstance().getService();

    DatabaseAdminClient databaseAdminClient = client.getDatabaseAdminClient();
    try {
      databaseAdminClient.dropDatabase(options.getInstanceId(), options.getDatabaseId());
    } catch (SpannerException ignored) {
      // Best effort: the database most likely does not exist yet, so there is nothing to drop.
    }

    // Cloud Spanner DDL requires an explicit length on STRING columns and has no INT type;
    // Age is declared INT64 to match the 64-bit integer written by NaiveParseCsvFn.
    String createTable = "CREATE TABLE " + options.getTable() + " ("
        + "  Key   INT64,"
        + "  Name  STRING(MAX),"
        + "  Email STRING(MAX),"
        + "  Age   INT64"
        + ") PRIMARY KEY (Key)";

    Operation<Database, CreateDatabaseMetadata> op = databaseAdminClient.createDatabase(
        options.getInstanceId(), options.getDatabaseId(), Collections.singleton(createTable));
    // Block until the long-running create operation has finished.
    op.waitFor();
  }


  /**
   * A DoFn that creates a Spanner Mutation for each CSV line.
   *
   * <p>The parsing is deliberately naive: the line is split on every comma, so quoted fields
   * containing commas are not supported. Lines that do not split into exactly four fields are
   * dropped without any output. A non-numeric {@code Key} or {@code Age} field makes
   * {@link Long#valueOf(String)} / {@link Integer#valueOf(String)} throw a
   * {@link NumberFormatException}, which is not caught here.
   */
  static class NaiveParseCsvFn extends DoFn<String, Mutation> {
    /** Name of the Spanner table the produced mutations target. */
    private final String table;

    NaiveParseCsvFn(String table) {
      this.table = table;
    }

    /**
     * Parses one CSV line and emits the corresponding insert-or-update mutation.
     *
     * @param c context providing the input line and receiving the output mutation
     */
    @ProcessElement
    public void processElement(ProcessContext c) {
      String line = c.element();
      String[] elements = line.split(",");
      if (elements.length != 4) {
        // Not a Key,Name,Email,Age record; skip it.
        return;
      }
      Mutation mutation = Mutation.newInsertOrUpdateBuilder(table)
          .set("Key").to(Long.valueOf(elements[0]))
          .set("Name").to(elements[1])
          .set("Email").to(elements[2])
          .set("Age").to(Integer.valueOf(elements[3]))
          .build();
      c.output(mutation);
    }
  }
}
9 changes: 8 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,11 @@
<woodstox.version>4.4.1</woodstox.version>
<spring.version>4.3.5.RELEASE</spring.version>
<groovy-maven-plugin.version>2.0</groovy-maven-plugin.version>

<compiler.error.flag>-Werror</compiler.error.flag>
<compiler.default.pkginfo.flag>-Xpkginfo:always</compiler.default.pkginfo.flag>
<compiler.default.exclude>nothing</compiler.default.exclude>
<spanner.version>0.16.0-beta</spanner.version>
<api-common.version>1.0.0-rc2</api-common.version>
</properties>

<packaging>pom</packaging>
Expand Down Expand Up @@ -824,6 +825,12 @@
<version>${google-cloud-bigdataoss.version}</version>
</dependency>

<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-spanner</artifactId>
<version>${spanner.version}</version>
</dependency>

<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>util</artifactId>
Expand Down
4 changes: 2 additions & 2 deletions sdks/java/core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,8 @@
<artifactId>joda-time</artifactId>
</dependency>

<!-- To use org.apache.beam.io.AvroSource with XZ-encoded files, please explicitly
declare this dependency to include org.tukaani:xz on the classpath at runtime. -->
<!-- To use org.apache.beam.io.AvroSource with XZ-encoded files, please explicitly
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert?

declare this dependency to include org.tukaani:xz on the classpath at runtime. -->
<dependency>
<groupId>org.tukaani</groupId>
<artifactId>xz</artifactId>
Expand Down
33 changes: 23 additions & 10 deletions sdks/java/io/google-cloud-platform/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,28 @@
<artifactId>jackson-databind</artifactId>
</dependency>

<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-core</artifactId>
</dependency>

<dependency>
<groupId>com.google.api.grpc</groupId>
<artifactId>grpc-google-common-protos</artifactId>
<version>${grpc-google-common-protos.version}</version>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no versions down here -- they should all be in top-level dependency management like you did for the google-cloud-spanner module.

</dependency>

<dependency>
<groupId>com.google.apis</groupId>
<artifactId>google-api-services-bigquery</artifactId>
</dependency>

<dependency>
<groupId>com.google.api</groupId>
<artifactId>api-common</artifactId>
<version>${api-common.version}</version>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto re dependency management

</dependency>

<dependency>
<groupId>com.google.apis</groupId>
<artifactId>google-api-services-storage</artifactId>
Expand Down Expand Up @@ -121,11 +138,6 @@
<artifactId>grpc-auth</artifactId>
</dependency>

<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-core</artifactId>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is grpc-core no longer used?

</dependency>

<dependency>
<groupId>io.grpc</groupId>
<artifactId>grpc-netty</artifactId>
Expand Down Expand Up @@ -160,6 +172,12 @@
<artifactId>joda-time</artifactId>
</dependency>

<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-spanner</artifactId>
<version>${spanner.version}</version>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto re version

</dependency>

<dependency>
<groupId>com.google.cloud.bigtable</groupId>
<artifactId>bigtable-protos</artifactId>
Expand Down Expand Up @@ -197,11 +215,6 @@
<artifactId>google-auth-library-oauth2-http</artifactId>
</dependency>

<dependency>
<groupId>com.google.api.grpc</groupId>
<artifactId>grpc-google-common-protos</artifactId>
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down
Loading