Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@
<avro.version>1.7.7</avro.version>
<bigquery.version>v2-rev248-1.21.0</bigquery.version>
<bigtable.version>0.2.3</bigtable.version>
<pubsubgrpc.version>0.0.2</pubsubgrpc.version>
<clouddebugger.version>v2-rev6-1.21.0</clouddebugger.version>
<dataflow.version>v1b3-rev22-1.21.0</dataflow.version>
<dataflow.proto.version>0.5.160222</dataflow.proto.version>
Expand Down
34 changes: 34 additions & 0 deletions sdks/java/core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,40 @@
<version>0.12.0</version>
</dependency>

<dependency>
<groupId>com.google.auth</groupId>
<artifactId>google-auth-library-oauth2-http</artifactId>
<version>0.3.1</version>
<exclusions>
<!-- Exclude an old version of guava that is being pulled
in by a transitive dependency of google-api-client -->
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava-jdk5</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-handler</artifactId>
<version>4.1.0.Beta8</version>
</dependency>

<dependency>
<groupId>com.google.api.grpc</groupId>
<artifactId>grpc-pubsub-v1</artifactId>
<version>${pubsubgrpc.version}</version>
<exclusions>
<!-- Exclude an old version of guava that is being pulled
in by a transitive dependency of google-api-client -->
<exclusion>
<groupId>com.google.guava</groupId>
<artifactId>guava-jdk5</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.google.cloud.bigtable</groupId>
<artifactId>bigtable-protos</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,322 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.cloud.dataflow.sdk.io;

import com.google.api.client.repackaged.com.google.common.base.Preconditions;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops

import java.io.IOException;
import java.io.Serializable;
import java.util.Collection;

/**
* A helper interface for talking to Pubsub via an underlying transport.
*/
public interface PubsubClient extends AutoCloseable {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It occurs to me that this PR would really benefit from a follow-up with an in-memory testing fake, with a test suite that can be applied to both it and the gRPC implementation.

/**
 * Typed wrapper around the path string that identifies a cloud project.
 *
 * <p>Two instances are equal exactly when their wrapped path strings are equal.
 */
class ProjectPath implements Serializable {
  private final String path;

  /**
   * Wrap {@code path} as-is; the string is not inspected or validated.
   */
  public ProjectPath(String path) {
    this.path = path;
  }

  /**
   * Build the path for {@code projectId} by prefixing it with {@code projects/}.
   */
  public static ProjectPath fromId(String projectId) {
    return new ProjectPath("projects/" + projectId);
  }

  /**
   * Return the wrapped path string.
   */
  public String getPath() {
    return path;
  }

  @Override
  public String toString() {
    return path;
  }

  @Override
  public int hashCode() {
    return path.hashCode();
  }

  @Override
  public boolean equals(Object other) {
    if (other == this) {
      return true;
    }
    if (other == null) {
      return false;
    }
    if (other.getClass() != getClass()) {
      return false;
    }
    return path.equals(((ProjectPath) other).path);
  }
}

/**
 * Path representing a Pubsub subscription.
 *
 * <p>Instances built by {@link #fromName} have the form
 * {@code projects/PROJECT_ID/subscriptions/SUBSCRIPTION_NAME}. Two instances are equal
 * exactly when their wrapped path strings are equal.
 */
class SubscriptionPath implements Serializable {
  private final String path;

  /**
   * Wrap {@code path} as-is; the string is not validated here.
   */
  public SubscriptionPath(String path) {
    this.path = path;
  }

  /**
   * Return the wrapped path string.
   */
  public String getPath() {
    return path;
  }

  /**
   * Return this path rearranged as {@code /subscriptions/PROJECT_ID/SUBSCRIPTION_NAME}
   * (presumably the form expected by the v1beta1 Pubsub API, going by the method name).
   *
   * @throws IllegalStateException if the wrapped path does not split into exactly four
   *     {@code /}-separated components
   */
  public String getV1Beta1Path() {
    String[] splits = path.split("/");
    // Checked by hand rather than through a Preconditions helper so that the failure
    // names the offending path and the block needs nothing beyond the JDK.
    if (splits.length != 4) {
      throw new IllegalStateException(
          String.format(
              "Malformed subscription path '%s': expected 4 '/'-separated components but found %d",
              path, splits.length));
    }
    return String.format("/subscriptions/%s/%s", splits[1], splits[3]);
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    SubscriptionPath that = (SubscriptionPath) o;
    return path.equals(that.path);
  }

  @Override
  public int hashCode() {
    return path.hashCode();
  }

  @Override
  public String toString() {
    return path;
  }

  /**
   * Build the subscription path for {@code subscriptionName} within {@code projectId}.
   */
  public static SubscriptionPath fromName(String projectId, String subscriptionName) {
    return new SubscriptionPath(String.format("projects/%s/subscriptions/%s",
        projectId, subscriptionName));
  }
}

/**
 * Path representing a Pubsub topic.
 *
 * <p>Instances built by {@link #fromName} have the form
 * {@code projects/PROJECT_ID/topics/TOPIC_NAME}. Two instances are equal exactly when
 * their wrapped path strings are equal.
 */
class TopicPath implements Serializable {
  private final String path;

  /**
   * Wrap {@code path} as-is; the string is not validated here.
   */
  public TopicPath(String path) {
    this.path = path;
  }

  /**
   * Return the wrapped path string.
   */
  public String getPath() {
    return path;
  }

  /**
   * Return this path rearranged as {@code /topics/PROJECT_ID/TOPIC_NAME}
   * (presumably the form expected by the v1beta1 Pubsub API, going by the method name).
   *
   * @throws IllegalStateException if the wrapped path does not split into exactly four
   *     {@code /}-separated components
   */
  public String getV1Beta1Path() {
    String[] splits = path.split("/");
    // Checked by hand rather than through a Preconditions helper so that the failure
    // names the offending path and the block needs nothing beyond the JDK.
    if (splits.length != 4) {
      throw new IllegalStateException(
          String.format(
              "Malformed topic path '%s': expected 4 '/'-separated components but found %d",
              path, splits.length));
    }
    return String.format("/topics/%s/%s", splits[1], splits[3]);
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    TopicPath topicPath = (TopicPath) o;
    return path.equals(topicPath.path);
  }

  @Override
  public int hashCode() {
    return path.hashCode();
  }

  @Override
  public String toString() {
    return path;
  }

  /**
   * Build the topic path for {@code topicName} within {@code projectId}.
   */
  public static TopicPath fromName(String projectId, String topicName) {
    return new TopicPath(String.format("projects/%s/topics/%s", projectId, topicName));
  }
}

/**
 * An encoded element together with its timestamp, ready to be sent to Pubsub.
 */
class OutgoingMessage {
  /** Encoded bytes of the element. The array is held by reference, not copied. */
  public final byte[] elementBytes;

  /** Timestamp of the element, in milliseconds since the epoch. */
  public final long timestampMsSinceEpoch;

  /**
   * Record the given bytes and timestamp without copying or validating them.
   */
  public OutgoingMessage(byte[] elementBytes, long timestampMsSinceEpoch) {
    this.timestampMsSinceEpoch = timestampMsSinceEpoch;
    this.elementBytes = elementBytes;
  }
}

/**
 * An encoded element received from Pubsub, together with the bookkeeping values
 * that accompany it.
 */
class IncomingMessage {
  /** Encoded bytes of the element. The array is held by reference, not copied. */
  public final byte[] elementBytes;

  /**
   * Timestamp of the element, in milliseconds since the epoch: either Pubsub's
   * processing time or the custom timestamp associated with the message.
   */
  public final long timestampMsSinceEpoch;

  /** System time at which the message was requested, in milliseconds since the epoch. */
  public final long requestTimeMsSinceEpoch;

  /** Id to hand back to Pubsub to acknowledge receipt of this message. */
  public final String ackId;

  /** Id handed to the runner to tell this message apart from all others. */
  public final byte[] recordId;

  /**
   * Record the given values without copying or validating them.
   */
  public IncomingMessage(
      byte[] elementBytes,
      long timestampMsSinceEpoch,
      long requestTimeMsSinceEpoch,
      String ackId,
      byte[] recordId) {
    this.recordId = recordId;
    this.ackId = ackId;
    this.requestTimeMsSinceEpoch = requestTimeMsSinceEpoch;
    this.timestampMsSinceEpoch = timestampMsSinceEpoch;
    this.elementBytes = elementBytes;
  }
}

/**
 * Gracefully close the underlying transport.
 *
 * <p>Overrides {@link AutoCloseable#close()} without declaring any checked exception.
 */
@Override
void close();


/**
 * Publish {@code outgoingMessages} to Pubsub {@code topic}.
 *
 * @param topic the topic to publish to
 * @param outgoingMessages the messages to publish
 * @return the number of messages published
 * @throws IOException if the publish operation fails
 */
int publish(TopicPath topic, Iterable<OutgoingMessage> outgoingMessages) throws IOException;

/**
* Request the next batch of up to {@code batchSize} messages from {@code subscription}.
* Return the received messages, or empty collection if none were available. Does not
* wait for messages to arrive. Returned messages will record their request time
* as {@code requestTimeMsSinceEpoch}.
*
* @throws IOException
*/
Collection<IncomingMessage> pull(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would seem pretty normal to have explicit Subscription class rather than passing the string name as a key each time. Is there a reason this cannot be done?

For example, if #pull did not take a subscription string, but pulled all available messages from all subscriptions in bulk, then I would understand.

Related - publishing and subscribing seem to be two separable activities for pub/sub but this client seems like roughly the (disjoint) union of the two.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This class a) captures common bookkeeping between unbounded pubsub source/sink and benchmarking code in a way which avoids making any grpc or protoc details public b) provides a hook for (as yet unwritten) mocks. Feels like scope creep for it to go any further.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given that this is blocking other work, I'm OK to wait on developing it into a richer client. What we really need is a thick Pubsub client, which I agree is a slightly larger scope than your immediate goals. No need to do everything up front.

An intermediate approach that leverages the type system to prevent silly errors and improves readability: a simple rich string wrapper to make it explicitly type-incompatible with String. Should be ~4 lines using AutoValue or ~10 without it. (What I mean is the Java version of the Haskell newtype Subscription = Subscription String, if that means anything to you.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Decided not to add dep on com.google.auto.value:auto-value though it is certainly tempting.

long requestTimeMsSinceEpoch, SubscriptionPath subscription, int batchSize)
throws IOException;

/**
 * Acknowledge messages from {@code subscription} with {@code ackIds}.
 *
 * @param subscription the subscription the messages were received from
 * @param ackIds the ack ids of the messages to acknowledge
 * @throws IOException if the acknowledge operation fails
 */
void acknowledge(SubscriptionPath subscription, Iterable<String> ackIds) throws IOException;

/**
 * Modify the ack deadline for messages from {@code subscription} with {@code ackIds} to
 * be {@code deadlineSeconds} from now.
 *
 * @param subscription the subscription the messages were received from
 * @param ackIds the ack ids of the messages whose deadline is to be changed
 * @param deadlineSeconds the new deadline, in seconds from now
 * @throws IOException if the modify operation fails
 */
void modifyAckDeadline(
SubscriptionPath subscription, Iterable<String> ackIds,
int deadlineSeconds)
throws IOException;

/**
 * Create {@code topic}.
 *
 * @param topic the path of the topic to create
 * @throws IOException if the create operation fails
 */
void createTopic(TopicPath topic) throws IOException;

/**
 * Delete {@code topic}.
 *
 * @param topic the path of the topic to delete
 * @throws IOException if the delete operation fails
 */
void deleteTopic(TopicPath topic) throws IOException;

/**
 * Return a list of topics for {@code project}.
 *
 * @param project the project whose topics are listed
 * @return the paths of the topics found
 * @throws IOException if the list operation fails
 */
Collection<TopicPath> listTopics(ProjectPath project) throws IOException;

/**
 * Create {@code subscription} to {@code topic}.
 *
 * @param topic the topic to subscribe to
 * @param subscription the path of the subscription to create
 * @param ackDeadlineSeconds presumably the ack deadline, in seconds, for the new
 *     subscription (inferred from the name; confirm against implementations)
 * @throws IOException if the create operation fails
 */
void createSubscription(
TopicPath topic, SubscriptionPath subscription,
int ackDeadlineSeconds) throws IOException;

/**
 * Delete {@code subscription}.
 *
 * @param subscription the path of the subscription to delete
 * @throws IOException if the delete operation fails
 */
void deleteSubscription(SubscriptionPath subscription) throws IOException;

/**
 * Return a list of subscriptions for {@code topic} in {@code project}.
 *
 * @param project the project to look in
 * @param topic the topic whose subscriptions are listed
 * @return the paths of the subscriptions found
 * @throws IOException if the list operation fails
 */
Collection<SubscriptionPath> listSubscriptions(ProjectPath project, TopicPath topic)
throws IOException;
}
Loading