Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 69 additions & 23 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>io.cdap.plugin</groupId>
<artifactId>github-plugin</artifactId>
<name>GitHub plugin</name>
<version>1.0.0-SNAPSHOT</version>
<description>A collection of GitHub connectors and plugins</description>
<url>https://github.com/data-integrations/github</url>

<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
Expand All @@ -19,7 +17,6 @@
<comments>A business-friendly OSS license</comments>
</license>
</licenses>

<developers>
<developer>
<name>CDAP</name>
Expand All @@ -28,7 +25,6 @@
<organizationUrl>http://www.cdap.io</organizationUrl>
</developer>
</developers>

<repositories>
<repository>
<id>sonatype</id>
Expand All @@ -43,11 +39,9 @@
<url>https://repo.eclipse.org/content/groups/releases</url>
</repository>
</repositories>

<issueManagement>
<url>https://issues.cask.co/browse/CDAP</url>
</issueManagement>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- properties for script build step that creates the config files for the artifacts -->
Expand All @@ -64,9 +58,64 @@
<assertj.version>3.11.1</assertj.version>
<random-beans.version>3.9.0</random-beans.version>
<google-http-client-gson.version>1.32.1</google-http-client-gson.version>
<beam.version>2.32.0</beam.version>
</properties>

<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-bom</artifactId>
<version>${beam.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<!-- Beam -->
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-google-cloud-platform</artifactId>
<exclusions>
<exclusion>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-jdbc</artifactId>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-extensions-google-cloud-platform-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-hadoop-file-system</artifactId>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-hadoop-format</artifactId>
</dependency>
<dependency>
<!-- Direct runner included for local development and testing. -->
<groupId>org.apache.beam</groupId>
<artifactId>beam-runners-direct-java</artifactId>
</dependency>
<dependency>
<groupId>io.cdap.cdap</groupId>
<artifactId>cdap-etl-api</artifactId>
Expand Down Expand Up @@ -195,7 +244,6 @@
<artifactId>google-http-client-gson</artifactId>
<version>${google-http-client-gson.version}</version>
</dependency>

<!-- tests -->
<dependency>
<groupId>io.cdap.cdap</groupId>
Expand Down Expand Up @@ -233,26 +281,25 @@
<version>${cdap.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.9.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.9.0</version>
<scope>test</scope>
</dependency>
<!-- <dependency>-->
<!-- <groupId>com.fasterxml.jackson.core</groupId>-->
<!-- <artifactId>jackson-core</artifactId>-->
<!-- <version>2.9.8</version>-->
<!-- <scope>test</scope>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.fasterxml.jackson.core</groupId>-->
<!-- <artifactId>jackson-annotations</artifactId>-->
<!-- <version>2.9.0</version>-->
<!-- <scope>test</scope>-->
<!-- </dependency>-->
<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
<version>4.2.2</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
Expand Down Expand Up @@ -363,5 +410,4 @@
</plugin>
</plugins>
</build>

</project>
</project>
74 changes: 74 additions & 0 deletions src/main/java/io/cdap/plugin/github/source/BeamAdapter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package io.cdap.plugin.github.source;

import com.google.gson.Gson;
import com.google.gson.JsonObject;
import io.cdap.plugin.github.source.batch.GithubBatchSourceConfig;
import io.cdap.plugin.github.source.batch.GithubFormatProvider;
import io.cdap.plugin.github.source.batch.GithubInputFormat;
import io.cdap.plugin.github.source.common.model.impl.Commit;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.NullableCoder;
import org.apache.beam.sdk.coders.SerializableCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.io.hadoop.WritableCoder;
import org.apache.beam.sdk.io.hadoop.format.HadoopFormatIO;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.TypeDescriptors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;

/**
 * Standalone example that reads GitHub commits through {@link GithubInputFormat} using Apache Beam's
 * {@link HadoopFormatIO}, converts every {@link Commit} to a JSON string and writes the strings out
 * with {@link TextIO}.
 *
 * <p>The GitHub personal access token is taken from the {@code GITHUB_PAT} environment variable; the
 * repository, dataset and API host are fixed in {@link #main(String[])}.
 */
public class BeamAdapter {

  /** Name of the environment variable that must hold the GitHub personal access token. */
  private static final String TOKEN_ENV_VAR = "GITHUB_PAT";

  /**
   * Shared JSON serializer. It is referenced statically so that the mapping lambda does not capture
   * it, and so that a new {@link Gson} is not allocated for every element.
   */
  private static final Gson GSON = new Gson();

  /**
   * Builds and runs the pipeline, blocking until it has finished.
   *
   * @param args command-line arguments; currently unused
   * @throws IllegalStateException if the {@code GITHUB_PAT} environment variable is missing or blank
   */
  public static void main(String[] args) {
    String authorizationToken = System.getenv(TOKEN_ENV_VAR);
    if (authorizationToken == null || authorizationToken.trim().isEmpty()) {
      // Fail fast: formatting a null token would otherwise send the literal string "null" to GitHub.
      throw new IllegalStateException(
        "Environment variable " + TOKEN_ENV_VAR + " must contain a GitHub personal access token");
    }

    String repoOwner = "ktttnv";
    String repoName = "react-quiz";
    String datasetName = "Commits";
    String hostname = "https://api.github.com";

    GithubBatchSourceConfig githubBatchSourceConfig =
      buildSourceConfig(authorizationToken, repoOwner, repoName, datasetName, hostname);
    Configuration myHadoopConfiguration = buildHadoopConfiguration(githubBatchSourceConfig);

    Pipeline p = Pipeline.create();

    // Read data only with Hadoop configuration.
    PCollection<KV<Text, Commit>> pcol = p.apply("read",
      HadoopFormatIO.<Text, Commit>read()
        .withConfiguration(myHadoopConfiguration)
    ).setCoder(
      KvCoder.of(NullableCoder.of(WritableCoder.of(Text.class)), SerializableCoder.of(Commit.class)));

    PCollection<String> strings = pcol.apply(MapElements
        .into(TypeDescriptors.strings())
        .via((SerializableFunction<KV<Text, Commit>, String>) input -> GSON.toJson(input.getValue())))
      .setCoder(StringUtf8Coder.of());

    // The argument is used by TextIO as an output filename prefix.
    strings.apply(TextIO.write().to("./txt.txt"));

    // Wait for completion so the JVM does not exit before a non-blocking runner has finished.
    p.run().waitUntilFinish();
  }

  /**
   * Creates the source config from its JSON representation. The JSON is assembled as a tree rather
   * than by string formatting, so values containing quotes or backslashes are escaped correctly.
   */
  private static GithubBatchSourceConfig buildSourceConfig(String authorizationToken, String repoOwner,
                                                           String repoName, String datasetName,
                                                           String hostname) {
    JsonObject json = new JsonObject();
    json.addProperty("authorizationToken", authorizationToken);
    json.addProperty("repoOwner", repoOwner);
    json.addProperty("repoName", repoName);
    json.addProperty("datasetName", datasetName);
    json.addProperty("hostname", hostname);
    return GSON.fromJson(json, GithubBatchSourceConfig.class);
  }

  /**
   * Creates the Hadoop configuration handed to {@link HadoopFormatIO}: input format class, key and
   * value classes, and the serialized source config taken from {@link GithubFormatProvider}.
   */
  private static Configuration buildHadoopConfiguration(GithubBatchSourceConfig githubBatchSourceConfig) {
    GithubFormatProvider githubFormatProvider = new GithubFormatProvider(githubBatchSourceConfig);

    // 'false' skips loading the default Hadoop resources; only the entries set below are present.
    Configuration myHadoopConfiguration = new Configuration(false);
    myHadoopConfiguration.setClass("mapreduce.job.inputformat.class", GithubInputFormat.class,
      InputFormat.class);
    myHadoopConfiguration.setClass("key.class", Text.class, Object.class);
    myHadoopConfiguration.setClass("value.class", Commit.class, Object.class);
    myHadoopConfiguration.set(GithubFormatProvider.PROPERTY_CONFIG_JSON,
      githubFormatProvider.getInputFormatConfiguration()
        .get(GithubFormatProvider.PROPERTY_CONFIG_JSON));
    return myHadoopConfiguration;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import io.cdap.plugin.github.source.common.GitHubRequestFactory;
import io.cdap.plugin.github.source.common.model.GitHubModel;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
Expand All @@ -35,7 +36,7 @@
/**
* RecordReader implementation that reads {@link GitHubModel} instances from the GitHub repository API.
*/
public class GithubRecordReader extends RecordReader<NullWritable, GitHubModel> {
public class GithubRecordReader extends RecordReader<Text, GitHubModel> {

private final GithubBatchSourceConfig config;
private final String link;
Expand All @@ -54,7 +55,7 @@ public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptCont
HttpRequest httpRequest = GitHubRequestFactory.buildRequest(link, config.getAuthorizationToken());
HttpResponse response = httpRequest.execute();
Class<? extends GitHubModel[]> datasetClass = (Class<? extends GitHubModel[]>)
Array.newInstance(config.getDatasetClass(), 0).getClass();
Array.newInstance(config.getDatasetClass(), 0).getClass();
currentPage = Arrays.stream(response.parseAs(datasetClass)).iterator();
}

Expand All @@ -69,8 +70,8 @@ public boolean nextKeyValue() {
}

@Override
public NullWritable getCurrentKey() {
return null;
public Text getCurrentKey() {
return new Text("");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
*/
package io.cdap.plugin.github.source.common.model;

import java.io.Serializable;

/**
* Generic GitHub model interface.
*/
public interface GitHubModel {
public interface GitHubModel extends Serializable {
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import io.cdap.plugin.github.source.common.model.GitHubModel;
import io.cdap.plugin.github.source.common.model.impl.user.User;

import java.io.Serializable;
import java.util.List;

/**
Expand Down Expand Up @@ -48,7 +49,7 @@ public class Commit implements GitHubModel {
/**
* Commit.CommitData model
*/
public static class CommitData {
public static class CommitData implements Serializable {
@Key
private String url;
@Key
Expand All @@ -67,7 +68,7 @@ public static class CommitData {
/**
* Commit.CommitData.CommitUser model
*/
public static class CommitUser {
public static class CommitUser implements Serializable {
@Key
private String name;
@Key
Expand All @@ -79,7 +80,7 @@ public static class CommitUser {
/**
* Commit.CommitData.Tree model
*/
public static class Tree {
public static class Tree implements Serializable {
@Key
private String url;
@Key
Expand All @@ -89,7 +90,7 @@ public static class Tree {
/**
* Commit.CommitData.Verification model
*/
public static class Verification {
public static class Verification implements Serializable {
@Key
private Boolean verified;
@Key
Expand Down