diff --git a/examples/java8/pom.xml b/examples/java8/pom.xml index b4a9ec6d7fa4..e211739a9412 100644 --- a/examples/java8/pom.xml +++ b/examples/java8/pom.xml @@ -49,6 +49,18 @@ + + org.apache.maven.plugins + maven-surefire-plugin + + + + + + + + + org.apache.maven.plugins maven-dependency-plugin diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml index a53a386c2828..b29a5bf221c0 100644 --- a/runners/flink/runner/pom.xml +++ b/runners/flink/runner/pom.xml @@ -34,31 +34,6 @@ jar - - - disable-runnable-on-service-tests - - true - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - runnable-on-service-tests - - true - - - - - - - - - @@ -87,7 +62,8 @@ flink-avro_2.10 ${flink.version} - + + org.apache.beam java-sdk-all @@ -111,6 +87,37 @@ + + + org.hamcrest + hamcrest-all + test + + + junit + junit + test + + + org.mockito + mockito-all + test + + + + + org.apache.beam + java-sdk-all + tests + test + + + org.slf4j + slf4j-jdk14 + + + + org.apache.beam java-examples-all @@ -133,12 +140,6 @@ org.apache.flink flink-test-utils_2.10 ${flink.version} - test - - - org.mockito - mockito-all - test @@ -168,10 +169,60 @@ org.apache.maven.plugins maven-surefire-plugin + + + runnable-on-service-tests + integration-test + + test + + + org.apache.beam.sdk.testing.RunnableOnService + none + true + + org.apache.beam:java-sdk-all + + + + [ + "--runner=org.apache.beam.runners.flink.TestFlinkPipelineRunner", + "--streaming=false" + ] + + + + + + streaming-runnable-on-service-tests + integration-test + + test + + + true + org.apache.beam.sdk.testing.RunnableOnService + none + true + + org.apache.beam:java-sdk-all + + + + [ + "--runner=org.apache.beam.runners.flink.TestFlinkPipelineRunner", + "--streaming=true" + ] + + + + + + + - diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java index 3edf6f30c22d..b5ffced60d19 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineRunner.java @@ -108,7 +108,7 @@ public FlinkRunnerResult run(Pipeline pipeline) { this.flinkJobEnv.translate(pipeline); LOG.info("Starting execution of Flink program."); - + JobExecutionResult result; try { result = this.flinkJobEnv.executePipeline(); @@ -138,20 +138,6 @@ public FlinkPipelineOptions getPipelineOptions() { return options; } - /** - * Constructs a runner with default properties for testing. - * - * @return The newly created runner. 
- */ - public static FlinkPipelineRunner createForTest(boolean streaming) { - FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class); - // we use [auto] for testing since this will make it pick up the Testing - // ExecutionEnvironment - options.setFlinkMaster("[auto]"); - options.setStreaming(streaming); - return new FlinkPipelineRunner(options); - } - @Override public Output apply( PTransform transform, Input input) { diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java index cd99f4e65bce..ec61805a4ed0 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java @@ -41,7 +41,9 @@ private FlinkRunnerRegistrar() { } public static class Runner implements PipelineRunnerRegistrar { @Override public Iterable>> getPipelineRunners() { - return ImmutableList.>>of(FlinkPipelineRunner.class); + return ImmutableList.>>of( + FlinkPipelineRunner.class, + TestFlinkPipelineRunner.class); } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java new file mode 100644 index 000000000000..139aebf9dd2b --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkPipelineRunner.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.runners.flink; + +import org.apache.beam.sdk.Pipeline; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.options.PipelineOptionsValidator; +import org.apache.beam.sdk.runners.PipelineRunner; +import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.values.PInput; +import org.apache.beam.sdk.values.POutput; + +import org.apache.flink.runtime.client.JobExecutionException; + +public class TestFlinkPipelineRunner extends PipelineRunner { + + private FlinkPipelineRunner delegate; + + private TestFlinkPipelineRunner(FlinkPipelineOptions options) { + // We use [auto] for testing since this will make it pick up the Testing ExecutionEnvironment + options.setFlinkMaster("[auto]"); + this.delegate = FlinkPipelineRunner.fromOptions(options); + } + + public static TestFlinkPipelineRunner fromOptions(PipelineOptions options) { + FlinkPipelineOptions flinkOptions = PipelineOptionsValidator.validate(FlinkPipelineOptions.class, options); + return new TestFlinkPipelineRunner(flinkOptions); + } + + public static TestFlinkPipelineRunner create(boolean streaming) { + FlinkPipelineOptions flinkOptions = PipelineOptionsFactory.as(FlinkPipelineOptions.class); + flinkOptions.setStreaming(streaming); + return TestFlinkPipelineRunner.fromOptions(flinkOptions); + } + + @Override + public + OutputT apply(PTransform transform, InputT input) { + return delegate.apply(transform, input); + } + + @Override + public FlinkRunnerResult run(Pipeline pipeline) { + try { + return delegate.run(pipeline); + } catch (RuntimeException e) { + // Special case hack to pull out assertion errors from PAssert; instead there should + // probably be a better story along the lines of UserCodeException. + if (e.getCause() != null + && e.getCause() instanceof JobExecutionException + && e.getCause().getCause() instanceof AssertionError) { + throw (AssertionError) e.getCause().getCause(); + } else { + throw e; + } + } + } + + public PipelineOptions getPipelineOptions() { + return delegate.getPipelineOptions(); + } +} + + diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/io/ConsoleIO.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/io/ConsoleIO.java deleted file mode 100644 index 9c36c217df36..000000000000 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/io/ConsoleIO.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.flink.io; - -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; - -/** - * Transform for printing the contents of a {@link org.apache.beam.sdk.values.PCollection}. - * to standard output. - * - * This is Flink-specific and will only work when executed using the - * {@link org.apache.beam.runners.flink.FlinkPipelineRunner}. - */ -public class ConsoleIO { - - /** - * A PTransform that writes a PCollection to a standard output. - */ - public static class Write { - - /** - * Returns a ConsoleIO.Write PTransform with a default step name. - */ - public static Bound create() { - return new Bound(); - } - - /** - * Returns a ConsoleIO.Write PTransform with the given step name. - */ - public static Bound named(String name) { - return new Bound().named(name); - } - - /** - * A PTransform that writes a bounded PCollection to standard output. - */ - public static class Bound extends PTransform, PDone> { - private static final long serialVersionUID = 0; - - Bound() { - super("ConsoleIO.Write"); - } - - Bound(String name) { - super(name); - } - - /** - * Returns a new ConsoleIO.Write PTransform that's like this one but with the given - * step - * name. Does not modify this object. - */ - public Bound named(String name) { - return new Bound(name); - } - - @Override - public PDone apply(PCollection input) { - return PDone.in(input.getPipeline()); - } - } - } -} - diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java index 3d39e8182cab..69c02a22b36d 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchPipelineTranslator.java @@ -17,6 +17,7 @@ */ package org.apache.beam.runners.flink.translation; +import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.runners.TransformTreeNode; import org.apache.beam.sdk.transforms.AppliedPTransform; @@ -24,13 +25,15 @@ import org.apache.beam.sdk.transforms.join.CoGroupByKey; import org.apache.beam.sdk.values.PValue; +import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; +import org.apache.flink.api.java.io.DiscardingOutputFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * FlinkBatchPipelineTranslator knows how to translate Pipeline objects into Flink Jobs. - * This is based on {@link org.apache.beam.runners.dataflow.DataflowPipelineTranslator} + * {@link Pipeline.PipelineVisitor} for executing a {@link Pipeline} as a + * Flink batch job. 
*/ public class FlinkBatchPipelineTranslator extends FlinkPipelineTranslator { @@ -47,6 +50,17 @@ public FlinkBatchPipelineTranslator(ExecutionEnvironment env, PipelineOptions op this.batchContext = new FlinkBatchTranslationContext(env, options); } + @Override + @SuppressWarnings("rawtypes, unchecked") + public void translate(Pipeline pipeline) { + super.translate(pipeline); + + // terminate dangling DataSets + for (DataSet dataSet: batchContext.getDanglingDataSets().values()) { + dataSet.output(new DiscardingOutputFormat()); + } + } + // -------------------------------------------------------------------------------------------- // Pipeline Visitor Methods // -------------------------------------------------------------------------------------------- diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java index a03352efae15..83588076c46c 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTransformTranslators.java @@ -17,81 +17,88 @@ */ package org.apache.beam.runners.flink.translation; -import org.apache.beam.runners.flink.io.ConsoleIO; -import org.apache.beam.runners.flink.translation.functions.FlinkCoGroupKeyedListAggregator; -import org.apache.beam.runners.flink.translation.functions.FlinkCreateFunction; +import org.apache.beam.runners.flink.translation.functions.FlinkAssignWindows; import org.apache.beam.runners.flink.translation.functions.FlinkDoFnFunction; -import org.apache.beam.runners.flink.translation.functions.FlinkKeyedListAggregationFunction; +import org.apache.beam.runners.flink.translation.functions.FlinkMergingNonShuffleReduceFunction; +import org.apache.beam.runners.flink.translation.functions.FlinkMergingPartialReduceFunction; +import org.apache.beam.runners.flink.translation.functions.FlinkMergingReduceFunction; import org.apache.beam.runners.flink.translation.functions.FlinkMultiOutputDoFnFunction; import org.apache.beam.runners.flink.translation.functions.FlinkMultiOutputPruningFunction; import org.apache.beam.runners.flink.translation.functions.FlinkPartialReduceFunction; import org.apache.beam.runners.flink.translation.functions.FlinkReduceFunction; -import org.apache.beam.runners.flink.translation.functions.UnionCoder; import org.apache.beam.runners.flink.translation.types.CoderTypeInformation; import org.apache.beam.runners.flink.translation.types.KvCoderTypeInformation; import org.apache.beam.runners.flink.translation.wrappers.SinkOutputFormat; import org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat; import org.apache.beam.sdk.coders.CannotProvideCoderException; import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.CoderRegistry; import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.ListCoder; +import org.apache.beam.sdk.coders.VoidCoder; import org.apache.beam.sdk.io.AvroIO; import org.apache.beam.sdk.io.BoundedSource; import org.apache.beam.sdk.io.Read; import org.apache.beam.sdk.io.TextIO; import org.apache.beam.sdk.io.Write; import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Create; +import org.apache.beam.sdk.transforms.CombineFnBase; import org.apache.beam.sdk.transforms.DoFn; import 
org.apache.beam.sdk.transforms.Flatten; import org.apache.beam.sdk.transforms.GroupByKey; import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.View; -import org.apache.beam.sdk.transforms.join.CoGbkResult; -import org.apache.beam.sdk.transforms.join.CoGbkResultSchema; -import org.apache.beam.sdk.transforms.join.CoGroupByKey; -import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple; import org.apache.beam.sdk.transforms.join.RawUnionValue; +import org.apache.beam.sdk.transforms.join.UnionCoder; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.GlobalWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.transforms.windowing.WindowFn; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.util.WindowingStrategy; import org.apache.beam.sdk.values.KV; import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionView; import org.apache.beam.sdk.values.PValue; import org.apache.beam.sdk.values.TupleTag; -import com.google.api.client.util.Maps; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; -import org.apache.flink.api.common.functions.GroupReduceFunction; +import org.apache.flink.api.common.functions.FilterFunction; +import org.apache.flink.api.common.functions.FlatMapFunction; +import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.operators.Keys; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.DataSet; -import org.apache.flink.api.java.io.AvroInputFormat; import org.apache.flink.api.java.io.AvroOutputFormat; -import org.apache.flink.api.java.io.TextInputFormat; -import org.apache.flink.api.java.operators.CoGroupOperator; import org.apache.flink.api.java.operators.DataSink; import org.apache.flink.api.java.operators.DataSource; import org.apache.flink.api.java.operators.FlatMapOperator; import org.apache.flink.api.java.operators.GroupCombineOperator; import org.apache.flink.api.java.operators.GroupReduceOperator; import org.apache.flink.api.java.operators.Grouping; +import org.apache.flink.api.java.operators.MapOperator; import org.apache.flink.api.java.operators.MapPartitionOperator; +import org.apache.flink.api.java.operators.SingleInputUdfOperator; import org.apache.flink.api.java.operators.UnsortedGrouping; import org.apache.flink.core.fs.Path; +import org.apache.flink.util.Collector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.ByteArrayOutputStream; -import java.io.IOException; import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; /** - * Translators for transforming - * Dataflow {@link org.apache.beam.sdk.transforms.PTransform}s to - * Flink {@link org.apache.flink.api.java.DataSet}s + * Translators for transforming {@link PTransform PTransforms} to + * Flink {@link DataSet DataSets}. 
*/ public class FlinkBatchTransformTranslators { @@ -100,113 +107,90 @@ public class FlinkBatchTransformTranslators { // -------------------------------------------------------------------------------------------- @SuppressWarnings("rawtypes") - private static final Map, FlinkBatchPipelineTranslator.BatchTransformTranslator> TRANSLATORS = new HashMap<>(); + private static final Map< + Class, + FlinkBatchPipelineTranslator.BatchTransformTranslator> TRANSLATORS = new HashMap<>(); - // register the known translators static { TRANSLATORS.put(View.CreatePCollectionView.class, new CreatePCollectionViewTranslatorBatch()); TRANSLATORS.put(Combine.PerKey.class, new CombinePerKeyTranslatorBatch()); - // we don't need this because we translate the Combine.PerKey directly - //TRANSLATORS.put(Combine.GroupedValues.class, new CombineGroupedValuesTranslator()); - - TRANSLATORS.put(Create.Values.class, new CreateTranslatorBatch()); + TRANSLATORS.put(GroupByKey.class, new GroupByKeyTranslatorBatch()); TRANSLATORS.put(Flatten.FlattenPCollectionList.class, new FlattenPCollectionTranslatorBatch()); - // TODO we're currently ignoring windows here but that has to change in the future - TRANSLATORS.put(GroupByKey.class, new GroupByKeyTranslatorBatch()); + TRANSLATORS.put(Window.Bound.class, new WindowBoundTranslatorBatch()); - TRANSLATORS.put(ParDo.BoundMulti.class, new ParDoBoundMultiTranslatorBatch()); TRANSLATORS.put(ParDo.Bound.class, new ParDoBoundTranslatorBatch()); - - TRANSLATORS.put(CoGroupByKey.class, new CoGroupByKeyTranslatorBatch()); - - TRANSLATORS.put(AvroIO.Read.Bound.class, new AvroIOReadTranslatorBatch()); - TRANSLATORS.put(AvroIO.Write.Bound.class, new AvroIOWriteTranslatorBatch()); + TRANSLATORS.put(ParDo.BoundMulti.class, new ParDoBoundMultiTranslatorBatch()); TRANSLATORS.put(Read.Bounded.class, new ReadSourceTranslatorBatch()); - TRANSLATORS.put(Write.Bound.class, new WriteSinkTranslatorBatch()); - - TRANSLATORS.put(TextIO.Read.Bound.class, new TextIOReadTranslatorBatch()); - TRANSLATORS.put(TextIO.Write.Bound.class, new TextIOWriteTranslatorBatch()); - - // Flink-specific - TRANSLATORS.put(ConsoleIO.Write.Bound.class, new ConsoleIOWriteTranslatorBatch()); - } - public static FlinkBatchPipelineTranslator.BatchTransformTranslator getTranslator(PTransform transform) { + public static FlinkBatchPipelineTranslator.BatchTransformTranslator getTranslator( + PTransform transform) { return TRANSLATORS.get(transform.getClass()); } - private static class ReadSourceTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { + private static class ReadSourceTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { @Override public void translateNode(Read.Bounded transform, FlinkBatchTranslationContext context) { String name = transform.getName(); BoundedSource source = transform.getSource(); PCollection output = context.getOutput(transform); - Coder coder = output.getCoder(); - TypeInformation typeInformation = context.getTypeInfo(output); + TypeInformation> typeInformation = context.getTypeInfo(output); - DataSource dataSource = new DataSource<>(context.getExecutionEnvironment(), - new SourceInputFormat<>(source, context.getPipelineOptions()), typeInformation, name); + DataSource> dataSource = new DataSource<>( + context.getExecutionEnvironment(), + new SourceInputFormat<>(source, context.getPipelineOptions()), + typeInformation, + name); context.setOutputDataSet(output, dataSource); } } - private static class AvroIOReadTranslatorBatch implements 
FlinkBatchPipelineTranslator.BatchTransformTranslator> { - private static final Logger LOG = LoggerFactory.getLogger(AvroIOReadTranslatorBatch.class); + private static class WriteSinkTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { @Override - public void translateNode(AvroIO.Read.Bound transform, FlinkBatchTranslationContext context) { - String path = transform.getFilepattern(); + public void translateNode(Write.Bound transform, FlinkBatchTranslationContext context) { String name = transform.getName(); -// Schema schema = transform.getSchema(); - PValue output = context.getOutput(transform); - - TypeInformation typeInformation = context.getTypeInfo(output); - - // This is super hacky, but unfortunately we cannot get the type otherwise - Class extractedAvroType; - try { - Field typeField = transform.getClass().getDeclaredField("type"); - typeField.setAccessible(true); - @SuppressWarnings("unchecked") - Class avroType = (Class) typeField.get(transform); - extractedAvroType = avroType; - } catch (NoSuchFieldException | IllegalAccessException e) { - // we know that the field is there and it is accessible - throw new RuntimeException("Could not access type from AvroIO.Bound", e); - } - - DataSource source = new DataSource<>(context.getExecutionEnvironment(), - new AvroInputFormat<>(new Path(path), extractedAvroType), - typeInformation, name); + PValue input = context.getInput(transform); + DataSet> inputDataSet = context.getInputDataSet(input); - context.setOutputDataSet(output, source); + inputDataSet.output(new SinkOutputFormat<>(transform, context.getPipelineOptions())) + .name(name); } } - private static class AvroIOWriteTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { + private static class AvroIOWriteTranslatorBatch implements + FlinkBatchPipelineTranslator.BatchTransformTranslator> { private static final Logger LOG = LoggerFactory.getLogger(AvroIOWriteTranslatorBatch.class); + @Override - public void translateNode(AvroIO.Write.Bound transform, FlinkBatchTranslationContext context) { - DataSet inputDataSet = context.getInputDataSet(context.getInput(transform)); + public void translateNode( + AvroIO.Write.Bound transform, + FlinkBatchTranslationContext context) { + DataSet> inputDataSet = context.getInputDataSet(context.getInput(transform)); + String filenamePrefix = transform.getFilenamePrefix(); String filenameSuffix = transform.getFilenameSuffix(); int numShards = transform.getNumShards(); String shardNameTemplate = transform.getShardNameTemplate(); // TODO: Implement these. We need Flink support for this. - LOG.warn("Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.", + LOG.warn( + "Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.", filenameSuffix); - LOG.warn("Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.", shardNameTemplate); + LOG.warn( + "Translation of TextIO.Write.shardNameTemplate not yet supported. 
Is: {}.", + shardNameTemplate); // This is super hacky, but unfortunately we cannot get the type otherwise Class extractedAvroType; @@ -221,8 +205,17 @@ public void translateNode(AvroIO.Write.Bound transform, FlinkBatchTranslation throw new RuntimeException("Could not access type from AvroIO.Bound", e); } - DataSink dataSink = inputDataSet.output(new AvroOutputFormat<>(new Path - (filenamePrefix), extractedAvroType)); + MapOperator, T> valueStream = inputDataSet.map( + new MapFunction, T>() { + @Override + public T map(WindowedValue value) throws Exception { + return value.getValue(); + } + }).returns(new CoderTypeInformation<>(context.getInput(transform).getCoder())); + + + DataSink dataSink = valueStream.output( + new AvroOutputFormat<>(new Path(filenamePrefix), extractedAvroType)); if (numShards > 0) { dataSink.setParallelism(numShards); @@ -230,37 +223,16 @@ public void translateNode(AvroIO.Write.Bound transform, FlinkBatchTranslation } } - private static class TextIOReadTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { - private static final Logger LOG = LoggerFactory.getLogger(TextIOReadTranslatorBatch.class); - - @Override - public void translateNode(TextIO.Read.Bound transform, FlinkBatchTranslationContext context) { - String path = transform.getFilepattern(); - String name = transform.getName(); - - TextIO.CompressionType compressionType = transform.getCompressionType(); - boolean needsValidation = transform.needsValidation(); - - // TODO: Implement these. We need Flink support for this. - LOG.warn("Translation of TextIO.CompressionType not yet supported. Is: {}.", compressionType); - LOG.warn("Translation of TextIO.Read.needsValidation not yet supported. Is: {}.", needsValidation); - - PValue output = context.getOutput(transform); - - TypeInformation typeInformation = context.getTypeInfo(output); - DataSource source = new DataSource<>(context.getExecutionEnvironment(), new TextInputFormat(new Path(path)), typeInformation, name); - - context.setOutputDataSet(output, source); - } - } - - private static class TextIOWriteTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { + private static class TextIOWriteTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { private static final Logger LOG = LoggerFactory.getLogger(TextIOWriteTranslatorBatch.class); @Override - public void translateNode(TextIO.Write.Bound transform, FlinkBatchTranslationContext context) { + public void translateNode( + TextIO.Write.Bound transform, + FlinkBatchTranslationContext context) { PValue input = context.getInput(transform); - DataSet inputDataSet = context.getInputDataSet(input); + DataSet> inputDataSet = context.getInputDataSet(input); String filenamePrefix = transform.getFilenamePrefix(); String filenameSuffix = transform.getFilenameSuffix(); @@ -269,12 +241,25 @@ public void translateNode(TextIO.Write.Bound transform, FlinkBatchTranslation String shardNameTemplate = transform.getShardNameTemplate(); // TODO: Implement these. We need Flink support for this. - LOG.warn("Translation of TextIO.Write.needsValidation not yet supported. Is: {}.", needsValidation); - LOG.warn("Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.", filenameSuffix); - LOG.warn("Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.", shardNameTemplate); + LOG.warn( + "Translation of TextIO.Write.needsValidation not yet supported. 
Is: {}.", + needsValidation); + LOG.warn( + "Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.", + filenameSuffix); + LOG.warn( + "Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.", + shardNameTemplate); - //inputDataSet.print(); - DataSink dataSink = inputDataSet.writeAsText(filenamePrefix); + MapOperator, T> valueStream = inputDataSet.map( + new MapFunction, T>() { + @Override + public T map(WindowedValue value) throws Exception { + return value.getValue(); + } + }).returns(new CoderTypeInformation<>(transform.getCoder())); + + DataSink dataSink = valueStream.writeAsText(filenamePrefix); if (numShards > 0) { dataSink.setParallelism(numShards); @@ -282,148 +267,414 @@ public void translateNode(TextIO.Write.Bound transform, FlinkBatchTranslation } } - private static class ConsoleIOWriteTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator { + private static class WindowBoundTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { + @Override - public void translateNode(ConsoleIO.Write.Bound transform, FlinkBatchTranslationContext context) { + public void translateNode(Window.Bound transform, FlinkBatchTranslationContext context) { PValue input = context.getInput(transform); - DataSet inputDataSet = context.getInputDataSet(input); - inputDataSet.printOnTaskManager(transform.getName()); + + TypeInformation> resultTypeInfo = + context.getTypeInfo(context.getOutput(transform)); + + DataSet> inputDataSet = context.getInputDataSet(input); + + @SuppressWarnings("unchecked") + final WindowingStrategy windowingStrategy = + (WindowingStrategy) + context.getOutput(transform).getWindowingStrategy(); + + WindowFn windowFn = windowingStrategy.getWindowFn(); + + FlinkAssignWindows assignWindowsFunction = + new FlinkAssignWindows<>(windowFn); + + DataSet> resultDataSet = inputDataSet + .flatMap(assignWindowsFunction) + .name(context.getOutput(transform).getName()) + .returns(resultTypeInfo); + + context.setOutputDataSet(context.getOutput(transform), resultDataSet); } } - private static class WriteSinkTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { + private static class GroupByKeyTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { @Override - public void translateNode(Write.Bound transform, FlinkBatchTranslationContext context) { - String name = transform.getName(); - PValue input = context.getInput(transform); - DataSet inputDataSet = context.getInputDataSet(input); + public void translateNode( + GroupByKey transform, + FlinkBatchTranslationContext context) { + + // for now, this is copied from the Combine.PerKey translater. 
Once we have the new runner API + // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn + + DataSet>> inputDataSet = + context.getInputDataSet(context.getInput(transform)); + + Combine.KeyedCombineFn, List> combineFn = + new Concatenate().asKeyedFn(); + + KvCoder inputCoder = (KvCoder) context.getInput(transform).getCoder(); + + Coder> accumulatorCoder; + + try { + accumulatorCoder = + combineFn.getAccumulatorCoder( + context.getInput(transform).getPipeline().getCoderRegistry(), + inputCoder.getKeyCoder(), + inputCoder.getValueCoder()); + } catch (CannotProvideCoderException e) { + throw new RuntimeException(e); + } + + WindowingStrategy windowingStrategy = + context.getInput(transform).getWindowingStrategy(); + + TypeInformation>> kvCoderTypeInformation = + new KvCoderTypeInformation<>( + WindowedValue.getFullCoder( + inputCoder, + windowingStrategy.getWindowFn().windowCoder())); + + TypeInformation>>> partialReduceTypeInfo = + new KvCoderTypeInformation<>( + WindowedValue.getFullCoder( + KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder), + windowingStrategy.getWindowFn().windowCoder())); + + Grouping>> inputGrouping = + new UnsortedGrouping<>( + inputDataSet, + new Keys.ExpressionKeys<>(new String[]{"key"}, + kvCoderTypeInformation)); + + FlinkPartialReduceFunction, ?> partialReduceFunction; + FlinkReduceFunction, List, ?> reduceFunction; + + if (windowingStrategy.getWindowFn().isNonMerging()) { + @SuppressWarnings("unchecked") + WindowingStrategy boundedStrategy = + (WindowingStrategy) windowingStrategy; + + partialReduceFunction = new FlinkPartialReduceFunction<>( + combineFn, + boundedStrategy, + Collections., WindowingStrategy>emptyMap(), + context.getPipelineOptions()); + + reduceFunction = new FlinkReduceFunction<>( + combineFn, + boundedStrategy, + Collections., WindowingStrategy>emptyMap(), + context.getPipelineOptions()); + + } else { + if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) { + throw new UnsupportedOperationException( + "Merging WindowFn with windows other than IntervalWindow are not supported."); + } + + @SuppressWarnings("unchecked") + WindowingStrategy intervalStrategy = + (WindowingStrategy) windowingStrategy; + + partialReduceFunction = new FlinkMergingPartialReduceFunction<>( + combineFn, + intervalStrategy, + Collections., WindowingStrategy>emptyMap(), + context.getPipelineOptions()); + + reduceFunction = new FlinkMergingReduceFunction<>( + combineFn, + intervalStrategy, + Collections., WindowingStrategy>emptyMap(), + context.getPipelineOptions()); + } + + // Partially GroupReduce the values into the intermediate format AccumT (combine) + GroupCombineOperator< + WindowedValue>, + WindowedValue>>> groupCombine = + new GroupCombineOperator<>( + inputGrouping, + partialReduceTypeInfo, + partialReduceFunction, + "GroupCombine: " + transform.getName()); + + Grouping>>> intermediateGrouping = + new UnsortedGrouping<>( + groupCombine, new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType())); + + // Fully reduce the values and create output format VO + GroupReduceOperator< + WindowedValue>>, WindowedValue>>> outputDataSet = + new GroupReduceOperator<>( + intermediateGrouping, partialReduceTypeInfo, reduceFunction, transform.getName()); + + context.setOutputDataSet(context.getOutput(transform), outputDataSet); - inputDataSet.output(new SinkOutputFormat<>(transform, context.getPipelineOptions())).name(name); } } /** - * Translates a GroupByKey while ignoring window assignments. 
Current ignores windows. + * Combiner that combines {@code T}s into a single {@code List} containing all inputs. + * + *

For internal use to translate {@link GroupByKey}. For a large {@link PCollection} this + * is expected to crash! + * + *

This is copied from the dataflow runner code. + * + * @param the type of elements to concatenate. */ - private static class GroupByKeyTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { + private static class Concatenate extends Combine.CombineFn, List> { + @Override + public List createAccumulator() { + return new ArrayList(); + } @Override - public void translateNode(GroupByKey transform, FlinkBatchTranslationContext context) { - DataSet> inputDataSet = context.getInputDataSet(context.getInput(transform)); - GroupReduceFunction, KV>> groupReduceFunction = new FlinkKeyedListAggregationFunction<>(); + public List addInput(List accumulator, T input) { + accumulator.add(input); + return accumulator; + } - TypeInformation>> typeInformation = context.getTypeInfo(context.getOutput(transform)); + @Override + public List mergeAccumulators(Iterable> accumulators) { + List result = createAccumulator(); + for (List accumulator : accumulators) { + result.addAll(accumulator); + } + return result; + } - Grouping> grouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet.getType())); + @Override + public List extractOutput(List accumulator) { + return accumulator; + } - GroupReduceOperator, KV>> outputDataSet = - new GroupReduceOperator<>(grouping, typeInformation, groupReduceFunction, transform.getName()); + @Override + public Coder> getAccumulatorCoder(CoderRegistry registry, Coder inputCoder) { + return ListCoder.of(inputCoder); + } - context.setOutputDataSet(context.getOutput(transform), outputDataSet); + @Override + public Coder> getDefaultOutputCoder(CoderRegistry registry, Coder inputCoder) { + return ListCoder.of(inputCoder); } } - private static class CombinePerKeyTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { + + private static class CombinePerKeyTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator< + Combine.PerKey> { @Override - public void translateNode(Combine.PerKey transform, FlinkBatchTranslationContext context) { - DataSet> inputDataSet = context.getInputDataSet(context.getInput(transform)); + @SuppressWarnings("unchecked") + public void translateNode( + Combine.PerKey transform, + FlinkBatchTranslationContext context) { + DataSet>> inputDataSet = + context.getInputDataSet(context.getInput(transform)); - @SuppressWarnings("unchecked") - Combine.KeyedCombineFn keyedCombineFn = (Combine.KeyedCombineFn) transform.getFn(); + CombineFnBase.PerKeyCombineFn combineFn = + (CombineFnBase.PerKeyCombineFn) transform.getFn(); - KvCoder inputCoder = (KvCoder) context.getInput(transform).getCoder(); + KvCoder inputCoder = (KvCoder) context.getInput(transform).getCoder(); + + Coder accumulatorCoder; - Coder accumulatorCoder = - null; try { - accumulatorCoder = keyedCombineFn.getAccumulatorCoder(context.getInput(transform).getPipeline().getCoderRegistry(), inputCoder.getKeyCoder(), inputCoder.getValueCoder()); + accumulatorCoder = + combineFn.getAccumulatorCoder( + context.getInput(transform).getPipeline().getCoderRegistry(), + inputCoder.getKeyCoder(), + inputCoder.getValueCoder()); } catch (CannotProvideCoderException e) { - e.printStackTrace(); - // TODO + throw new RuntimeException(e); } - TypeInformation> kvCoderTypeInformation = new KvCoderTypeInformation<>(inputCoder); - TypeInformation> partialReduceTypeInfo = new KvCoderTypeInformation<>(KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder)); + WindowingStrategy windowingStrategy = + 
context.getInput(transform).getWindowingStrategy(); + + TypeInformation>> kvCoderTypeInformation = + new KvCoderTypeInformation<>( + WindowedValue.getFullCoder( + inputCoder, + windowingStrategy.getWindowFn().windowCoder())); + + TypeInformation>> partialReduceTypeInfo = + new KvCoderTypeInformation<>( + WindowedValue.getFullCoder( + KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder), + windowingStrategy.getWindowFn().windowCoder())); + + Grouping>> inputGrouping = + new UnsortedGrouping<>( + inputDataSet, + new Keys.ExpressionKeys<>(new String[]{"key"}, + kvCoderTypeInformation)); + + // construct a map from side input to WindowingStrategy so that + // the DoFn runner can map main-input windows to side input windows + Map, WindowingStrategy> sideInputStrategies = new HashMap<>(); + for (PCollectionView sideInput: transform.getSideInputs()) { + sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal()); + } - Grouping> inputGrouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{"key"}, kvCoderTypeInformation)); + if (windowingStrategy.getWindowFn().isNonMerging()) { + WindowingStrategy boundedStrategy = + (WindowingStrategy) windowingStrategy; + + FlinkPartialReduceFunction partialReduceFunction = + new FlinkPartialReduceFunction<>( + combineFn, + boundedStrategy, + sideInputStrategies, + context.getPipelineOptions()); + + FlinkReduceFunction reduceFunction = + new FlinkReduceFunction<>( + combineFn, + boundedStrategy, + sideInputStrategies, + context.getPipelineOptions()); + + // Partially GroupReduce the values into the intermediate format AccumT (combine) + GroupCombineOperator< + WindowedValue>, + WindowedValue>> groupCombine = + new GroupCombineOperator<>( + inputGrouping, + partialReduceTypeInfo, + partialReduceFunction, + "GroupCombine: " + transform.getName()); + + transformSideInputs(transform.getSideInputs(), groupCombine, context); + + TypeInformation>> reduceTypeInfo = + context.getTypeInfo(context.getOutput(transform)); + + Grouping>> intermediateGrouping = + new UnsortedGrouping<>( + groupCombine, + new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType())); + + // Fully reduce the values and create output format OutputT + GroupReduceOperator< + WindowedValue>, WindowedValue>> outputDataSet = + new GroupReduceOperator<>( + intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName()); + + transformSideInputs(transform.getSideInputs(), outputDataSet, context); + + context.setOutputDataSet(context.getOutput(transform), outputDataSet); + + } else { + if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) { + throw new UnsupportedOperationException( + "Merging WindowFn with windows other than IntervalWindow are not supported."); + } - FlinkPartialReduceFunction partialReduceFunction = new FlinkPartialReduceFunction<>(keyedCombineFn); + // for merging windows we can't to a pre-shuffle combine step since + // elements would not be in their correct windows for side-input access - // Partially GroupReduce the values into the intermediate format VA (combine) - GroupCombineOperator, KV> groupCombine = - new GroupCombineOperator<>(inputGrouping, partialReduceTypeInfo, partialReduceFunction, - "GroupCombine: " + transform.getName()); + WindowingStrategy intervalStrategy = + (WindowingStrategy) windowingStrategy; - // Reduce fully to VO - GroupReduceFunction, KV> reduceFunction = new FlinkReduceFunction<>(keyedCombineFn); + FlinkMergingNonShuffleReduceFunction reduceFunction = + 
new FlinkMergingNonShuffleReduceFunction<>( + combineFn, + intervalStrategy, + sideInputStrategies, + context.getPipelineOptions()); - TypeInformation> reduceTypeInfo = context.getTypeInfo(context.getOutput(transform)); + TypeInformation>> reduceTypeInfo = + context.getTypeInfo(context.getOutput(transform)); - Grouping> intermediateGrouping = new UnsortedGrouping<>(groupCombine, new Keys.ExpressionKeys<>(new String[]{"key"}, groupCombine.getType())); + Grouping>> grouping = + new UnsortedGrouping<>( + inputDataSet, + new Keys.ExpressionKeys<>(new String[]{"key"}, kvCoderTypeInformation)); + + // Fully reduce the values and create output format OutputT + GroupReduceOperator< + WindowedValue>, WindowedValue>> outputDataSet = + new GroupReduceOperator<>( + grouping, reduceTypeInfo, reduceFunction, transform.getName()); + + transformSideInputs(transform.getSideInputs(), outputDataSet, context); + + context.setOutputDataSet(context.getOutput(transform), outputDataSet); + } - // Fully reduce the values and create output format VO - GroupReduceOperator, KV> outputDataSet = - new GroupReduceOperator<>(intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName()); - context.setOutputDataSet(context.getOutput(transform), outputDataSet); } } -// private static class CombineGroupedValuesTranslator implements FlinkPipelineTranslator.TransformTranslator> { -// -// @Override -// public void translateNode(Combine.GroupedValues transform, TranslationContext context) { -// DataSet> inputDataSet = context.getInputDataSet(transform.getInput()); -// -// Combine.KeyedCombineFn keyedCombineFn = transform.getFn(); -// -// GroupReduceFunction, KV> groupReduceFunction = new FlinkCombineFunction<>(keyedCombineFn); -// -// TypeInformation> typeInformation = context.getTypeInfo(transform.getOutput()); -// -// Grouping> grouping = new UnsortedGrouping<>(inputDataSet, new Keys.ExpressionKeys<>(new String[]{""}, inputDataSet.getType())); -// -// GroupReduceOperator, KV> outputDataSet = -// new GroupReduceOperator<>(grouping, typeInformation, groupReduceFunction, transform.getName()); -// context.setOutputDataSet(transform.getOutput(), outputDataSet); -// } -// } - - private static class ParDoBoundTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { - private static final Logger LOG = LoggerFactory.getLogger(ParDoBoundTranslatorBatch.class); + private static class ParDoBoundTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator< + ParDo.Bound> { @Override - public void translateNode(ParDo.Bound transform, FlinkBatchTranslationContext context) { - DataSet inputDataSet = context.getInputDataSet(context.getInput(transform)); + public void translateNode( + ParDo.Bound transform, + FlinkBatchTranslationContext context) { + DataSet> inputDataSet = + context.getInputDataSet(context.getInput(transform)); - final DoFn doFn = transform.getFn(); + final DoFn doFn = transform.getFn(); - TypeInformation typeInformation = context.getTypeInfo(context.getOutput(transform)); + TypeInformation> typeInformation = + context.getTypeInfo(context.getOutput(transform)); - FlinkDoFnFunction doFnWrapper = new FlinkDoFnFunction<>(doFn, context.getPipelineOptions()); - MapPartitionOperator outputDataSet = new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, transform.getName()); + List> sideInputs = transform.getSideInputs(); - transformSideInputs(transform.getSideInputs(), outputDataSet, context); + // construct a map from side input to WindowingStrategy so 
that + // the DoFn runner can map main-input windows to side input windows + Map, WindowingStrategy> sideInputStrategies = new HashMap<>(); + for (PCollectionView sideInput: sideInputs) { + sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal()); + } + + FlinkDoFnFunction doFnWrapper = + new FlinkDoFnFunction<>( + doFn, + context.getOutput(transform).getWindowingStrategy(), + sideInputStrategies, + context.getPipelineOptions()); + + MapPartitionOperator, WindowedValue> outputDataSet = + new MapPartitionOperator<>( + inputDataSet, + typeInformation, + doFnWrapper, + transform.getName()); + + transformSideInputs(sideInputs, outputDataSet, context); context.setOutputDataSet(context.getOutput(transform), outputDataSet); } } - private static class ParDoBoundMultiTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { - private static final Logger LOG = LoggerFactory.getLogger(ParDoBoundMultiTranslatorBatch.class); + private static class ParDoBoundMultiTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator< + ParDo.BoundMulti> { @Override - public void translateNode(ParDo.BoundMulti transform, FlinkBatchTranslationContext context) { - DataSet inputDataSet = context.getInputDataSet(context.getInput(transform)); + public void translateNode( + ParDo.BoundMulti transform, + FlinkBatchTranslationContext context) { + DataSet> inputDataSet = + context.getInputDataSet(context.getInput(transform)); - final DoFn doFn = transform.getFn(); + final DoFn doFn = transform.getFn(); Map, PCollection> outputs = context.getOutput(transform).getAll(); Map, Integer> outputMap = Maps.newHashMap(); - // put the main output at index 0, FlinkMultiOutputDoFnFunction also expects this + // put the main output at index 0, FlinkMultiOutputDoFnFunction expects this outputMap.put(transform.getMainOutputTag(), 0); int count = 1; for (TupleTag tag: outputs.keySet()) { @@ -432,147 +683,166 @@ public void translateNode(ParDo.BoundMulti transform, FlinkBatchTransla } } + // assume that the windowing strategy is the same for all outputs + WindowingStrategy windowingStrategy = null; + // collect all output Coders and create a UnionCoder for our tagged outputs List> outputCoders = Lists.newArrayList(); for (PCollection coll: outputs.values()) { outputCoders.add(coll.getCoder()); + windowingStrategy = coll.getWindowingStrategy(); + } + + if (windowingStrategy == null) { + throw new IllegalStateException("No outputs defined."); } UnionCoder unionCoder = UnionCoder.of(outputCoders); - @SuppressWarnings("unchecked") - TypeInformation typeInformation = new CoderTypeInformation<>(unionCoder); + TypeInformation> typeInformation = + new CoderTypeInformation<>( + WindowedValue.getFullCoder( + unionCoder, + windowingStrategy.getWindowFn().windowCoder())); - @SuppressWarnings("unchecked") - FlinkMultiOutputDoFnFunction doFnWrapper = new FlinkMultiOutputDoFnFunction(doFn, context.getPipelineOptions(), outputMap); - MapPartitionOperator outputDataSet = new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, transform.getName()); + List> sideInputs = transform.getSideInputs(); - transformSideInputs(transform.getSideInputs(), outputDataSet, context); + // construct a map from side input to WindowingStrategy so that + // the DoFn runner can map main-input windows to side input windows + Map, WindowingStrategy> sideInputStrategies = new HashMap<>(); + for (PCollectionView sideInput: sideInputs) { + sideInputStrategies.put(sideInput, 
sideInput.getWindowingStrategyInternal()); + } - for (Map.Entry, PCollection> output: outputs.entrySet()) { - TypeInformation outputType = context.getTypeInfo(output.getValue()); - int outputTag = outputMap.get(output.getKey()); - FlinkMultiOutputPruningFunction pruningFunction = new FlinkMultiOutputPruningFunction<>(outputTag); - FlatMapOperator pruningOperator = new - FlatMapOperator<>(outputDataSet, outputType, - pruningFunction, output.getValue().getName()); - context.setOutputDataSet(output.getValue(), pruningOperator); + @SuppressWarnings("unchecked") + FlinkMultiOutputDoFnFunction doFnWrapper = + new FlinkMultiOutputDoFnFunction( + doFn, + windowingStrategy, + sideInputStrategies, + context.getPipelineOptions(), + outputMap); + + MapPartitionOperator, WindowedValue> taggedDataSet = + new MapPartitionOperator<>( + inputDataSet, + typeInformation, + doFnWrapper, + transform.getName()); + + transformSideInputs(sideInputs, taggedDataSet, context); + for (Map.Entry, PCollection> output: outputs.entrySet()) { + pruneOutput( + taggedDataSet, + context, + outputMap.get(output.getKey()), + (PCollection) output.getValue()); } } + + private void pruneOutput( + MapPartitionOperator, WindowedValue> taggedDataSet, + FlinkBatchTranslationContext context, + int integerTag, + PCollection collection) { + TypeInformation> outputType = context.getTypeInfo(collection); + + FlinkMultiOutputPruningFunction pruningFunction = + new FlinkMultiOutputPruningFunction<>(integerTag); + + FlatMapOperator, WindowedValue> pruningOperator = + new FlatMapOperator<>( + taggedDataSet, + outputType, + pruningFunction, + collection.getName()); + + context.setOutputDataSet(collection, pruningOperator); + } } - private static class FlattenPCollectionTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { + private static class FlattenPCollectionTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator< + Flatten.FlattenPCollectionList> { @Override - public void translateNode(Flatten.FlattenPCollectionList transform, FlinkBatchTranslationContext context) { + @SuppressWarnings("unchecked") + public void translateNode( + Flatten.FlattenPCollectionList transform, + FlinkBatchTranslationContext context) { + List> allInputs = context.getInput(transform).getAll(); - DataSet result = null; - for(PCollection collection : allInputs) { - DataSet current = context.getInputDataSet(collection); - if (result == null) { - result = current; - } else { - result = result.union(current); + DataSet> result = null; + + if (allInputs.isEmpty()) { + + // create an empty dummy source to satisfy downstream operations + // we cannot create an empty source in Flink, therefore we have to + // add the flatMap that simply never forwards the single element + DataSource dummySource = + context.getExecutionEnvironment().fromElements("dummy"); + result = dummySource.flatMap(new FlatMapFunction>() { + @Override + public void flatMap(String s, Collector> collector) throws Exception { + // never return anything + } + }).returns( + new CoderTypeInformation<>( + WindowedValue.getFullCoder( + (Coder) VoidCoder.of(), + GlobalWindow.Coder.INSTANCE))); + } else { + for (PCollection collection : allInputs) { + DataSet> current = context.getInputDataSet(collection); + if (result == null) { + result = current; + } else { + result = result.union(current); + } } } - context.setOutputDataSet(context.getOutput(transform), result); - } - } - private static class CreatePCollectionViewTranslatorBatch implements 
FlinkBatchPipelineTranslator.BatchTransformTranslator> { - @Override - public void translateNode(View.CreatePCollectionView transform, FlinkBatchTranslationContext context) { - DataSet inputDataSet = context.getInputDataSet(context.getInput(transform)); - PCollectionView input = transform.apply(null); - context.setSideInputDataSet(input, inputDataSet); + // insert a dummy filter, there seems to be a bug in Flink + // that produces duplicate elements after the union in some cases + // if we don't + result = result.filter(new FilterFunction>() { + @Override + public boolean filter(WindowedValue tWindowedValue) throws Exception { + return true; + } + }).name("UnionFixFilter"); + context.setOutputDataSet(context.getOutput(transform), result); } } - private static class CreateTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { + private static class CreatePCollectionViewTranslatorBatch + implements FlinkBatchPipelineTranslator.BatchTransformTranslator< + View.CreatePCollectionView> { @Override - public void translateNode(Create.Values transform, FlinkBatchTranslationContext context) { - TypeInformation typeInformation = context.getOutputTypeInfo(); - Iterable elements = transform.getElements(); - - // we need to serialize the elements to byte arrays, since they might contain - // elements that are not serializable by Java serialization. We deserialize them - // in the FlatMap function using the Coder. - - List serializedElements = Lists.newArrayList(); - Coder coder = context.getOutput(transform).getCoder(); - for (OUT element: elements) { - ByteArrayOutputStream bao = new ByteArrayOutputStream(); - try { - coder.encode(element, bao, Coder.Context.OUTER); - serializedElements.add(bao.toByteArray()); - } catch (IOException e) { - throw new RuntimeException("Could not serialize Create elements using Coder: " + e); - } - } + public void translateNode( + View.CreatePCollectionView transform, + FlinkBatchTranslationContext context) { + DataSet> inputDataSet = + context.getInputDataSet(context.getInput(transform)); - DataSet initDataSet = context.getExecutionEnvironment().fromElements(1); - FlinkCreateFunction flatMapFunction = new FlinkCreateFunction<>(serializedElements, coder); - FlatMapOperator outputDataSet = new FlatMapOperator<>(initDataSet, typeInformation, flatMapFunction, transform.getName()); + PCollectionView input = transform.getView(); - context.setOutputDataSet(context.getOutput(transform), outputDataSet); + context.setSideInputDataSet(input, inputDataSet); } } - private static void transformSideInputs(List> sideInputs, - MapPartitionOperator outputDataSet, - FlinkBatchTranslationContext context) { + private static void transformSideInputs( + List> sideInputs, + SingleInputUdfOperator outputDataSet, + FlinkBatchTranslationContext context) { // get corresponding Flink broadcast DataSets - for(PCollectionView input : sideInputs) { + for (PCollectionView input : sideInputs) { DataSet broadcastSet = context.getSideInputDataSet(input); outputDataSet.withBroadcastSet(broadcastSet, input.getTagInternal().getId()); } } -// Disabled because it depends on a pending pull request to the DataFlowSDK - /** - * Special composite transform translator. Only called if the CoGroup is two dimensional. 
- * @param - */ - private static class CoGroupByKeyTranslatorBatch implements FlinkBatchPipelineTranslator.BatchTransformTranslator> { - - @Override - public void translateNode(CoGroupByKey transform, FlinkBatchTranslationContext context) { - KeyedPCollectionTuple input = context.getInput(transform); - - CoGbkResultSchema schema = input.getCoGbkResultSchema(); - List> keyedCollections = input.getKeyedCollections(); - - KeyedPCollectionTuple.TaggedKeyedPCollection taggedCollection1 = keyedCollections.get(0); - KeyedPCollectionTuple.TaggedKeyedPCollection taggedCollection2 = keyedCollections.get(1); - - TupleTag tupleTag1 = taggedCollection1.getTupleTag(); - TupleTag tupleTag2 = taggedCollection2.getTupleTag(); - - PCollection> collection1 = taggedCollection1.getCollection(); - PCollection> collection2 = taggedCollection2.getCollection(); - - DataSet> inputDataSet1 = context.getInputDataSet(collection1); - DataSet> inputDataSet2 = context.getInputDataSet(collection2); - - TypeInformation> typeInfo = context.getOutputTypeInfo(); - - FlinkCoGroupKeyedListAggregator aggregator = new FlinkCoGroupKeyedListAggregator<>(schema, tupleTag1, tupleTag2); - - Keys.ExpressionKeys> keySelector1 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet1.getType()); - Keys.ExpressionKeys> keySelector2 = new Keys.ExpressionKeys<>(new String[]{"key"}, inputDataSet2.getType()); - - DataSet> out = new CoGroupOperator<>(inputDataSet1, inputDataSet2, - keySelector1, keySelector2, - aggregator, typeInfo, null, transform.getName()); - context.setOutputDataSet(context.getOutput(transform), out); - } - } - - // -------------------------------------------------------------------------------------------- - // Miscellaneous - // -------------------------------------------------------------------------------------------- - private FlinkBatchTransformTranslators() {} } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java index 71950cf216cb..ecc3a65c7965 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkBatchTranslationContext.java @@ -18,31 +18,40 @@ package org.apache.beam.runners.flink.translation; import org.apache.beam.runners.flink.translation.types.CoderTypeInformation; -import org.apache.beam.runners.flink.translation.types.KvCoderTypeInformation; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.KvCoder; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.transforms.AppliedPTransform; import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PCollectionView; import org.apache.beam.sdk.values.PInput; import org.apache.beam.sdk.values.POutput; import org.apache.beam.sdk.values.PValue; -import org.apache.beam.sdk.values.TypedPValue; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; -import org.apache.flink.api.java.typeutils.GenericTypeInfo; import java.util.HashMap; import java.util.Map; +/** + * Helper for {@link FlinkBatchPipelineTranslator} and translators in + * {@link 
FlinkBatchTransformTranslators}. + */ public class FlinkBatchTranslationContext { private final Map> dataSets; private final Map, DataSet> broadcastDataSets; + /** + * For keeping track about which DataSets don't have a successor. We + * need to terminate these with a discarding sink because the Beam + * model allows dangling operations. + */ + private final Map> danglingDataSets; + private final ExecutionEnvironment env; private final PipelineOptions options; @@ -55,10 +64,16 @@ public FlinkBatchTranslationContext(ExecutionEnvironment env, PipelineOptions op this.options = options; this.dataSets = new HashMap<>(); this.broadcastDataSets = new HashMap<>(); + + this.danglingDataSets = new HashMap<>(); } // ------------------------------------------------------------------------ - + + public Map> getDanglingDataSets() { + return danglingDataSets; + } + public ExecutionEnvironment getExecutionEnvironment() { return env; } @@ -68,13 +83,16 @@ public PipelineOptions getPipelineOptions() { } @SuppressWarnings("unchecked") - public DataSet getInputDataSet(PValue value) { - return (DataSet) dataSets.get(value); + public DataSet> getInputDataSet(PValue value) { + // assume that the DataSet is used as an input if retrieved here + danglingDataSets.remove(value); + return (DataSet>) dataSets.get(value); } - public void setOutputDataSet(PValue value, DataSet set) { + public void setOutputDataSet(PValue value, DataSet> set) { if (!dataSets.containsKey(value)) { dataSets.put(value, set); + danglingDataSets.put(value, set); } } @@ -91,40 +109,32 @@ public DataSet getSideInputDataSet(PCollectionView value) { return (DataSet) broadcastDataSets.get(value); } - public void setSideInputDataSet(PCollectionView value, DataSet set) { + public void setSideInputDataSet( + PCollectionView value, + DataSet> set) { if (!broadcastDataSets.containsKey(value)) { broadcastDataSets.put(value, set); } } - - @SuppressWarnings("unchecked") - public TypeInformation getTypeInfo(PInput output) { - if (output instanceof TypedPValue) { - Coder outputCoder = ((TypedPValue) output).getCoder(); - if (outputCoder instanceof KvCoder) { - return new KvCoderTypeInformation((KvCoder) outputCoder); - } else { - return new CoderTypeInformation(outputCoder); - } - } - return new GenericTypeInfo<>((Class)Object.class); - } - public TypeInformation getInputTypeInfo() { - return getTypeInfo(currentTransform.getInput()); - } + @SuppressWarnings("unchecked") + public TypeInformation> getTypeInfo(PCollection collection) { + Coder valueCoder = collection.getCoder(); + WindowedValue.FullWindowedValueCoder windowedValueCoder = + WindowedValue.getFullCoder( + valueCoder, + collection.getWindowingStrategy().getWindowFn().windowCoder()); - public TypeInformation getOutputTypeInfo() { - return getTypeInfo((PValue) currentTransform.getOutput()); + return new CoderTypeInformation<>(windowedValueCoder); } @SuppressWarnings("unchecked") - I getInput(PTransform transform) { - return (I) currentTransform.getInput(); + T getInput(PTransform transform) { + return (T) currentTransform.getInput(); } @SuppressWarnings("unchecked") - O getOutput(PTransform transform) { - return (O) currentTransform.getOutput(); + T getOutput(PTransform transform) { + return (T) currentTransform.getOutput(); } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTransformTranslators.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTransformTranslators.java index 
2778d5c3166e..b3fed99ad39f 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTransformTranslators.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTransformTranslators.java @@ -18,7 +18,6 @@ package org.apache.beam.runners.flink.translation; -import org.apache.beam.runners.flink.translation.functions.UnionCoder; import org.apache.beam.runners.flink.translation.types.CoderTypeInformation; import org.apache.beam.runners.flink.translation.types.FlinkCoder; import org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat; @@ -46,6 +45,7 @@ import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; import org.apache.beam.sdk.transforms.join.RawUnionValue; +import org.apache.beam.sdk.transforms.join.UnionCoder; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.transforms.windowing.GlobalWindow; import org.apache.beam.sdk.transforms.windowing.PaneInfo; @@ -229,29 +229,15 @@ public void translateNode(Read.Bounded transform, FlinkStreamingTranslationCo BoundedSource boundedSource = transform.getSource(); PCollection output = context.getOutput(transform); - Coder defaultOutputCoder = boundedSource.getDefaultOutputCoder(); - CoderTypeInformation typeInfo = new CoderTypeInformation<>(defaultOutputCoder); + TypeInformation> typeInfo = context.getTypeInfo(output); - DataStream source = context.getExecutionEnvironment().createInput( + DataStream> source = context.getExecutionEnvironment().createInput( new SourceInputFormat<>( boundedSource, context.getPipelineOptions()), typeInfo); - DataStream> windowedStream = source.flatMap( - new FlatMapFunction>() { - @Override - public void flatMap(T value, Collector> out) throws Exception { - out.collect( - WindowedValue.of(value, - Instant.now(), - GlobalWindow.INSTANCE, - PaneInfo.NO_FIRING)); - } - }) - .assignTimestampsAndWatermarks(new IngestionTimeExtractor>()); - - context.setOutputDataStream(output, windowedStream); + context.setOutputDataStream(output, source); } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java index 8bc73172405c..0cb80baa7cc8 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/FlinkStreamingTranslationContext.java @@ -17,21 +17,30 @@ */ package org.apache.beam.runners.flink.translation; +import org.apache.beam.runners.flink.translation.types.CoderTypeInformation; +import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.transforms.AppliedPTransform; import org.apache.beam.sdk.transforms.PTransform; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PInput; import org.apache.beam.sdk.values.POutput; import org.apache.beam.sdk.values.PValue; import com.google.common.base.Preconditions; +import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import java.util.HashMap; import java.util.Map; +/** + * Helper for keeping 
track of which {@link DataStream DataStreams} map + * to which {@link PTransform PTransforms}. + */ public class FlinkStreamingTranslationContext { private final StreamExecutionEnvironment env; @@ -80,12 +89,24 @@ public void setCurrentTransform(AppliedPTransform currentTransform) { } @SuppressWarnings("unchecked") - public I getInput(PTransform transform) { - return (I) currentTransform.getInput(); + public TypeInformation> getTypeInfo(PCollection collection) { + Coder valueCoder = collection.getCoder(); + WindowedValue.FullWindowedValueCoder windowedValueCoder = + WindowedValue.getFullCoder( + valueCoder, + collection.getWindowingStrategy().getWindowFn().windowCoder()); + + return new CoderTypeInformation<>(windowedValueCoder); + } + + + @SuppressWarnings("unchecked") + public T getInput(PTransform transform) { + return (T) currentTransform.getInput(); } @SuppressWarnings("unchecked") - public O getOutput(PTransform transform) { - return (O) currentTransform.getOutput(); + public T getOutput(PTransform transform) { + return (T) currentTransform.getOutput(); } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java new file mode 100644 index 000000000000..7ea8c202f9d5 --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.flink.translation.functions; + +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.WindowFn; +import org.apache.beam.sdk.util.WindowedValue; + +import org.joda.time.Instant; + +import java.util.Collection; + +/** + * {@link org.apache.beam.sdk.transforms.windowing.WindowFn.AssignContext} for + * Flink functions. 
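Both translation contexts now build the Flink TypeInformation for a PCollection from the full WindowedValue coder. A minimal sketch of that construction, restating the helper added above for a generic element type T:

    import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
    import org.apache.beam.sdk.coders.Coder;
    import org.apache.beam.sdk.util.WindowedValue;
    import org.apache.beam.sdk.values.PCollection;
    import org.apache.flink.api.common.typeinfo.TypeInformation;

    class WindowedTypeInfo {
      // Combines the element coder with the window coder of the collection's
      // WindowFn into a coder for WindowedValue<T>, then wraps it so Flink can
      // use Beam's coders for serialization.
      static <T> TypeInformation<WindowedValue<T>> typeInfoFor(PCollection<T> collection) {
        Coder<T> valueCoder = collection.getCoder();
        WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
            WindowedValue.getFullCoder(
                valueCoder,
                collection.getWindowingStrategy().getWindowFn().windowCoder());
        return new CoderTypeInformation<>(windowedValueCoder);
      }
    }
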
+ */ +class FlinkAssignContext + extends WindowFn.AssignContext { + private final WindowedValue value; + + FlinkAssignContext(WindowFn fn, WindowedValue value) { + fn.super(); + this.value = value; + } + + @Override + public InputT element() { + return value.getValue(); + } + + @Override + public Instant timestamp() { + return value.getTimestamp(); + } + + @Override + public Collection windows() { + return value.getWindows(); + } + +} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java new file mode 100644 index 000000000000..e07e49a2f060 --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.flink.translation.functions; + +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.WindowFn; +import org.apache.beam.sdk.util.WindowedValue; + +import org.apache.flink.api.common.functions.FlatMapFunction; +import org.apache.flink.util.Collector; + +import java.util.Collection; + +/** + * Flink {@link FlatMapFunction} for implementing + * {@link org.apache.beam.sdk.transforms.windowing.Window.Bound}. + */ +public class FlinkAssignWindows + implements FlatMapFunction, WindowedValue> { + + private final WindowFn windowFn; + + public FlinkAssignWindows(WindowFn windowFn) { + this.windowFn = windowFn; + } + + @Override + public void flatMap( + WindowedValue input, Collector> collector) throws Exception { + Collection windows = windowFn.assignWindows(new FlinkAssignContext<>(windowFn, input)); + for (W window: windows) { + collector.collect( + WindowedValue.of(input.getValue(), input.getTimestamp(), window, input.getPane())); + } + } +} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCoGroupKeyedListAggregator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCoGroupKeyedListAggregator.java deleted file mode 100644 index 8e7cdd75ca48..000000000000 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCoGroupKeyedListAggregator.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
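The new FlinkAssignWindows flat-map emits one WindowedValue per window that the WindowFn assigns to an element. A small runnable sketch of what a single element looks like after assignment, using FixedWindows because its assignWindow method can be called directly (timestamp and value are illustrative):

    import org.apache.beam.sdk.transforms.windowing.FixedWindows;
    import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
    import org.apache.beam.sdk.transforms.windowing.PaneInfo;
    import org.apache.beam.sdk.util.WindowedValue;
    import org.joda.time.Duration;
    import org.joda.time.Instant;

    public class AssignWindowSketch {
      public static void main(String[] args) {
        FixedWindows windowFn = FixedWindows.of(Duration.standardMinutes(1));

        // An element with timestamp 00:01:30 lands in the window [00:01:00, 00:02:00).
        Instant timestamp = new Instant(90000);
        IntervalWindow window = windowFn.assignWindow(timestamp);

        // This is the shape of what FlinkAssignWindows collects for each assigned window.
        WindowedValue<String> assigned =
            WindowedValue.of("hello", timestamp, window, PaneInfo.NO_FIRING);
        System.out.println(assigned.getWindows());
      }
    }
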
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.flink.translation.functions; - -import org.apache.beam.sdk.transforms.join.CoGbkResult; -import org.apache.beam.sdk.transforms.join.CoGbkResultSchema; -import org.apache.beam.sdk.transforms.join.RawUnionValue; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.TupleTag; - -import org.apache.flink.api.common.functions.CoGroupFunction; -import org.apache.flink.util.Collector; - -import java.util.ArrayList; -import java.util.List; - - -public class FlinkCoGroupKeyedListAggregator implements CoGroupFunction, KV, KV>{ - - private CoGbkResultSchema schema; - private TupleTag tupleTag1; - private TupleTag tupleTag2; - - public FlinkCoGroupKeyedListAggregator(CoGbkResultSchema schema, TupleTag tupleTag1, TupleTag tupleTag2) { - this.schema = schema; - this.tupleTag1 = tupleTag1; - this.tupleTag2 = tupleTag2; - } - - @Override - public void coGroup(Iterable> first, Iterable> second, Collector> out) throws Exception { - K k = null; - List result = new ArrayList<>(); - int index1 = schema.getIndex(tupleTag1); - for (KV entry : first) { - k = entry.getKey(); - result.add(new RawUnionValue(index1, entry.getValue())); - } - int index2 = schema.getIndex(tupleTag2); - for (KV entry : second) { - k = entry.getKey(); - result.add(new RawUnionValue(index2, entry.getValue())); - } - out.collect(KV.of(k, new CoGbkResult(schema, result))); - } -} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCreateFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCreateFunction.java deleted file mode 100644 index e5ac7482cfcb..000000000000 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkCreateFunction.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.flink.translation.functions; - -import org.apache.beam.runners.flink.translation.types.VoidCoderTypeSerializer; -import org.apache.beam.sdk.coders.Coder; - -import org.apache.flink.api.common.functions.FlatMapFunction; -import org.apache.flink.util.Collector; - -import java.io.ByteArrayInputStream; -import java.util.List; - -/** - * This is a hack for transforming a {@link org.apache.beam.sdk.transforms.Create} - * operation. Flink does not allow {@code null} in it's equivalent operation: - * {@link org.apache.flink.api.java.ExecutionEnvironment#fromElements(Object[])}. Therefore - * we use a DataSource with one dummy element and output the elements of the Create operation - * inside this FlatMap. - */ -public class FlinkCreateFunction implements FlatMapFunction { - - private final List elements; - private final Coder coder; - - public FlinkCreateFunction(List elements, Coder coder) { - this.elements = elements; - this.coder = coder; - } - - @Override - @SuppressWarnings("unchecked") - public void flatMap(IN value, Collector out) throws Exception { - - for (byte[] element : elements) { - ByteArrayInputStream bai = new ByteArrayInputStream(element); - OUT outValue = coder.decode(bai, Coder.Context.OUTER); - if (outValue == null) { - // TODO Flink doesn't allow null values in records - out.collect((OUT) VoidCoderTypeSerializer.VoidValue.INSTANCE); - } else { - out.collect(outValue); - } - } - - out.close(); - } -} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java index 3566f7e1070e..89243a3ede28 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java @@ -18,173 +18,85 @@ package org.apache.beam.runners.flink.translation.functions; import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions; -import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper; -import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.transforms.Aggregator; -import org.apache.beam.sdk.transforms.Combine; import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; -import org.apache.beam.sdk.util.TimerInternals; import org.apache.beam.sdk.util.WindowedValue; -import org.apache.beam.sdk.util.WindowingInternals; -import org.apache.beam.sdk.util.state.StateInternals; +import org.apache.beam.sdk.util.WindowingStrategy; import org.apache.beam.sdk.values.PCollectionView; -import org.apache.beam.sdk.values.TupleTag; - -import com.google.common.collect.ImmutableList; import org.apache.flink.api.common.functions.RichMapPartitionFunction; import org.apache.flink.util.Collector; -import org.joda.time.Instant; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; +import java.util.Map; /** * Encapsulates a {@link org.apache.beam.sdk.transforms.DoFn} * inside a Flink {@link org.apache.flink.api.common.functions.RichMapPartitionFunction}. 
*/ -public class FlinkDoFnFunction extends RichMapPartitionFunction { +public class FlinkDoFnFunction + extends RichMapPartitionFunction, WindowedValue> { - private final DoFn doFn; + private final DoFn doFn; private final SerializedPipelineOptions serializedOptions; - public FlinkDoFnFunction(DoFn doFn, PipelineOptions options) { - this.doFn = doFn; - this.serializedOptions = new SerializedPipelineOptions(options); - } - - @Override - public void mapPartition(Iterable values, Collector out) throws Exception { - ProcessContext context = new ProcessContext(doFn, out); - this.doFn.startBundle(context); - for (IN value : values) { - context.inValue = value; - doFn.processElement(context); - } - this.doFn.finishBundle(context); - } - - private class ProcessContext extends DoFn.ProcessContext { - - IN inValue; - Collector outCollector; - - public ProcessContext(DoFn fn, Collector outCollector) { - fn.super(); - super.setupDelegateAggregators(); - this.outCollector = outCollector; - } - - @Override - public IN element() { - return this.inValue; - } - + private final Map, WindowingStrategy> sideInputs; - @Override - public Instant timestamp() { - return Instant.now(); - } + private final boolean requiresWindowAccess; + private final boolean hasSideInputs; - @Override - public BoundedWindow window() { - return GlobalWindow.INSTANCE; - } - - @Override - public PaneInfo pane() { - return PaneInfo.NO_FIRING; - } + private final WindowingStrategy windowingStrategy; - @Override - public WindowingInternals windowingInternals() { - return new WindowingInternals() { - @Override - public StateInternals stateInternals() { - return null; - } - - @Override - public void outputWindowedValue(OUT output, Instant timestamp, Collection windows, PaneInfo pane) { - - } - - @Override - public TimerInternals timerInternals() { - return null; - } + public FlinkDoFnFunction( + DoFn doFn, + WindowingStrategy windowingStrategy, + Map, WindowingStrategy> sideInputs, + PipelineOptions options) { + this.doFn = doFn; + this.sideInputs = sideInputs; + this.serializedOptions = new SerializedPipelineOptions(options); + this.windowingStrategy = windowingStrategy; - @Override - public Collection windows() { - return ImmutableList.of(GlobalWindow.INSTANCE); - } + this.requiresWindowAccess = doFn instanceof DoFn.RequiresWindowAccess; + this.hasSideInputs = !sideInputs.isEmpty(); + } - @Override - public PaneInfo pane() { - return PaneInfo.NO_FIRING; - } + @Override + public void mapPartition( + Iterable> values, + Collector> out) throws Exception { + + FlinkProcessContext context = new FlinkProcessContext<>( + serializedOptions.getPipelineOptions(), + getRuntimeContext(), + doFn, + windowingStrategy, + out, + sideInputs); - @Override - public void writePCollectionViewData(TupleTag tag, Iterable> data, Coder elemCoder) throws IOException { - } + this.doFn.startBundle(context); - @Override - public T sideInput(PCollectionView view, BoundedWindow mainInputWindow) { - throw new RuntimeException("sideInput() not implemented."); + if (!requiresWindowAccess || hasSideInputs) { + // we don't need to explode the windows + for (WindowedValue value : values) { + context = context.forWindowedValue(value); + doFn.processElement(context); + } + } else { + // we need to explode the windows because we have per-window + // side inputs and window access also only works if an element + // is in only one window + for (WindowedValue value : values) { + for (WindowedValue explodedValue: value.explodeWindows()) { + context = 
context.forWindowedValue(value); + doFn.processElement(context); } - }; - } - - @Override - public PipelineOptions getPipelineOptions() { - return serializedOptions.getPipelineOptions(); - } - - @Override - public T sideInput(PCollectionView view) { - List sideInput = getRuntimeContext().getBroadcastVariable(view.getTagInternal().getId()); - List> windowedValueList = new ArrayList<>(sideInput.size()); - for (T input : sideInput) { - windowedValueList.add(WindowedValue.of(input, Instant.now(), ImmutableList.of(GlobalWindow.INSTANCE), pane())); } - return view.fromIterableInternal(windowedValueList); } - @Override - public void output(OUT output) { - outCollector.collect(output); - } - - @Override - public void outputWithTimestamp(OUT output, Instant timestamp) { - // not FLink's way, just output normally - output(output); - } - - @Override - public void sideOutput(TupleTag tag, T output) { - // ignore the side output, this can happen when a user does not register - // side outputs but then outputs using a freshly created TupleTag. - } - - @Override - public void sideOutputWithTimestamp(TupleTag tag, T output, Instant timestamp) { - sideOutput(tag, output); - } - - @Override - protected Aggregator createAggregatorInternal(String name, Combine.CombineFn combiner) { - SerializableFnAggregatorWrapper wrapper = new SerializableFnAggregatorWrapper<>(combiner); - getRuntimeContext().addAccumulator(name, wrapper); - return wrapper; - } - - + // set the windowed value to null so that the logic + // or outputting in finishBundle kicks in + context = context.forWindowedValue(null); + this.doFn.finishBundle(context); } + } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkKeyedListAggregationFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkKeyedListAggregationFunction.java deleted file mode 100644 index 7c7084db287c..000000000000 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkKeyedListAggregationFunction.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.flink.translation.functions; - -import org.apache.beam.sdk.values.KV; - -import org.apache.flink.api.common.functions.GroupReduceFunction; -import org.apache.flink.util.Collector; - -import java.util.Iterator; - -/** - * Flink {@link org.apache.flink.api.common.functions.GroupReduceFunction} for executing a - * {@link org.apache.beam.sdk.transforms.GroupByKey} operation. This reads the input - * {@link org.apache.beam.sdk.values.KV} elements, extracts the key and collects - * the values in a {@code List}. 
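The rewritten FlinkDoFnFunction explodes multi-window values so that a DoFn which needs window access only ever sees an element in exactly one window. A runnable sketch of WindowedValue.explodeWindows(), with two illustrative interval windows:

    import com.google.common.collect.ImmutableList;
    import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
    import org.apache.beam.sdk.transforms.windowing.PaneInfo;
    import org.apache.beam.sdk.util.WindowedValue;
    import org.joda.time.Instant;

    public class ExplodeWindowsSketch {
      public static void main(String[] args) {
        Instant timestamp = new Instant(0);

        // One value sitting in two windows at once.
        WindowedValue<String> value = WindowedValue.of(
            "hello",
            timestamp,
            ImmutableList.of(
                new IntervalWindow(timestamp, timestamp.plus(60000)),
                new IntervalWindow(timestamp, timestamp.plus(120000))),
            PaneInfo.NO_FIRING);

        // explodeWindows() yields one copy per window, each in exactly one window,
        // which is what processElement sees in the window-access branch above.
        for (WindowedValue<String> exploded : value.explodeWindows()) {
          System.out.println(exploded.getWindows());
        }
      }
    }
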
- */ -public class FlinkKeyedListAggregationFunction implements GroupReduceFunction, KV>> { - - @Override - public void reduce(Iterable> values, Collector>> out) throws Exception { - Iterator> it = values.iterator(); - KV first = it.next(); - Iterable passThrough = new PassThroughIterable<>(first, it); - out.collect(KV.of(first.getKey(), passThrough)); - } - - private static class PassThroughIterable implements Iterable, Iterator { - private KV first; - private Iterator> iterator; - - public PassThroughIterable(KV first, Iterator> iterator) { - this.first = first; - this.iterator = iterator; - } - - @Override - public Iterator iterator() { - return this; - } - - @Override - public boolean hasNext() { - return first != null || iterator.hasNext(); - } - - @Override - public V next() { - if (first != null) { - V result = first.getValue(); - first = null; - return result; - } else { - return iterator.next().getValue(); - } - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Cannot remove elements from input."); - } - } -} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java new file mode 100644 index 000000000000..9074d72e0e15 --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.runners.flink.translation.functions; + +import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.transforms.CombineFnBase; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.OutputTimeFn; +import org.apache.beam.sdk.transforms.windowing.PaneInfo; +import org.apache.beam.sdk.util.PerKeyCombineFnRunner; +import org.apache.beam.sdk.util.PerKeyCombineFnRunners; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.util.WindowingStrategy; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollectionView; + +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; + +import org.apache.flink.api.common.functions.RichGroupReduceFunction; +import org.apache.flink.util.Collector; +import org.joda.time.Instant; + +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * Special version of {@link FlinkReduceFunction} that supports merging windows. This + * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the + * same behaviour as {@code MergeOverlappingIntervalWindows}. + * + *

This is different from the pair of function for the non-merging windows case + * in that we cannot do combining before the shuffle because elements would not + * yet be in their correct windows for side-input access. + */ +public class FlinkMergingNonShuffleReduceFunction< + K, InputT, AccumT, OutputT, W extends IntervalWindow> + extends RichGroupReduceFunction>, WindowedValue>> { + + private final CombineFnBase.PerKeyCombineFn combineFn; + + private final DoFn, KV> doFn; + + private final WindowingStrategy windowingStrategy; + + private final Map, WindowingStrategy> sideInputs; + + private final SerializedPipelineOptions serializedOptions; + + public FlinkMergingNonShuffleReduceFunction( + CombineFnBase.PerKeyCombineFn keyedCombineFn, + WindowingStrategy windowingStrategy, + Map, WindowingStrategy> sideInputs, + PipelineOptions pipelineOptions) { + + this.combineFn = keyedCombineFn; + + this.windowingStrategy = windowingStrategy; + this.sideInputs = sideInputs; + + this.serializedOptions = new SerializedPipelineOptions(pipelineOptions); + + // dummy DoFn because we need one for ProcessContext + this.doFn = new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) throws Exception { + + } + }; + } + + @Override + public void reduce( + Iterable>> elements, + Collector>> out) throws Exception { + + FlinkProcessContext, KV> processContext = + new FlinkProcessContext<>( + serializedOptions.getPipelineOptions(), + getRuntimeContext(), + doFn, + windowingStrategy, + out, + sideInputs); + + PerKeyCombineFnRunner combineFnRunner = + PerKeyCombineFnRunners.create(combineFn); + + @SuppressWarnings("unchecked") + OutputTimeFn outputTimeFn = + (OutputTimeFn) windowingStrategy.getOutputTimeFn(); + + // get all elements so that we can sort them, has to fit into + // memory + // this seems very unprudent, but correct, for now + List>> sortedInput = Lists.newArrayList(); + for (WindowedValue> inputValue: elements) { + for (WindowedValue> exploded: inputValue.explodeWindows()) { + sortedInput.add(exploded); + } + } + Collections.sort(sortedInput, new Comparator>>() { + @Override + public int compare( + WindowedValue> o1, + WindowedValue> o2) { + return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp() + .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp()); + } + }); + + // merge windows, we have to do it in an extra pre-processing step and + // can't do it as we go since the window of early elements would not + // be correct when calling the CombineFn + mergeWindow(sortedInput); + + // iterate over the elements that are sorted by window timestamp + final Iterator>> iterator = sortedInput.iterator(); + + // create accumulator using the first elements key + WindowedValue> currentValue = iterator.next(); + K key = currentValue.getValue().getKey(); + IntervalWindow currentWindow = + (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows()); + InputT firstValue = currentValue.getValue().getValue(); + processContext = processContext.forWindowedValue(currentValue); + AccumT accumulator = combineFnRunner.createAccumulator(key, processContext); + accumulator = combineFnRunner.addInput(key, accumulator, firstValue, processContext); + + // we use this to keep track of the timestamps assigned by the OutputTimeFn + Instant windowTimestamp = + outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow); + + while (iterator.hasNext()) { + WindowedValue> nextValue = iterator.next(); + IntervalWindow nextWindow = (IntervalWindow) 
Iterables.getOnlyElement(nextValue.getWindows()); + + if (currentWindow.equals(nextWindow)) { + // continue accumulating and merge windows + + InputT value = nextValue.getValue().getValue(); + processContext = processContext.forWindowedValue(nextValue); + accumulator = combineFnRunner.addInput(key, accumulator, value, processContext); + + windowTimestamp = outputTimeFn.combine( + windowTimestamp, + outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow)); + + } else { + // emit the value that we currently have + out.collect( + WindowedValue.of( + KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)), + windowTimestamp, + currentWindow, + PaneInfo.NO_FIRING)); + + currentWindow = nextWindow; + InputT value = nextValue.getValue().getValue(); + processContext = processContext.forWindowedValue(nextValue); + accumulator = combineFnRunner.createAccumulator(key, processContext); + accumulator = combineFnRunner.addInput(key, accumulator, value, processContext); + windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow); + } + } + + // emit the final accumulator + out.collect( + WindowedValue.of( + KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)), + windowTimestamp, + currentWindow, + PaneInfo.NO_FIRING)); + } + + /** + * Merge windows. This assumes that the list of elements is sorted by window-end timestamp. + * This replaces windows in the input list. + */ + private void mergeWindow(List>> elements) { + int currentStart = 0; + IntervalWindow currentWindow = + (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows()); + + for (int i = 1; i < elements.size(); i++) { + WindowedValue> nextValue = elements.get(i); + IntervalWindow nextWindow = + (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows()); + if (currentWindow.intersects(nextWindow)) { + // we continue + currentWindow = currentWindow.span(nextWindow); + } else { + // retrofit the merged window to all windows up to "currentStart" + for (int j = i - 1; j >= currentStart; j--) { + WindowedValue> value = elements.get(j); + elements.set( + j, + WindowedValue.of( + value.getValue(), value.getTimestamp(), currentWindow, value.getPane())); + } + currentStart = i; + currentWindow = nextWindow; + } + } + if (currentStart < elements.size() - 1) { + // we have to retrofit the last batch + for (int j = elements.size() - 1; j >= currentStart; j--) { + WindowedValue> value = elements.get(j); + elements.set( + j, + WindowedValue.of( + value.getValue(), value.getTimestamp(), currentWindow, value.getPane())); + } + } + } + +} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java new file mode 100644 index 000000000000..c12e4204a3f0 --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
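The mergeWindow pre-pass above widens overlapping IntervalWindows before combining, in the spirit of MergeOverlappingIntervalWindows. A standalone sketch of the span-if-intersecting idea over windows already sorted by end timestamp (the concrete windows are illustrative):

    import com.google.common.collect.Lists;
    import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
    import org.joda.time.Instant;

    import java.util.List;

    public class MergeIntervalWindowsSketch {
      public static void main(String[] args) {
        // Three windows sorted by end timestamp; the first two overlap.
        List<IntervalWindow> sorted = Lists.newArrayList(
            new IntervalWindow(new Instant(0), new Instant(10)),
            new IntervalWindow(new Instant(5), new Instant(15)),
            new IntervalWindow(new Instant(20), new Instant(30)));

        List<IntervalWindow> merged = Lists.newArrayList();
        IntervalWindow current = sorted.get(0);
        for (int i = 1; i < sorted.size(); i++) {
          IntervalWindow next = sorted.get(i);
          if (current.intersects(next)) {
            // Widen the current window to cover both, as span() does above.
            current = current.span(next);
          } else {
            merged.add(current);
            current = next;
          }
        }
        merged.add(current);

        // Two windows remain: [0, 15) and [20, 30).
        System.out.println(merged);
      }
    }
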
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.flink.translation.functions; + +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.transforms.CombineFnBase; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.OutputTimeFn; +import org.apache.beam.sdk.transforms.windowing.PaneInfo; +import org.apache.beam.sdk.util.PerKeyCombineFnRunner; +import org.apache.beam.sdk.util.PerKeyCombineFnRunners; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.util.WindowingStrategy; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollectionView; + +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; + +import org.apache.flink.util.Collector; +import org.joda.time.Instant; + +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * Special version of {@link FlinkPartialReduceFunction} that supports merging windows. This + * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the + * same behaviour as {@code MergeOverlappingIntervalWindows}. + */ +public class FlinkMergingPartialReduceFunction + extends FlinkPartialReduceFunction { + + public FlinkMergingPartialReduceFunction( + CombineFnBase.PerKeyCombineFn combineFn, + WindowingStrategy windowingStrategy, + Map, WindowingStrategy> sideInputs, + PipelineOptions pipelineOptions) { + super(combineFn, windowingStrategy, sideInputs, pipelineOptions); + } + + @Override + public void combine( + Iterable>> elements, + Collector>> out) throws Exception { + + FlinkProcessContext, KV> processContext = + new FlinkProcessContext<>( + serializedOptions.getPipelineOptions(), + getRuntimeContext(), + doFn, + windowingStrategy, + out, + sideInputs); + + PerKeyCombineFnRunner combineFnRunner = + PerKeyCombineFnRunners.create(combineFn); + + @SuppressWarnings("unchecked") + OutputTimeFn outputTimeFn = + (OutputTimeFn) windowingStrategy.getOutputTimeFn(); + + // get all elements so that we can sort them, has to fit into + // memory + // this seems very unprudent, but correct, for now + List>> sortedInput = Lists.newArrayList(); + for (WindowedValue> inputValue: elements) { + for (WindowedValue> exploded: inputValue.explodeWindows()) { + sortedInput.add(exploded); + } + } + Collections.sort(sortedInput, new Comparator>>() { + @Override + public int compare( + WindowedValue> o1, + WindowedValue> o2) { + return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp() + .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp()); + } + }); + + // merge windows, we have to do it in an extra pre-processing step and + // can't do it as we go since the window of early elements would not + // be correct when calling the CombineFn + mergeWindow(sortedInput); + + // iterate over the elements that are sorted by window timestamp + final Iterator>> iterator = sortedInput.iterator(); + + // create accumulator using the first 
elements key + WindowedValue> currentValue = iterator.next(); + K key = currentValue.getValue().getKey(); + IntervalWindow currentWindow = + (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows()); + InputT firstValue = currentValue.getValue().getValue(); + processContext = processContext.forWindowedValue(currentValue); + AccumT accumulator = combineFnRunner.createAccumulator(key, processContext); + accumulator = combineFnRunner.addInput(key, accumulator, firstValue, processContext); + + // we use this to keep track of the timestamps assigned by the OutputTimeFn + Instant windowTimestamp = + outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow); + + while (iterator.hasNext()) { + WindowedValue> nextValue = iterator.next(); + IntervalWindow nextWindow = (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows()); + + if (currentWindow.equals(nextWindow)) { + // continue accumulating and merge windows + + InputT value = nextValue.getValue().getValue(); + processContext = processContext.forWindowedValue(nextValue); + accumulator = combineFnRunner.addInput(key, accumulator, value, processContext); + + windowTimestamp = outputTimeFn.combine( + windowTimestamp, + outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow)); + + } else { + // emit the value that we currently have + out.collect( + WindowedValue.of( + KV.of(key, accumulator), + windowTimestamp, + currentWindow, + PaneInfo.NO_FIRING)); + + currentWindow = nextWindow; + InputT value = nextValue.getValue().getValue(); + processContext = processContext.forWindowedValue(nextValue); + accumulator = combineFnRunner.createAccumulator(key, processContext); + accumulator = combineFnRunner.addInput(key, accumulator, value, processContext); + windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow); + } + } + + // emit the final accumulator + out.collect( + WindowedValue.of( + KV.of(key, accumulator), + windowTimestamp, + currentWindow, + PaneInfo.NO_FIRING)); + } + + /** + * Merge windows. This assumes that the list of elements is sorted by window-end timestamp. + * This replaces windows in the input list. 
+ */ + private void mergeWindow(List>> elements) { + int currentStart = 0; + IntervalWindow currentWindow = + (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows()); + + for (int i = 1; i < elements.size(); i++) { + WindowedValue> nextValue = elements.get(i); + IntervalWindow nextWindow = + (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows()); + if (currentWindow.intersects(nextWindow)) { + // we continue + currentWindow = currentWindow.span(nextWindow); + } else { + // retrofit the merged window to all windows up to "currentStart" + for (int j = i - 1; j >= currentStart; j--) { + WindowedValue> value = elements.get(j); + elements.set( + j, + WindowedValue.of( + value.getValue(), value.getTimestamp(), currentWindow, value.getPane())); + } + currentStart = i; + currentWindow = nextWindow; + } + } + if (currentStart < elements.size() - 1) { + // we have to retrofit the last batch + for (int j = elements.size() - 1; j >= currentStart; j--) { + WindowedValue> value = elements.get(j); + elements.set( + j, + WindowedValue.of( + value.getValue(), value.getTimestamp(), currentWindow, value.getPane())); + } + } + } +} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java new file mode 100644 index 000000000000..07d1c9741533 --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
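Within one merged window the reduce functions drive a combine function through the usual create/add/extract cycle (via PerKeyCombineFnRunner in the code above). A simplified stand-in using a plain Combine.CombineFn, with a hand-rolled summing fn so the sketch stays self-contained:

    import org.apache.beam.sdk.transforms.Combine;

    public class CombineCycleSketch {
      public static void main(String[] args) {
        Combine.CombineFn<Integer, Integer, Integer> sum =
            new Combine.CombineFn<Integer, Integer, Integer>() {
              @Override public Integer createAccumulator() { return 0; }
              @Override public Integer addInput(Integer accum, Integer input) { return accum + input; }
              @Override public Integer mergeAccumulators(Iterable<Integer> accums) {
                int result = 0;
                for (int accum : accums) {
                  result += accum;
                }
                return result;
              }
              @Override public Integer extractOutput(Integer accum) { return accum; }
            };

        // All values of one key that fell into one (merged) window.
        Integer accumulator = sum.createAccumulator();
        for (int value : new int[] {1, 2, 3}) {
          accumulator = sum.addInput(accumulator, value);
        }

        // One output per window, analogous to the WindowedValue emitted above.
        System.out.println(sum.extractOutput(accumulator)); // 6
      }
    }
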
+ */ +package org.apache.beam.runners.flink.translation.functions; + +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.transforms.CombineFnBase; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.IntervalWindow; +import org.apache.beam.sdk.transforms.windowing.OutputTimeFn; +import org.apache.beam.sdk.transforms.windowing.PaneInfo; +import org.apache.beam.sdk.util.PerKeyCombineFnRunner; +import org.apache.beam.sdk.util.PerKeyCombineFnRunners; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.util.WindowingStrategy; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollectionView; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; + +import org.apache.flink.util.Collector; +import org.joda.time.Instant; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * Special version of {@link FlinkReduceFunction} that supports merging windows. This + * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the + * same behaviour as {@code MergeOverlappingIntervalWindows}. + */ +public class FlinkMergingReduceFunction + extends FlinkReduceFunction { + + public FlinkMergingReduceFunction( + CombineFnBase.PerKeyCombineFn keyedCombineFn, + WindowingStrategy windowingStrategy, + Map, WindowingStrategy> sideInputs, + PipelineOptions pipelineOptions) { + super(keyedCombineFn, windowingStrategy, sideInputs, pipelineOptions); + } + + @Override + public void reduce( + Iterable>> elements, + Collector>> out) throws Exception { + + FlinkProcessContext, KV> processContext = + new FlinkProcessContext<>( + serializedOptions.getPipelineOptions(), + getRuntimeContext(), + doFn, + windowingStrategy, + out, + sideInputs); + + PerKeyCombineFnRunner combineFnRunner = + PerKeyCombineFnRunners.create(combineFn); + + @SuppressWarnings("unchecked") + OutputTimeFn outputTimeFn = + (OutputTimeFn) windowingStrategy.getOutputTimeFn(); + + + // get all elements so that we can sort them, has to fit into + // memory + // this seems very unprudent, but correct, for now + ArrayList>> sortedInput = Lists.newArrayList(); + for (WindowedValue> inputValue: elements) { + for (WindowedValue> exploded: inputValue.explodeWindows()) { + sortedInput.add(exploded); + } + } + Collections.sort(sortedInput, new Comparator>>() { + @Override + public int compare( + WindowedValue> o1, + WindowedValue> o2) { + return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp() + .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp()); + } + }); + + // merge windows, we have to do it in an extra pre-processing step and + // can't do it as we go since the window of early elements would not + // be correct when calling the CombineFn + mergeWindow(sortedInput); + + // iterate over the elements that are sorted by window timestamp + final Iterator>> iterator = sortedInput.iterator(); + + // get the first accumulator + WindowedValue> currentValue = iterator.next(); + K key = currentValue.getValue().getKey(); + IntervalWindow currentWindow = + (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows()); + AccumT accumulator = currentValue.getValue().getValue(); + + // we use this to keep track of the timestamps assigned by the OutputTimeFn, + // in 
FlinkPartialReduceFunction we already merge the timestamps assigned + // to individual elements, here we just merge them + List windowTimestamps = new ArrayList<>(); + windowTimestamps.add(currentValue.getTimestamp()); + + while (iterator.hasNext()) { + WindowedValue> nextValue = iterator.next(); + IntervalWindow nextWindow = + (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows()); + + if (nextWindow.equals(currentWindow)) { + // continue accumulating and merge windows + + processContext = processContext.forWindowedValue(nextValue); + + accumulator = combineFnRunner.mergeAccumulators( + key, ImmutableList.of(accumulator, nextValue.getValue().getValue()), processContext); + + windowTimestamps.add(nextValue.getTimestamp()); + } else { + out.collect( + WindowedValue.of( + KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)), + outputTimeFn.merge(currentWindow, windowTimestamps), + currentWindow, + PaneInfo.NO_FIRING)); + + windowTimestamps.clear(); + + processContext = processContext.forWindowedValue(nextValue); + + currentWindow = nextWindow; + accumulator = nextValue.getValue().getValue(); + windowTimestamps.add(nextValue.getTimestamp()); + } + } + + // emit the final accumulator + out.collect( + WindowedValue.of( + KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)), + outputTimeFn.merge(currentWindow, windowTimestamps), + currentWindow, + PaneInfo.NO_FIRING)); + } + + /** + * Merge windows. This assumes that the list of elements is sorted by window-end timestamp. + * This replaces windows in the input list. + */ + private void mergeWindow(List>> elements) { + int currentStart = 0; + IntervalWindow currentWindow = + (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows()); + + for (int i = 1; i < elements.size(); i++) { + WindowedValue> nextValue = elements.get(i); + IntervalWindow nextWindow = + (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows()); + if (currentWindow.intersects(nextWindow)) { + // we continue + currentWindow = currentWindow.span(nextWindow); + } else { + // retrofit the merged window to all windows up to "currentStart" + for (int j = i - 1; j >= currentStart; j--) { + WindowedValue> value = elements.get(j); + elements.set( + j, + WindowedValue.of( + value.getValue(), value.getTimestamp(), currentWindow, value.getPane())); + } + currentStart = i; + currentWindow = nextWindow; + } + } + if (currentStart < elements.size() - 1) { + // we have to retrofit the last batch + for (int j = elements.size() - 1; j >= currentStart; j--) { + WindowedValue> value = elements.get(j); + elements.set( + j, + WindowedValue.of( + value.getValue(), value.getTimestamp(), currentWindow, value.getPane())); + } + } + } + +} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputDoFnFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputDoFnFunction.java index 476dc5e5f8e5..f92e76fa60cb 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputDoFnFunction.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputDoFnFunction.java @@ -18,28 +18,17 @@ package org.apache.beam.runners.flink.translation.functions; import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions; -import 
org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper; import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.transforms.Aggregator; -import org.apache.beam.sdk.transforms.Combine; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.transforms.join.RawUnionValue; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.transforms.windowing.GlobalWindow; -import org.apache.beam.sdk.transforms.windowing.PaneInfo; import org.apache.beam.sdk.util.WindowedValue; -import org.apache.beam.sdk.util.WindowingInternals; +import org.apache.beam.sdk.util.WindowingStrategy; import org.apache.beam.sdk.values.PCollectionView; import org.apache.beam.sdk.values.TupleTag; -import com.google.common.collect.ImmutableList; - import org.apache.flink.api.common.functions.RichMapPartitionFunction; import org.apache.flink.util.Collector; -import org.joda.time.Instant; -import java.util.ArrayList; -import java.util.List; import java.util.Map; /** @@ -50,112 +39,72 @@ * and must tag all outputs with the output number. Afterwards a filter will filter out * those elements that are not to be in a specific output. */ -public class FlinkMultiOutputDoFnFunction extends RichMapPartitionFunction { - - private final DoFn doFn; - private final SerializedPipelineOptions serializedPipelineOptions; - private final Map, Integer> outputMap; - - public FlinkMultiOutputDoFnFunction(DoFn doFn, PipelineOptions options, Map, Integer> outputMap) { - this.doFn = doFn; - this.serializedPipelineOptions = new SerializedPipelineOptions(options); - this.outputMap = outputMap; - } - - @Override - public void mapPartition(Iterable values, Collector out) throws Exception { - ProcessContext context = new ProcessContext(doFn, out); - this.doFn.startBundle(context); - for (IN value : values) { - context.inValue = value; - doFn.processElement(context); - } - this.doFn.finishBundle(context); - } +public class FlinkMultiOutputDoFnFunction + extends RichMapPartitionFunction, WindowedValue> { - private class ProcessContext extends DoFn.ProcessContext { + private final DoFn doFn; + private final SerializedPipelineOptions serializedOptions; - IN inValue; - Collector outCollector; + private final Map, Integer> outputMap; - public ProcessContext(DoFn fn, Collector outCollector) { - fn.super(); - this.outCollector = outCollector; - } + private final Map, WindowingStrategy> sideInputs; - @Override - public IN element() { - return this.inValue; - } + private final boolean requiresWindowAccess; + private final boolean hasSideInputs; - @Override - public Instant timestamp() { - return Instant.now(); - } + private final WindowingStrategy windowingStrategy; - @Override - public BoundedWindow window() { - return GlobalWindow.INSTANCE; - } + public FlinkMultiOutputDoFnFunction( + DoFn doFn, + WindowingStrategy windowingStrategy, + Map, WindowingStrategy> sideInputs, + PipelineOptions options, + Map, Integer> outputMap) { + this.doFn = doFn; + this.serializedOptions = new SerializedPipelineOptions(options); + this.outputMap = outputMap; - @Override - public PaneInfo pane() { - return PaneInfo.NO_FIRING; - } + this.requiresWindowAccess = doFn instanceof DoFn.RequiresWindowAccess; + this.hasSideInputs = !sideInputs.isEmpty(); + this.windowingStrategy = windowingStrategy; + this.sideInputs = sideInputs; + } - @Override - public WindowingInternals windowingInternals() { - return null; - } + @Override + public void mapPartition( + Iterable> values, + Collector> out) 
throws Exception { + + FlinkProcessContext context = new FlinkMultiOutputProcessContext<>( + serializedOptions.getPipelineOptions(), + getRuntimeContext(), + doFn, + windowingStrategy, + out, + outputMap, + sideInputs); - @Override - public PipelineOptions getPipelineOptions() { - return serializedPipelineOptions.getPipelineOptions(); - } + this.doFn.startBundle(context); - @Override - public T sideInput(PCollectionView view) { - List sideInput = getRuntimeContext().getBroadcastVariable(view.getTagInternal() - .getId()); - List> windowedValueList = new ArrayList<>(sideInput.size()); - for (T input : sideInput) { - windowedValueList.add(WindowedValue.of(input, Instant.now(), ImmutableList.of(GlobalWindow.INSTANCE), pane())); + if (!requiresWindowAccess || hasSideInputs) { + // we don't need to explode the windows + for (WindowedValue value : values) { + context = context.forWindowedValue(value); + doFn.processElement(context); } - return view.fromIterableInternal(windowedValueList); - } - - @Override - public void output(OUT value) { - // assume that index 0 is the default output - outCollector.collect(new RawUnionValue(0, value)); - } - - @Override - public void outputWithTimestamp(OUT output, Instant timestamp) { - // not FLink's way, just output normally - output(output); - } - - @Override - @SuppressWarnings("unchecked") - public void sideOutput(TupleTag tag, T value) { - Integer index = outputMap.get(tag); - if (index != null) { - outCollector.collect(new RawUnionValue(index, value)); + } else { + // we need to explode the windows because we have per-window + // side inputs and window access also only works if an element + // is in only one window + for (WindowedValue value : values) { + for (WindowedValue explodedValue: value.explodeWindows()) { + context = context.forWindowedValue(value); + doFn.processElement(context); + } } } - @Override - public void sideOutputWithTimestamp(TupleTag tag, T output, Instant timestamp) { - sideOutput(tag, output); - } - - @Override - protected Aggregator createAggregatorInternal(String name, Combine.CombineFn combiner) { - SerializableFnAggregatorWrapper wrapper = new SerializableFnAggregatorWrapper<>(combiner); - getRuntimeContext().addAccumulator(name, wrapper); - return null; - } + this.doFn.finishBundle(context); } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputProcessContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputProcessContext.java new file mode 100644 index 000000000000..71b6d27ddba0 --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputProcessContext.java @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.flink.translation.functions; + +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.join.RawUnionValue; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.PaneInfo; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.util.WindowingStrategy; +import org.apache.beam.sdk.values.PCollectionView; +import org.apache.beam.sdk.values.TupleTag; + +import org.apache.flink.api.common.functions.RuntimeContext; +import org.apache.flink.util.Collector; +import org.joda.time.Instant; + +import java.util.Collection; +import java.util.Map; + +/** + * {@link DoFn.ProcessContext} for {@link FlinkMultiOutputDoFnFunction} that supports + * side outputs. + */ +class FlinkMultiOutputProcessContext + extends FlinkProcessContext { + + // we need a different Collector from the base class + private final Collector> collector; + + private final Map, Integer> outputMap; + + + FlinkMultiOutputProcessContext( + PipelineOptions pipelineOptions, + RuntimeContext runtimeContext, + DoFn doFn, + WindowingStrategy windowingStrategy, + Collector> collector, + Map, Integer> outputMap, + Map, WindowingStrategy> sideInputs) { + super( + pipelineOptions, + runtimeContext, + doFn, + windowingStrategy, + new Collector>() { + @Override + public void collect(WindowedValue outputTWindowedValue) { + + } + + @Override + public void close() { + + } + }, + sideInputs); + + this.collector = collector; + this.outputMap = outputMap; + } + + @Override + public FlinkProcessContext forWindowedValue( + WindowedValue windowedValue) { + this.windowedValue = windowedValue; + return this; + } + + @Override + public void outputWithTimestamp(OutputT value, Instant timestamp) { + if (windowedValue == null) { + // we are in startBundle() or finishBundle() + + try { + Collection windows = windowingStrategy.getWindowFn().assignWindows( + new FlinkNoElementAssignContext( + windowingStrategy.getWindowFn(), + value, + timestamp)); + + collector.collect( + WindowedValue.of( + new RawUnionValue(0, value), + timestamp != null ? 
timestamp : new Instant(Long.MIN_VALUE), + windows, + PaneInfo.NO_FIRING)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } else { + collector.collect( + WindowedValue.of( + new RawUnionValue(0, value), + windowedValue.getTimestamp(), + windowedValue.getWindows(), + windowedValue.getPane())); + } + } + + @Override + protected void outputWithTimestampAndWindow( + OutputT value, + Instant timestamp, + Collection windows, + PaneInfo pane) { + collector.collect( + WindowedValue.of( + new RawUnionValue(0, value), timestamp, windows, pane)); + } + + @Override + @SuppressWarnings("unchecked") + public void sideOutput(TupleTag tag, T value) { + if (windowedValue != null) { + sideOutputWithTimestamp(tag, value, windowedValue.getTimestamp()); + } else { + sideOutputWithTimestamp(tag, value, null); + } + } + + @Override + public void sideOutputWithTimestamp(TupleTag tag, T value, Instant timestamp) { + Integer index = outputMap.get(tag); + + if (index == null) { + throw new IllegalArgumentException("Unknown side output tag: " + tag); + } + + if (windowedValue == null) { + // we are in startBundle() or finishBundle() + + try { + Collection windows = windowingStrategy.getWindowFn().assignWindows( + new FlinkNoElementAssignContext( + windowingStrategy.getWindowFn(), + value, + timestamp)); + + collector.collect( + WindowedValue.of( + new RawUnionValue(index, value), + timestamp != null ? timestamp : new Instant(Long.MIN_VALUE), + windows, + PaneInfo.NO_FIRING)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } else { + collector.collect( + WindowedValue.of( + new RawUnionValue(index, value), + windowedValue.getTimestamp(), + windowedValue.getWindows(), + windowedValue.getPane())); + } + + } +} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java index 58a36b27c5dd..9205a5520f82 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java @@ -18,27 +18,34 @@ package org.apache.beam.runners.flink.translation.functions; import org.apache.beam.sdk.transforms.join.RawUnionValue; +import org.apache.beam.sdk.util.WindowedValue; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.util.Collector; /** - * A FlatMap function that filters out those elements that don't belong in this output. We need - * this to implement MultiOutput ParDo functions. + * A {@link FlatMapFunction} function that filters out those elements that don't belong in this + * output. We need this to implement MultiOutput ParDo functions in combination with + * {@link FlinkMultiOutputDoFnFunction}. 
*/ -public class FlinkMultiOutputPruningFunction implements FlatMapFunction { +public class FlinkMultiOutputPruningFunction + implements FlatMapFunction, WindowedValue> { - private final int outputTag; + private final int ourOutputTag; - public FlinkMultiOutputPruningFunction(int outputTag) { - this.outputTag = outputTag; + public FlinkMultiOutputPruningFunction(int ourOutputTag) { + this.ourOutputTag = ourOutputTag; } @Override @SuppressWarnings("unchecked") - public void flatMap(RawUnionValue rawUnionValue, Collector collector) throws Exception { - if (rawUnionValue.getUnionTag() == outputTag) { - collector.collect((T) rawUnionValue.getValue()); + public void flatMap( + WindowedValue windowedValue, + Collector> collector) throws Exception { + int unionTag = windowedValue.getValue().getUnionTag(); + if (unionTag == ourOutputTag) { + collector.collect( + (WindowedValue) windowedValue.withValue(windowedValue.getValue().getValue())); } } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoElementAssignContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoElementAssignContext.java new file mode 100644 index 000000000000..892f7a1f33f0 --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoElementAssignContext.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.flink.translation.functions; + +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.WindowFn; + +import org.joda.time.Instant; + +import java.util.Collection; + +/** + * {@link WindowFn.AssignContext} for calling a {@link WindowFn} for elements emitted from + * {@link org.apache.beam.sdk.transforms.DoFn#startBundle(DoFn.Context)} + * or {@link DoFn#finishBundle(DoFn.Context)}. + * + *
<p>
In those cases the {@code WindowFn} is not allowed to access any element information. + */ +class FlinkNoElementAssignContext + extends WindowFn.AssignContext { + + private final InputT element; + private final Instant timestamp; + + FlinkNoElementAssignContext( + WindowFn fn, + InputT element, + Instant timestamp) { + fn.super(); + + this.element = element; + // the timestamp can be null, in that case output is called + // without a timestamp + this.timestamp = timestamp; + } + + @Override + public InputT element() { + return element; + } + + @Override + public Instant timestamp() { + if (timestamp != null) { + return timestamp; + } else { + throw new UnsupportedOperationException("No timestamp available."); + } + } + + @Override + public Collection windows() { + throw new UnsupportedOperationException("No windows available."); + } +} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java index a2bab2b3060f..c29e1df2ceb0 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java @@ -17,45 +17,170 @@ */ package org.apache.beam.runners.flink.translation.functions; -import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.transforms.CombineFnBase; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.OutputTimeFn; +import org.apache.beam.sdk.transforms.windowing.PaneInfo; +import org.apache.beam.sdk.util.PerKeyCombineFnRunner; +import org.apache.beam.sdk.util.PerKeyCombineFnRunners; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.util.WindowingStrategy; import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollectionView; -import org.apache.flink.api.common.functions.GroupCombineFunction; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; + +import org.apache.flink.api.common.functions.RichGroupCombineFunction; import org.apache.flink.util.Collector; +import org.joda.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; +import java.util.Map; /** - * Flink {@link org.apache.flink.api.common.functions.GroupCombineFunction} for executing a - * {@link org.apache.beam.sdk.transforms.Combine.PerKey} operation. This reads the input - * {@link org.apache.beam.sdk.values.KV} elements VI, extracts the key and emits accumulated - * values which have the intermediate format VA. + * This is is the first step for executing a {@link org.apache.beam.sdk.transforms.Combine.PerKey} + * on Flink. The second part is {@link FlinkReduceFunction}. This function performs a local + * combine step before shuffling while the latter does the final combination after a shuffle. + * + *
<p>
The input to {@link #combine(Iterable, Collector)} are elements of the same key but + * for different windows. We have to ensure that we only combine elements of matching + * windows. */ -public class FlinkPartialReduceFunction implements GroupCombineFunction, KV> { +public class FlinkPartialReduceFunction + extends RichGroupCombineFunction>, WindowedValue>> { + + protected final CombineFnBase.PerKeyCombineFn combineFn; + + protected final DoFn, KV> doFn; + + protected final WindowingStrategy windowingStrategy; + + protected final SerializedPipelineOptions serializedOptions; - private final Combine.KeyedCombineFn keyedCombineFn; + protected final Map, WindowingStrategy> sideInputs; - public FlinkPartialReduceFunction(Combine.KeyedCombineFn - keyedCombineFn) { - this.keyedCombineFn = keyedCombineFn; + public FlinkPartialReduceFunction( + CombineFnBase.PerKeyCombineFn combineFn, + WindowingStrategy windowingStrategy, + Map, WindowingStrategy> sideInputs, + PipelineOptions pipelineOptions) { + + this.combineFn = combineFn; + this.windowingStrategy = windowingStrategy; + this.sideInputs = sideInputs; + this.serializedOptions = new SerializedPipelineOptions(pipelineOptions); + + // dummy DoFn because we need one for ProcessContext + this.doFn = new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) throws Exception { + + } + }; } @Override - public void combine(Iterable> elements, Collector> out) throws Exception { + public void combine( + Iterable>> elements, + Collector>> out) throws Exception { + + FlinkProcessContext, KV> processContext = + new FlinkProcessContext<>( + serializedOptions.getPipelineOptions(), + getRuntimeContext(), + doFn, + windowingStrategy, + out, + sideInputs); + + PerKeyCombineFnRunner combineFnRunner = + PerKeyCombineFnRunners.create(combineFn); + + @SuppressWarnings("unchecked") + OutputTimeFn outputTimeFn = + (OutputTimeFn) windowingStrategy.getOutputTimeFn(); + + // get all elements so that we can sort them, has to fit into + // memory + // this seems very unprudent, but correct, for now + ArrayList>> sortedInput = Lists.newArrayList(); + for (WindowedValue> inputValue: elements) { + for (WindowedValue> exploded: inputValue.explodeWindows()) { + sortedInput.add(exploded); + } + } + Collections.sort(sortedInput, new Comparator>>() { + @Override + public int compare( + WindowedValue> o1, + WindowedValue> o2) { + return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp() + .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp()); + } + }); + + // iterate over the elements that are sorted by window timestamp + // + final Iterator>> iterator = sortedInput.iterator(); - final Iterator> iterator = elements.iterator(); // create accumulator using the first elements key - KV first = iterator.next(); - K key = first.getKey(); - VI value = first.getValue(); - VA accumulator = keyedCombineFn.createAccumulator(key); - accumulator = keyedCombineFn.addInput(key, accumulator, value); - - while(iterator.hasNext()) { - value = iterator.next().getValue(); - accumulator = keyedCombineFn.addInput(key, accumulator, value); + WindowedValue> currentValue = iterator.next(); + K key = currentValue.getValue().getKey(); + BoundedWindow currentWindow = Iterables.getFirst(currentValue.getWindows(), null); + InputT firstValue = currentValue.getValue().getValue(); + processContext = processContext.forWindowedValue(currentValue); + AccumT accumulator = combineFnRunner.createAccumulator(key, processContext); + accumulator = combineFnRunner.addInput(key, 
accumulator, firstValue, processContext); + + // we use this to keep track of the timestamps assigned by the OutputTimeFn + Instant windowTimestamp = + outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow); + + while (iterator.hasNext()) { + WindowedValue> nextValue = iterator.next(); + BoundedWindow nextWindow = Iterables.getOnlyElement(nextValue.getWindows()); + + if (nextWindow.equals(currentWindow)) { + // continue accumulating + InputT value = nextValue.getValue().getValue(); + processContext = processContext.forWindowedValue(nextValue); + accumulator = combineFnRunner.addInput(key, accumulator, value, processContext); + + windowTimestamp = outputTimeFn.combine( + windowTimestamp, + outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow)); + + } else { + // emit the value that we currently have + out.collect( + WindowedValue.of( + KV.of(key, accumulator), + windowTimestamp, + currentWindow, + PaneInfo.NO_FIRING)); + + currentWindow = nextWindow; + InputT value = nextValue.getValue().getValue(); + processContext = processContext.forWindowedValue(nextValue); + accumulator = combineFnRunner.createAccumulator(key, processContext); + accumulator = combineFnRunner.addInput(key, accumulator, value, processContext); + windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow); + } } - out.collect(KV.of(key, accumulator)); + // emit the final accumulator + out.collect( + WindowedValue.of( + KV.of(key, accumulator), + windowTimestamp, + currentWindow, + PaneInfo.NO_FIRING)); } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkProcessContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkProcessContext.java new file mode 100644 index 000000000000..0f1885ca5192 --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkProcessContext.java @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.runners.flink.translation.functions; + +import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.transforms.Aggregator; +import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.PaneInfo; +import org.apache.beam.sdk.util.TimerInternals; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.util.WindowingInternals; +import org.apache.beam.sdk.util.WindowingStrategy; +import org.apache.beam.sdk.util.state.StateInternals; +import org.apache.beam.sdk.values.PCollectionView; +import org.apache.beam.sdk.values.TupleTag; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Iterables; + +import org.apache.flink.api.common.functions.RuntimeContext; +import org.apache.flink.util.Collector; +import org.joda.time.Instant; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; + +/** + * {@link org.apache.beam.sdk.transforms.DoFn.ProcessContext} for our Flink Wrappers. + */ +class FlinkProcessContext + extends DoFn.ProcessContext { + + private final PipelineOptions pipelineOptions; + private final RuntimeContext runtimeContext; + private Collector> collector; + private final boolean requiresWindowAccess; + + protected WindowedValue windowedValue; + + protected WindowingStrategy windowingStrategy; + + private final Map, WindowingStrategy> sideInputs; + + FlinkProcessContext( + PipelineOptions pipelineOptions, + RuntimeContext runtimeContext, + DoFn doFn, + WindowingStrategy windowingStrategy, + Collector> collector, + Map, WindowingStrategy> sideInputs) { + doFn.super(); + Preconditions.checkNotNull(pipelineOptions); + Preconditions.checkNotNull(runtimeContext); + Preconditions.checkNotNull(doFn); + Preconditions.checkNotNull(collector); + + this.pipelineOptions = pipelineOptions; + this.runtimeContext = runtimeContext; + this.collector = collector; + this.requiresWindowAccess = doFn instanceof DoFn.RequiresWindowAccess; + this.windowingStrategy = windowingStrategy; + this.sideInputs = sideInputs; + + super.setupDelegateAggregators(); + } + + FlinkProcessContext( + PipelineOptions pipelineOptions, + RuntimeContext runtimeContext, + DoFn doFn, + WindowingStrategy windowingStrategy, + Map, WindowingStrategy> sideInputs) { + doFn.super(); + Preconditions.checkNotNull(pipelineOptions); + Preconditions.checkNotNull(runtimeContext); + Preconditions.checkNotNull(doFn); + + this.pipelineOptions = pipelineOptions; + this.runtimeContext = runtimeContext; + this.collector = null; + this.requiresWindowAccess = doFn instanceof DoFn.RequiresWindowAccess; + this.windowingStrategy = windowingStrategy; + this.sideInputs = sideInputs; + + super.setupDelegateAggregators(); + } + + public FlinkProcessContext forOutput( + Collector> collector) { + this.collector = collector; + + // for now, returns ourselves, to be easy on the GC + return this; + } + + + + public FlinkProcessContext forWindowedValue( + WindowedValue windowedValue) { + this.windowedValue = windowedValue; + + // for now, returns ourselves, to be easy on the GC + return this; + } + + @Override + public InputT element() { + return this.windowedValue.getValue(); + } + + + @Override + public 
Instant timestamp() { + return windowedValue.getTimestamp(); + } + + @Override + public BoundedWindow window() { + if (!requiresWindowAccess) { + throw new UnsupportedOperationException( + "window() is only available in the context of a DoFn marked as RequiresWindow."); + } + return Iterables.getOnlyElement(windowedValue.getWindows()); + } + + @Override + public PaneInfo pane() { + return windowedValue.getPane(); + } + + @Override + public WindowingInternals windowingInternals() { + + return new WindowingInternals() { + + @Override + public StateInternals stateInternals() { + throw new UnsupportedOperationException(); + } + + @Override + public void outputWindowedValue( + OutputT value, + Instant timestamp, + Collection windows, + PaneInfo pane) { + collector.collect(WindowedValue.of(value, timestamp, windows, pane)); + outputWithTimestampAndWindow(value, timestamp, windows, pane); + } + + @Override + public TimerInternals timerInternals() { + throw new UnsupportedOperationException(); + } + + @Override + public Collection windows() { + return windowedValue.getWindows(); + } + + @Override + public PaneInfo pane() { + return windowedValue.getPane(); + } + + @Override + public void writePCollectionViewData(TupleTag tag, + Iterable> data, Coder elemCoder) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public ViewT sideInput( + PCollectionView view, + BoundedWindow mainInputWindow) { + + Preconditions.checkNotNull(view, "View passed to sideInput cannot be null"); + Preconditions.checkNotNull( + sideInputs.get(view), + "Side input for " + view + " not available."); + + // get the side input strategy for mapping the window + WindowingStrategy windowingStrategy = sideInputs.get(view); + + BoundedWindow sideInputWindow = + windowingStrategy.getWindowFn().getSideInputWindow(mainInputWindow); + + Map sideInputs = + runtimeContext.getBroadcastVariableWithInitializer( + view.getTagInternal().getId(), new SideInputInitializer<>(view)); + return sideInputs.get(sideInputWindow); + } + }; + } + + @Override + public PipelineOptions getPipelineOptions() { + return pipelineOptions; + } + + @Override + public ViewT sideInput(PCollectionView view) { + Preconditions.checkNotNull(view, "View passed to sideInput cannot be null"); + Preconditions.checkNotNull(sideInputs.get(view), "Side input for " + view + " not available."); + Iterator windowIter = windowedValue.getWindows().iterator(); + BoundedWindow window; + if (!windowIter.hasNext()) { + throw new IllegalStateException( + "sideInput called when main input element is not in any windows"); + } else { + window = windowIter.next(); + if (windowIter.hasNext()) { + throw new IllegalStateException( + "sideInput called when main input element is in multiple windows"); + } + } + + // get the side input strategy for mapping the window + WindowingStrategy windowingStrategy = sideInputs.get(view); + + BoundedWindow sideInputWindow = + windowingStrategy.getWindowFn().getSideInputWindow(window); + + Map sideInputs = + runtimeContext.getBroadcastVariableWithInitializer( + view.getTagInternal().getId(), new SideInputInitializer<>(view)); + ViewT result = sideInputs.get(sideInputWindow); + if (result == null) { + result = view.fromIterableInternal(Collections.>emptyList()); + } + return result; + } + + @Override + public void output(OutputT value) { + if (windowedValue != null) { + outputWithTimestamp(value, windowedValue.getTimestamp()); + } else { + outputWithTimestamp(value, null); + } + } + + @Override + public void 
outputWithTimestamp(OutputT value, Instant timestamp) { + if (windowedValue == null) { + // we are in startBundle() or finishBundle() + + try { + Collection windows = windowingStrategy.getWindowFn().assignWindows( + new FlinkNoElementAssignContext( + windowingStrategy.getWindowFn(), + value, + timestamp)); + + collector.collect( + WindowedValue.of( + value, + timestamp != null ? timestamp : new Instant(Long.MIN_VALUE), + windows, + PaneInfo.NO_FIRING)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } else { + collector.collect( + WindowedValue.of( + value, + timestamp, + windowedValue.getWindows(), + windowedValue.getPane())); + } + } + + protected void outputWithTimestampAndWindow( + OutputT value, + Instant timestamp, + Collection windows, + PaneInfo pane) { + collector.collect( + WindowedValue.of( + value, timestamp, windows, pane)); + } + + @Override + public void sideOutput(TupleTag tag, T output) { + throw new UnsupportedOperationException(); + } + + @Override + public void sideOutputWithTimestamp(TupleTag tag, T output, Instant timestamp) { + sideOutput(tag, output); + } + + @Override + protected Aggregator + createAggregatorInternal(String name, Combine.CombineFn combiner) { + SerializableFnAggregatorWrapper wrapper = + new SerializableFnAggregatorWrapper<>(combiner); + runtimeContext.addAccumulator(name, wrapper); + return wrapper; + } +} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java index 43e458fc3720..9cbc6b914765 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java @@ -17,43 +17,179 @@ */ package org.apache.beam.runners.flink.translation.functions; -import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions; +import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.transforms.CombineFnBase; +import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.transforms.windowing.OutputTimeFn; +import org.apache.beam.sdk.transforms.windowing.PaneInfo; +import org.apache.beam.sdk.util.PerKeyCombineFnRunner; +import org.apache.beam.sdk.util.PerKeyCombineFnRunners; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.util.WindowingStrategy; import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollectionView; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; -import org.apache.flink.api.common.functions.GroupReduceFunction; +import org.apache.flink.api.common.functions.RichGroupReduceFunction; import org.apache.flink.util.Collector; +import org.joda.time.Instant; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; +import java.util.List; +import java.util.Map; /** - * Flink {@link org.apache.flink.api.common.functions.GroupReduceFunction} for executing a - * {@link org.apache.beam.sdk.transforms.Combine.PerKey} operation. 
This reads the input - * {@link org.apache.beam.sdk.values.KV} elements, extracts the key and merges the - * accumulators resulting from the PartialReduce which produced the input VA. + * This is the second part for executing a {@link org.apache.beam.sdk.transforms.Combine.PerKey} + * on Flink, the second part is {@link FlinkReduceFunction}. This function performs the final + * combination of the pre-combined values after a shuffle. + * + *
<p>
The input to {@link #reduce(Iterable, Collector)} are elements of the same key but + * for different windows. We have to ensure that we only combine elements of matching + * windows. */ -public class FlinkReduceFunction implements GroupReduceFunction, KV> { +public class FlinkReduceFunction + extends RichGroupReduceFunction>, WindowedValue>> { + + protected final CombineFnBase.PerKeyCombineFn combineFn; + + protected final DoFn, KV> doFn; + + protected final WindowingStrategy windowingStrategy; + + protected final Map, WindowingStrategy> sideInputs; + + protected final SerializedPipelineOptions serializedOptions; - private final Combine.KeyedCombineFn keyedCombineFn; + public FlinkReduceFunction( + CombineFnBase.PerKeyCombineFn keyedCombineFn, + WindowingStrategy windowingStrategy, + Map, WindowingStrategy> sideInputs, + PipelineOptions pipelineOptions) { - public FlinkReduceFunction(Combine.KeyedCombineFn keyedCombineFn) { - this.keyedCombineFn = keyedCombineFn; + this.combineFn = keyedCombineFn; + + this.windowingStrategy = windowingStrategy; + this.sideInputs = sideInputs; + + this.serializedOptions = new SerializedPipelineOptions(pipelineOptions); + + // dummy DoFn because we need one for ProcessContext + this.doFn = new DoFn, KV>() { + @Override + public void processElement(ProcessContext c) throws Exception { + + } + }; } @Override - public void reduce(Iterable> values, Collector> out) throws Exception { - Iterator> it = values.iterator(); + public void reduce( + Iterable>> elements, + Collector>> out) throws Exception { + + FlinkProcessContext, KV> processContext = + new FlinkProcessContext<>( + serializedOptions.getPipelineOptions(), + getRuntimeContext(), + doFn, + windowingStrategy, + out, + sideInputs); + + PerKeyCombineFnRunner combineFnRunner = + PerKeyCombineFnRunners.create(combineFn); - KV current = it.next(); - K k = current.getKey(); - VA accumulator = current.getValue(); + @SuppressWarnings("unchecked") + OutputTimeFn outputTimeFn = + (OutputTimeFn) windowingStrategy.getOutputTimeFn(); - while (it.hasNext()) { - current = it.next(); - keyedCombineFn.mergeAccumulators(k, ImmutableList.of(accumulator, current.getValue()) ); + + // get all elements so that we can sort them, has to fit into + // memory + // this seems very unprudent, but correct, for now + ArrayList>> sortedInput = Lists.newArrayList(); + for (WindowedValue> inputValue: elements) { + for (WindowedValue> exploded: inputValue.explodeWindows()) { + sortedInput.add(exploded); + } + } + Collections.sort(sortedInput, new Comparator>>() { + @Override + public int compare( + WindowedValue> o1, + WindowedValue> o2) { + return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp() + .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp()); + } + }); + + // iterate over the elements that are sorted by window timestamp + // + final Iterator>> iterator = sortedInput.iterator(); + + // get the first accumulator + WindowedValue> currentValue = iterator.next(); + K key = currentValue.getValue().getKey(); + BoundedWindow currentWindow = Iterables.getFirst(currentValue.getWindows(), null); + AccumT accumulator = currentValue.getValue().getValue(); + + // we use this to keep track of the timestamps assigned by the OutputTimeFn, + // in FlinkPartialReduceFunction we already merge the timestamps assigned + // to individual elements, here we just merge them + List windowTimestamps = new ArrayList<>(); + windowTimestamps.add(currentValue.getTimestamp()); + + while (iterator.hasNext()) { + WindowedValue> nextValue = 
iterator.next(); + BoundedWindow nextWindow = Iterables.getOnlyElement(nextValue.getWindows()); + + if (nextWindow.equals(currentWindow)) { + // continue accumulating + processContext = processContext.forWindowedValue(nextValue); + accumulator = combineFnRunner.mergeAccumulators( + key, ImmutableList.of(accumulator, nextValue.getValue().getValue()), processContext); + + windowTimestamps.add(nextValue.getTimestamp()); + } else { + // emit the value that we currently have + processContext = processContext.forWindowedValue(currentValue); + out.collect( + WindowedValue.of( + KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)), + outputTimeFn.merge(currentWindow, windowTimestamps), + currentWindow, + PaneInfo.NO_FIRING)); + + windowTimestamps.clear(); + + currentWindow = nextWindow; + accumulator = nextValue.getValue().getValue(); + windowTimestamps.add(nextValue.getTimestamp()); + } + + // we have to keep track so that we can set the context to the right + // windowed value when windows change in the iterable + currentValue = nextValue; } - out.collect(KV.of(k, keyedCombineFn.extractOutput(k, accumulator))); + // if at the end of the iteration we have a change in windows + // the ProcessContext will not have been updated + processContext = processContext.forWindowedValue(currentValue); + + // emit the final accumulator + out.collect( + WindowedValue.of( + KV.of(key, combineFnRunner.extractOutput(key, accumulator, processContext)), + outputTimeFn.merge(currentWindow, windowTimestamps), + currentWindow, + PaneInfo.NO_FIRING)); } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java new file mode 100644 index 000000000000..451b31b12c5e --- /dev/null +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.flink.translation.functions; + +import org.apache.beam.sdk.transforms.windowing.BoundedWindow; +import org.apache.beam.sdk.util.WindowedValue; +import org.apache.beam.sdk.values.PCollectionView; + +import org.apache.flink.api.common.functions.BroadcastVariableInitializer; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * {@link BroadcastVariableInitializer} that initializes the broadcast input as a {@code Map} + * from window to side input. 
+ */ +public class SideInputInitializer + implements BroadcastVariableInitializer, Map> { + + PCollectionView view; + + public SideInputInitializer(PCollectionView view) { + this.view = view; + } + + @Override + public Map initializeBroadcastVariable( + Iterable> inputValues) { + + // first partition into windows + Map>> partitionedElements = new HashMap<>(); + for (WindowedValue value: inputValues) { + for (BoundedWindow window: value.getWindows()) { + List> windowedValues = partitionedElements.get(window); + if (windowedValues == null) { + windowedValues = new ArrayList<>(); + partitionedElements.put(window, windowedValues); + } + windowedValues.add(value); + } + } + + Map resultMap = new HashMap<>(); + + for (Map.Entry>> elements: + partitionedElements.entrySet()) { + + @SuppressWarnings("unchecked") + Iterable> elementsIterable = + (List>) (List) elements.getValue(); + + resultMap.put(elements.getKey(), view.fromIterableInternal(elementsIterable)); + } + + return resultMap; + } +} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/UnionCoder.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/UnionCoder.java deleted file mode 100644 index cc6fd8b70917..000000000000 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/UnionCoder.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.flink.translation.functions; - - -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.StandardCoder; -import org.apache.beam.sdk.transforms.join.RawUnionValue; -import org.apache.beam.sdk.util.PropertyNames; -import org.apache.beam.sdk.util.VarInt; -import org.apache.beam.sdk.util.common.ElementByteSizeObserver; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.List; - -/** - * A UnionCoder encodes RawUnionValues. - * - * This file copied from {@link org.apache.beam.sdk.transforms.join.UnionCoder} - */ -@SuppressWarnings("serial") -public class UnionCoder extends StandardCoder { - // TODO: Think about how to integrate this with a schema object (i.e. - // a tuple of tuple tags). - /** - * Builds a union coder with the given list of element coders. This list - * corresponds to a mapping of union tag to Coder. Union tags start at 0. 
- */ - public static UnionCoder of(List> elementCoders) { - return new UnionCoder(elementCoders); - } - - @JsonCreator - public static UnionCoder jsonOf( - @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) - List> elements) { - return UnionCoder.of(elements); - } - - private int getIndexForEncoding(RawUnionValue union) { - if (union == null) { - throw new IllegalArgumentException("cannot encode a null tagged union"); - } - int index = union.getUnionTag(); - if (index < 0 || index >= elementCoders.size()) { - throw new IllegalArgumentException( - "union value index " + index + " not in range [0.." + - (elementCoders.size() - 1) + "]"); - } - return index; - } - - @SuppressWarnings("unchecked") - @Override - public void encode( - RawUnionValue union, - OutputStream outStream, - Context context) - throws IOException { - int index = getIndexForEncoding(union); - // Write out the union tag. - VarInt.encode(index, outStream); - - // Write out the actual value. - Coder coder = (Coder) elementCoders.get(index); - coder.encode( - union.getValue(), - outStream, - context); - } - - @Override - public RawUnionValue decode(InputStream inStream, Context context) - throws IOException { - int index = VarInt.decodeInt(inStream); - Object value = elementCoders.get(index).decode(inStream, context); - return new RawUnionValue(index, value); - } - - @Override - public List> getCoderArguments() { - return null; - } - - @Override - public List> getComponents() { - return elementCoders; - } - - /** - * Since this coder uses elementCoders.get(index) and coders that are known to run in constant - * time, we defer the return value to that coder. - */ - @Override - public boolean isRegisterByteSizeObserverCheap(RawUnionValue union, Context context) { - int index = getIndexForEncoding(union); - @SuppressWarnings("unchecked") - Coder coder = (Coder) elementCoders.get(index); - return coder.isRegisterByteSizeObserverCheap(union.getValue(), context); - } - - /** - * Notifies ElementByteSizeObserver about the byte size of the encoded value using this coder. - */ - @Override - public void registerByteSizeObserver( - RawUnionValue union, ElementByteSizeObserver observer, Context context) - throws Exception { - int index = getIndexForEncoding(union); - // Write out the union tag. - observer.update(VarInt.getLength(index)); - // Write out the actual value. 
- @SuppressWarnings("unchecked") - Coder coder = (Coder) elementCoders.get(index); - coder.registerByteSizeObserver(union.getValue(), observer, context); - } - - ///////////////////////////////////////////////////////////////////////////// - - private final List> elementCoders; - - private UnionCoder(List> elementCoders) { - this.elementCoders = elementCoders; - } - - @Override - public void verifyDeterministic() throws NonDeterministicException { - verifyDeterministic( - "UnionCoder is only deterministic if all element coders are", - elementCoders); - } -} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java index 895ecef1b92e..4434cf8726e1 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java @@ -18,7 +18,8 @@ package org.apache.beam.runners.flink.translation.types; import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.VoidCoder; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.util.WindowedValue; import com.google.common.base.Preconditions; @@ -71,9 +72,6 @@ public boolean isKeyType() { @Override @SuppressWarnings("unchecked") public TypeSerializer createSerializer(ExecutionConfig config) { - if (coder instanceof VoidCoder) { - return (TypeSerializer) new VoidCoderTypeSerializer(); - } return new CoderTypeSerializer<>(coder); } @@ -84,8 +82,12 @@ public int getTotalFields() { @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } CoderTypeInformation that = (CoderTypeInformation) o; @@ -113,6 +115,11 @@ public String toString() { @Override public TypeComparator createComparator(boolean sortOrderAscending, ExecutionConfig executionConfig) { - return new CoderComparator<>(coder); + WindowedValue.WindowedValueCoder windowCoder = (WindowedValue.WindowedValueCoder) coder; + if (windowCoder.getValueCoder() instanceof KvCoder) { + return new KvCoderComperator(windowCoder); + } else { + return new CoderComparator<>(coder); + } } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java index c6f3921971a6..097316b242fd 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java @@ -33,7 +33,7 @@ /** * Flink {@link org.apache.flink.api.common.typeutils.TypeSerializer} for - * Dataflow {@link org.apache.beam.sdk.coders.Coder}s + * Dataflow {@link org.apache.beam.sdk.coders.Coder Coders}. 
*/ public class CoderTypeSerializer extends TypeSerializer { @@ -128,14 +128,20 @@ public T deserialize(T t, DataInputView dataInputView) throws IOException { } @Override - public void copy(DataInputView dataInputView, DataOutputView dataOutputView) throws IOException { + public void copy( + DataInputView dataInputView, + DataOutputView dataOutputView) throws IOException { serialize(deserialize(dataInputView), dataOutputView); } @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } CoderTypeSerializer that = (CoderTypeSerializer) o; return coder.equals(that.coder); diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderComperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderComperator.java index 6f0c651406a2..79b127d1062c 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderComperator.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderComperator.java @@ -20,6 +20,8 @@ import org.apache.beam.runners.flink.translation.wrappers.DataInputViewWrapper; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.util.WindowedValue; import org.apache.beam.sdk.values.KV; import org.apache.flink.api.common.typeutils.TypeComparator; @@ -31,14 +33,13 @@ import java.io.ObjectInputStream; /** - * Flink {@link org.apache.flink.api.common.typeutils.TypeComparator} for - * {@link org.apache.beam.sdk.coders.KvCoder}. We have a special comparator + * Flink {@link TypeComparator} for {@link KvCoder}. We have a special comparator * for {@link KV} that always compares on the key only. 
*/ -public class KvCoderComperator extends TypeComparator> { +public class KvCoderComperator extends TypeComparator>> { - private KvCoder coder; - private Coder keyCoder; + private final WindowedValue.WindowedValueCoder> coder; + private final Coder keyCoder; // We use these for internal encoding/decoding for creating copies and comparing // serialized forms using a Coder @@ -52,9 +53,10 @@ public class KvCoderComperator extends TypeComparator> { // For deserializing the key private transient DataInputViewWrapper inputWrapper; - public KvCoderComperator(KvCoder coder) { + public KvCoderComperator(WindowedValue.WindowedValueCoder> coder) { this.coder = coder; - this.keyCoder = coder.getKeyCoder(); + KvCoder kvCoder = (KvCoder) coder.getValueCoder(); + this.keyCoder = kvCoder.getKeyCoder(); buffer1 = new InspectableByteArrayOutputStream(); buffer2 = new InspectableByteArrayOutputStream(); @@ -74,8 +76,8 @@ private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundE } @Override - public int hash(KV record) { - K key = record.getKey(); + public int hash(WindowedValue> record) { + K key = record.getValue().getKey(); if (key != null) { return key.hashCode(); } else { @@ -84,27 +86,27 @@ public int hash(KV record) { } @Override - public void setReference(KV toCompare) { + public void setReference(WindowedValue> toCompare) { referenceBuffer.reset(); try { - keyCoder.encode(toCompare.getKey(), referenceBuffer, Coder.Context.OUTER); + keyCoder.encode(toCompare.getValue().getKey(), referenceBuffer, Coder.Context.OUTER); } catch (IOException e) { throw new RuntimeException("Could not set reference " + toCompare + ": " + e); } } @Override - public boolean equalToReference(KV candidate) { + public boolean equalToReference(WindowedValue> candidate) { try { buffer2.reset(); - keyCoder.encode(candidate.getKey(), buffer2, Coder.Context.OUTER); + keyCoder.encode(candidate.getValue().getKey(), buffer2, Coder.Context.OUTER); byte[] arr = referenceBuffer.getBuffer(); byte[] arrOther = buffer2.getBuffer(); if (referenceBuffer.size() != buffer2.size()) { return false; } int len = buffer2.size(); - for(int i = 0; i < len; i++ ) { + for (int i = 0; i < len; i++) { if (arr[i] != arrOther[i]) { return false; } @@ -116,8 +118,9 @@ public boolean equalToReference(KV candidate) { } @Override - public int compareToReference(TypeComparator> other) { - InspectableByteArrayOutputStream otherReferenceBuffer = ((KvCoderComperator) other).referenceBuffer; + public int compareToReference(TypeComparator>> other) { + InspectableByteArrayOutputStream otherReferenceBuffer = + ((KvCoderComperator) other).referenceBuffer; byte[] arr = referenceBuffer.getBuffer(); byte[] arrOther = otherReferenceBuffer.getBuffer(); @@ -135,19 +138,19 @@ public int compareToReference(TypeComparator> other) { @Override - public int compare(KV first, KV second) { + public int compare(WindowedValue> first, WindowedValue> second) { try { buffer1.reset(); buffer2.reset(); - keyCoder.encode(first.getKey(), buffer1, Coder.Context.OUTER); - keyCoder.encode(second.getKey(), buffer2, Coder.Context.OUTER); + keyCoder.encode(first.getValue().getKey(), buffer1, Coder.Context.OUTER); + keyCoder.encode(second.getValue().getKey(), buffer2, Coder.Context.OUTER); byte[] arr = buffer1.getBuffer(); byte[] arrOther = buffer2.getBuffer(); if (buffer1.size() != buffer2.size()) { return buffer1.size() - buffer2.size(); } int len = buffer1.size(); - for(int i = 0; i < len; i++ ) { + for (int i = 0; i < len; i++) { if (arr[i] != arrOther[i]) { return arr[i] 
- arrOther[i]; } @@ -159,38 +162,19 @@ public int compare(KV first, KV second) { } @Override - public int compareSerialized(DataInputView firstSource, DataInputView secondSource) throws IOException { - + public int compareSerialized( + DataInputView firstSource, + DataInputView secondSource) throws IOException { inputWrapper.setInputView(firstSource); - K firstKey = keyCoder.decode(inputWrapper, Coder.Context.NESTED); + WindowedValue> first = coder.decode(inputWrapper, Coder.Context.NESTED); inputWrapper.setInputView(secondSource); - K secondKey = keyCoder.decode(inputWrapper, Coder.Context.NESTED); - - try { - buffer1.reset(); - buffer2.reset(); - keyCoder.encode(firstKey, buffer1, Coder.Context.OUTER); - keyCoder.encode(secondKey, buffer2, Coder.Context.OUTER); - byte[] arr = buffer1.getBuffer(); - byte[] arrOther = buffer2.getBuffer(); - if (buffer1.size() != buffer2.size()) { - return buffer1.size() - buffer2.size(); - } - int len = buffer1.size(); - for(int i = 0; i < len; i++ ) { - if (arr[i] != arrOther[i]) { - return arr[i] - arrOther[i]; - } - } - return 0; - } catch (IOException e) { - throw new RuntimeException("Could not compare reference.", e); - } + WindowedValue> second = coder.decode(inputWrapper, Coder.Context.NESTED); + return compare(first, second); } @Override public boolean supportsNormalizedKey() { - return true; + return false; } @Override @@ -209,12 +193,18 @@ public boolean isNormalizedKeyPrefixOnly(int keyBytes) { } @Override - public void putNormalizedKey(KV record, MemorySegment target, int offset, int numBytes) { + public void putNormalizedKey( + WindowedValue> record, + MemorySegment target, + int offset, + int numBytes) { + buffer1.reset(); try { - keyCoder.encode(record.getKey(), buffer1, Coder.Context.NESTED); + keyCoder.encode(record.getValue().getKey(), buffer1, Coder.Context.NESTED); } catch (IOException e) { - throw new RuntimeException("Could not serializer " + record + " using coder " + coder + ": " + e); + throw new RuntimeException( + "Could not serializer " + record + " using coder " + coder + ": " + e); } final byte[] data = buffer1.getBuffer(); final int limit = offset + numBytes; @@ -231,12 +221,16 @@ public void putNormalizedKey(KV record, MemorySegment target, int offset, } @Override - public void writeWithKeyNormalization(KV record, DataOutputView target) throws IOException { + public void writeWithKeyNormalization( + WindowedValue> record, + DataOutputView target) throws IOException { throw new UnsupportedOperationException(); } @Override - public KV readWithKeyDenormalization(KV reuse, DataInputView source) throws IOException { + public WindowedValue> readWithKeyDenormalization( + WindowedValue> reuse, + DataInputView source) throws IOException { throw new UnsupportedOperationException(); } @@ -246,14 +240,14 @@ public boolean invertNormalizedKey() { } @Override - public TypeComparator> duplicate() { + public TypeComparator>> duplicate() { return new KvCoderComperator<>(coder); } @Override public int extractKeys(Object record, Object[] target, int index) { - KV kv = (KV) record; - K k = kv.getKey(); + WindowedValue> kv = (WindowedValue>) record; + K k = kv.getValue().getKey(); target[index] = k; return 1; } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderTypeInformation.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderTypeInformation.java index 74f3821dfb2f..ba53f640bb81 100644 --- 
a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderTypeInformation.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvCoderTypeInformation.java @@ -18,6 +18,7 @@ package org.apache.beam.runners.flink.translation.types; import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.util.WindowedValue; import org.apache.beam.sdk.values.KV; import com.google.common.base.Preconditions; @@ -31,27 +32,32 @@ import java.util.List; /** - * Flink {@link org.apache.flink.api.common.typeinfo.TypeInformation} for - * Dataflow {@link org.apache.beam.sdk.coders.KvCoder}. + * Flink {@link TypeInformation} for {@link KvCoder}. This creates special comparator + * for {@link KV} that always compares on the key only. */ -public class KvCoderTypeInformation extends CompositeType> { +public class KvCoderTypeInformation extends CompositeType>> { - private KvCoder coder; + private final WindowedValue.WindowedValueCoder> coder; +// private KvCoder coder; // We don't have the Class, so we have to pass null here. What a shame... - private static Object DUMMY = new Object(); + private static Object dummy = new Object(); @SuppressWarnings("unchecked") - public KvCoderTypeInformation(KvCoder coder) { - super(((Class>) DUMMY.getClass())); + public KvCoderTypeInformation(WindowedValue.WindowedValueCoder> coder) { + super((Class) dummy.getClass()); this.coder = coder; Preconditions.checkNotNull(coder); } @Override @SuppressWarnings("unchecked") - public TypeComparator> createComparator(int[] logicalKeyFields, boolean[] orders, int logicalFieldOffset, ExecutionConfig config) { - return new KvCoderComperator((KvCoder) coder); + public TypeComparator>> createComparator( + int[] logicalKeyFields, + boolean[] orders, + int logicalFieldOffset, + ExecutionConfig config) { + return new KvCoderComperator(coder); } @Override @@ -71,7 +77,7 @@ public int getArity() { @Override @SuppressWarnings("unchecked") - public Class> getTypeClass() { + public Class>> getTypeClass() { return privateGetTypeClass(); } @@ -87,7 +93,7 @@ public boolean isKeyType() { @Override @SuppressWarnings("unchecked") - public TypeSerializer> createSerializer(ExecutionConfig config) { + public TypeSerializer>> createSerializer(ExecutionConfig config) { return new CoderTypeSerializer<>(coder); } @@ -98,8 +104,12 @@ public int getTotalFields() { @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } KvCoderTypeInformation that = (KvCoderTypeInformation) o; @@ -122,10 +132,11 @@ public String toString() { @Override @SuppressWarnings("unchecked") public TypeInformation getTypeAt(int pos) { + KvCoder kvCoder = (KvCoder) coder.getValueCoder(); if (pos == 0) { - return (TypeInformation) new CoderTypeInformation<>(coder.getKeyCoder()); + return (TypeInformation) new CoderTypeInformation<>(kvCoder.getKeyCoder()); } else if (pos == 1) { - return (TypeInformation) new CoderTypeInformation<>(coder.getValueCoder()); + return (TypeInformation) new CoderTypeInformation<>(kvCoder.getValueCoder()); } else { throw new RuntimeException("Invalid field position " + pos); } @@ -134,11 +145,12 @@ public TypeInformation getTypeAt(int pos) { @Override @SuppressWarnings("unchecked") public TypeInformation getTypeAt(String fieldExpression) { + KvCoder kvCoder = (KvCoder) coder.getValueCoder(); switch 
(fieldExpression) { case "key": - return (TypeInformation) new CoderTypeInformation<>(coder.getKeyCoder()); + return (TypeInformation) new CoderTypeInformation<>(kvCoder.getKeyCoder()); case "value": - return (TypeInformation) new CoderTypeInformation<>(coder.getValueCoder()); + return (TypeInformation) new CoderTypeInformation<>(kvCoder.getValueCoder()); default: throw new UnsupportedOperationException("Only KvCoder has fields."); } @@ -162,17 +174,24 @@ public int getFieldIndex(String fieldName) { } @Override - public void getFlatFields(String fieldExpression, int offset, List result) { - CoderTypeInformation keyTypeInfo = new CoderTypeInformation<>(coder.getKeyCoder()); + public void getFlatFields( + String fieldExpression, + int offset, + List result) { + KvCoder kvCoder = (KvCoder) coder.getValueCoder(); + + CoderTypeInformation keyTypeInfo = + new CoderTypeInformation<>(kvCoder.getKeyCoder()); result.add(new FlatFieldDescriptor(0, keyTypeInfo)); } @Override - protected TypeComparatorBuilder> createTypeComparatorBuilder() { + protected TypeComparatorBuilder>> createTypeComparatorBuilder() { return new KvCoderTypeComparatorBuilder(); } - private class KvCoderTypeComparatorBuilder implements TypeComparatorBuilder> { + private class KvCoderTypeComparatorBuilder + implements TypeComparatorBuilder>> { @Override public void initializeTypeComparatorBuilder(int size) {} @@ -181,7 +200,7 @@ public void initializeTypeComparatorBuilder(int size) {} public void addComparatorField(int fieldId, TypeComparator comparator) {} @Override - public TypeComparator> createTypeComparator(ExecutionConfig config) { + public TypeComparator>> createTypeComparator(ExecutionConfig config) { return new KvCoderComperator<>(coder); } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/VoidCoderTypeSerializer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/VoidCoderTypeSerializer.java deleted file mode 100644 index 7b48208845fd..000000000000 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/VoidCoderTypeSerializer.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.flink.translation.types; - -import org.apache.flink.api.common.typeutils.TypeSerializer; -import org.apache.flink.core.memory.DataInputView; -import org.apache.flink.core.memory.DataOutputView; - -import java.io.IOException; - -/** - * Special Flink {@link org.apache.flink.api.common.typeutils.TypeSerializer} for - * {@link org.apache.beam.sdk.coders.VoidCoder}. We need this because Flink does not - * allow returning {@code null} from an input reader. 
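A minimal, self-contained sketch (an editor's illustration, not part of this patch) of what the KvCoderComperator/KvCoderTypeInformation changes above mean by a comparator that "always compares on the key only": the value and the window metadata of a WindowedValue<KV<K, V>> are ignored, and only the encoded key bytes decide the order, mirroring the byte-wise comparison in the code being removed. Class and method names here are illustrative, and the actual body of compare(first, second) is not shown in this hunk, so this is only one plausible reading.

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;

public class KeyOnlyComparisonSketch {

  // Order two windowed KVs by their encoded keys only; values and windows
  // never influence the result.
  static <K, V> int compareByKey(
      WindowedValue<KV<K, V>> first,
      WindowedValue<KV<K, V>> second,
      Coder<K> keyCoder) throws IOException {
    byte[] a = encodeKey(first.getValue().getKey(), keyCoder);
    byte[] b = encodeKey(second.getValue().getKey(), keyCoder);
    if (a.length != b.length) {
      return a.length - b.length;
    }
    for (int i = 0; i < a.length; i++) {
      if (a[i] != b[i]) {
        return a[i] - b[i];
      }
    }
    return 0;
  }

  private static <K> byte[] encodeKey(K key, Coder<K> keyCoder) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    keyCoder.encode(key, out, Coder.Context.OUTER);
    return out.toByteArray();
  }
}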
We return a {@link VoidValue} instead - * that behaves like a {@code null}, hopefully. - */ -public class VoidCoderTypeSerializer extends TypeSerializer { - - @Override - public boolean isImmutableType() { - return false; - } - - @Override - public VoidCoderTypeSerializer duplicate() { - return this; - } - - @Override - public VoidValue createInstance() { - return VoidValue.INSTANCE; - } - - @Override - public VoidValue copy(VoidValue from) { - return from; - } - - @Override - public VoidValue copy(VoidValue from, VoidValue reuse) { - return from; - } - - @Override - public int getLength() { - return 0; - } - - @Override - public void serialize(VoidValue record, DataOutputView target) throws IOException { - target.writeByte(1); - } - - @Override - public VoidValue deserialize(DataInputView source) throws IOException { - source.readByte(); - return VoidValue.INSTANCE; - } - - @Override - public VoidValue deserialize(VoidValue reuse, DataInputView source) throws IOException { - return deserialize(source); - } - - @Override - public void copy(DataInputView source, DataOutputView target) throws IOException { - source.readByte(); - target.writeByte(1); - } - - @Override - public boolean equals(Object obj) { - if (obj instanceof VoidCoderTypeSerializer) { - VoidCoderTypeSerializer other = (VoidCoderTypeSerializer) obj; - return other.canEqual(this); - } else { - return false; - } - } - - @Override - public boolean canEqual(Object obj) { - return obj instanceof VoidCoderTypeSerializer; - } - - @Override - public int hashCode() { - return 0; - } - - public static class VoidValue { - private VoidValue() {} - - public static VoidValue INSTANCE = new VoidValue(); - } - -} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/CombineFnAggregatorWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/CombineFnAggregatorWrapper.java deleted file mode 100644 index e5567d3ea3b2..000000000000 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/CombineFnAggregatorWrapper.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.flink.translation.wrappers; - -import org.apache.beam.sdk.transforms.Aggregator; -import org.apache.beam.sdk.transforms.Combine; - -import com.google.common.collect.Lists; - -import org.apache.flink.api.common.accumulators.Accumulator; - -import java.io.Serializable; - -/** - * Wrapper that wraps a {@link org.apache.beam.sdk.transforms.Combine.CombineFn} - * in a Flink {@link org.apache.flink.api.common.accumulators.Accumulator} for using - * the combine function as an aggregator in a {@link org.apache.beam.sdk.transforms.ParDo} - * operation. 
- */ -public class CombineFnAggregatorWrapper implements Aggregator, Accumulator { - - private AA aa; - private Combine.CombineFn combiner; - - public CombineFnAggregatorWrapper() { - } - - public CombineFnAggregatorWrapper(Combine.CombineFn combiner) { - this.combiner = combiner; - this.aa = combiner.createAccumulator(); - } - - @Override - public void add(AI value) { - combiner.addInput(aa, value); - } - - @Override - public Serializable getLocalValue() { - return (Serializable) combiner.extractOutput(aa); - } - - @Override - public void resetLocal() { - aa = combiner.createAccumulator(); - } - - @Override - @SuppressWarnings("unchecked") - public void merge(Accumulator other) { - aa = combiner.mergeAccumulators(Lists.newArrayList(aa, ((CombineFnAggregatorWrapper)other).aa)); - } - - @Override - public Accumulator clone() { - // copy it by merging - AA aaCopy = combiner.mergeAccumulators(Lists.newArrayList(aa)); - CombineFnAggregatorWrapper result = new - CombineFnAggregatorWrapper<>(combiner); - result.aa = aaCopy; - return result; - } - - @Override - public void addValue(AI value) { - add(value); - } - - @Override - public String getName() { - return "CombineFn: " + combiner.toString(); - } - - @Override - public Combine.CombineFn getCombineFn() { - return combiner; - } - -} diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java index eb32fa2fd74a..82d3fb8ffae3 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java @@ -33,20 +33,21 @@ * the function as an aggregator in a {@link org.apache.beam.sdk.transforms.ParDo} * operation. 
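The SerializableFnAggregatorWrapper hunk just below mostly renames the generics (AI/AO become InputT/OutputT), but the mechanism it keeps is worth spelling out: the wrapper holds a single partial result and re-applies the CombineFn to a short list on every add() and merge(). A runnable sketch of that pattern (editor's illustration, not part of the patch; Sum.SumIntegerFn is just a convenient combiner that the streaming tests in this patch also use):

import com.google.common.collect.ImmutableList;

import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.Sum;

public class AggregatorWrapperSketch {
  public static void main(String[] args) {
    Combine.CombineFn<Integer, ?, Integer> combiner = new Sum.SumIntegerFn();

    // resetLocal(): start from the combiner's identity value (0 for a sum).
    Integer partial = combiner.apply(ImmutableList.<Integer>of());

    // add(3), add(4): fold each new input into the running partial result.
    partial = combiner.apply(ImmutableList.of(partial, 3));
    partial = combiner.apply(ImmutableList.of(partial, 4));

    // merge(other): combine this task's partial result with another task's.
    Integer otherPartial = combiner.apply(ImmutableList.of(5));
    Integer merged = combiner.apply(ImmutableList.of(partial, otherPartial));

    System.out.println(merged); // prints 12
  }
}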
*/ -public class SerializableFnAggregatorWrapper implements Aggregator, Accumulator { +public class SerializableFnAggregatorWrapper + implements Aggregator, Accumulator { - private AO aa; - private Combine.CombineFn combiner; + private OutputT aa; + private Combine.CombineFn combiner; - public SerializableFnAggregatorWrapper(Combine.CombineFn combiner) { + public SerializableFnAggregatorWrapper(Combine.CombineFn combiner) { this.combiner = combiner; resetLocal(); } - + @Override @SuppressWarnings("unchecked") - public void add(AI value) { - this.aa = combiner.apply(ImmutableList.of((AI) aa, value)); + public void add(InputT value) { + this.aa = combiner.apply(ImmutableList.of((InputT) aa, value)); } @Override @@ -56,17 +57,17 @@ public Serializable getLocalValue() { @Override public void resetLocal() { - this.aa = combiner.apply(ImmutableList.of()); + this.aa = combiner.apply(ImmutableList.of()); } @Override @SuppressWarnings("unchecked") - public void merge(Accumulator other) { - this.aa = combiner.apply(ImmutableList.of((AI) aa, (AI) other.getLocalValue())); + public void merge(Accumulator other) { + this.aa = combiner.apply(ImmutableList.of((InputT) aa, (InputT) other.getLocalValue())); } @Override - public void addValue(AI value) { + public void addValue(InputT value) { add(value); } @@ -76,15 +77,15 @@ public String getName() { } @Override - public Combine.CombineFn getCombineFn() { + public Combine.CombineFn getCombineFn() { return combiner; } @Override - public Accumulator clone() { + public Accumulator clone() { // copy it by merging - AO resultCopy = combiner.apply(Lists.newArrayList((AI) aa)); - SerializableFnAggregatorWrapper result = new + OutputT resultCopy = combiner.apply(Lists.newArrayList((InputT) aa)); + SerializableFnAggregatorWrapper result = new SerializableFnAggregatorWrapper<>(combiner); result.aa = resultCopy; diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SinkOutputFormat.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SinkOutputFormat.java index 53e544d9e8fc..c0a71329fe3f 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SinkOutputFormat.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SinkOutputFormat.java @@ -22,6 +22,7 @@ import org.apache.beam.sdk.io.Sink; import org.apache.beam.sdk.io.Write; import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.util.WindowedValue; import org.apache.flink.api.common.io.OutputFormat; import org.apache.flink.configuration.Configuration; @@ -31,10 +32,11 @@ import java.lang.reflect.Field; /** - * Wrapper class to use generic Write.Bound transforms as sinks. + * Wrapper for executing a {@link Sink} on Flink as an {@link OutputFormat}. + * * @param The type of the incoming records. 
*/ -public class SinkOutputFormat implements OutputFormat { +public class SinkOutputFormat implements OutputFormat> { private final Sink sink; @@ -75,9 +77,9 @@ public void open(int taskNumber, int numTasks) throws IOException { } @Override - public void writeRecord(T record) throws IOException { + public void writeRecord(WindowedValue record) throws IOException { try { - writer.write(record); + writer.write(record.getValue()); } catch (Exception e) { throw new IOException("Couldn't write record.", e); } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java index debd1a14d525..1d06b1ac2fc9 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java @@ -21,12 +21,16 @@ import org.apache.beam.sdk.io.BoundedSource; import org.apache.beam.sdk.io.Source; import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.transforms.windowing.GlobalWindow; +import org.apache.beam.sdk.transforms.windowing.PaneInfo; +import org.apache.beam.sdk.util.WindowedValue; import org.apache.flink.api.common.io.DefaultInputSplitAssigner; import org.apache.flink.api.common.io.InputFormat; import org.apache.flink.api.common.io.statistics.BaseStatistics; import org.apache.flink.configuration.Configuration; import org.apache.flink.core.io.InputSplitAssigner; +import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,10 +39,10 @@ /** - * A Flink {@link org.apache.flink.api.common.io.InputFormat} that wraps a - * Dataflow {@link org.apache.beam.sdk.io.Source}. + * Wrapper for executing a {@link Source} as a Flink {@link InputFormat}. 
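The two wrapper changes around this point follow a single convention: SinkOutputFormat#writeRecord above unwraps incoming elements with getValue(), while SourceInputFormat#nextRecord below wraps every element it reads into a WindowedValue carrying the reader timestamp, the global window and the no-firing pane. A small sketch of that bridge (editor's illustration, not part of the patch; element type and timestamp are placeholders):

import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.WindowedValue;

import org.joda.time.Instant;

public class WindowedValueBridgeSketch {

  // What SourceInputFormat#nextRecord now produces for each element: batch
  // sources carry no windowing of their own, so everything lands in the
  // global window with the "no firing" pane.
  static <T> WindowedValue<T> wrap(T element, Instant timestamp) {
    return WindowedValue.of(element, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
  }

  // What SinkOutputFormat#writeRecord now hands to the underlying writer.
  static <T> T unwrap(WindowedValue<T> record) {
    return record.getValue();
  }

  public static void main(String[] args) {
    WindowedValue<String> wv = wrap("hello", Instant.now());
    System.out.println(unwrap(wv) + " @ " + wv.getTimestamp());
  }
}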
*/ -public class SourceInputFormat implements InputFormat> { +public class SourceInputFormat + implements InputFormat, SourceInputSplit> { private static final Logger LOG = LoggerFactory.getLogger(SourceInputFormat.class); private final BoundedSource initialSource; @@ -122,12 +126,16 @@ public boolean reachedEnd() throws IOException { } @Override - public T nextRecord(T t) throws IOException { + public WindowedValue nextRecord(WindowedValue t) throws IOException { if (inputAvailable) { final T current = reader.getCurrent(); + final Instant timestamp = reader.getCurrentTimestamp(); // advance reader to have a record ready next time inputAvailable = reader.advance(); - return current; + return WindowedValue.of( + current, + timestamp, + GlobalWindow.INSTANCE, PaneInfo.NO_FIRING); } return null; diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupByKeyWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupByKeyWrapper.java index 3bf566bce762..6b69d547cf12 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupByKeyWrapper.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/FlinkGroupByKeyWrapper.java @@ -18,7 +18,6 @@ package org.apache.beam.runners.flink.translation.wrappers.streaming; import org.apache.beam.runners.flink.translation.types.CoderTypeInformation; -import org.apache.beam.runners.flink.translation.types.VoidCoderTypeSerializer; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.KvCoder; import org.apache.beam.sdk.coders.VoidCoder; @@ -54,7 +53,7 @@ public static KeyedStream>, K> groupStreamByKey(Da @Override public K getKey(WindowedValue> value) throws Exception { - return isKeyVoid ? (K) VoidCoderTypeSerializer.VoidValue.INSTANCE : + return isKeyVoid ? 
(K) VoidValue.INSTANCE : value.getValue().getKey(); } @@ -64,4 +63,11 @@ public TypeInformation getProducedType() { } }); } + + // special type to return as key for null key + public static class VoidValue { + private VoidValue() {} + + public static VoidValue INSTANCE = new VoidValue(); + } } diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/FlinkStreamingCreateFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/FlinkStreamingCreateFunction.java index d6aff7d7a4ee..8cd8351021b4 100644 --- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/FlinkStreamingCreateFunction.java +++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/FlinkStreamingCreateFunction.java @@ -17,7 +17,6 @@ */ package org.apache.beam.runners.flink.translation.wrappers.streaming.io; -import org.apache.beam.runners.flink.translation.types.VoidCoderTypeSerializer; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.transforms.windowing.GlobalWindow; import org.apache.beam.sdk.transforms.windowing.PaneInfo; @@ -47,17 +46,11 @@ public FlinkStreamingCreateFunction(List elements, Coder coder) { @Override public void flatMap(IN value, Collector> out) throws Exception { - @SuppressWarnings("unchecked") - OUT voidValue = (OUT) VoidCoderTypeSerializer.VoidValue.INSTANCE; for (byte[] element : elements) { ByteArrayInputStream bai = new ByteArrayInputStream(element); OUT outValue = coder.decode(bai, Coder.Context.OUTER); - if (outValue == null) { - out.collect(WindowedValue.of(voidValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING)); - } else { - out.collect(WindowedValue.of(outValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING)); - } + out.collect(WindowedValue.of(outValue, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING)); } out.close(); diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/AvroITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/AvroITCase.java deleted file mode 100644 index 113fee0881de..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/AvroITCase.java +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
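For reference, the null-key handling added to FlinkGroupByKeyWrapper above can be read as a plain Flink KeySelector: Flink does not accept null keys, so a Void-keyed PCollection is keyed on a shared sentinel object instead. A sketch under that assumption (editor's illustration, not part of the patch; the Object key type and the class names are placeholders for the patch's VoidValue singleton):

import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;

import org.apache.flink.api.java.functions.KeySelector;

public class NullKeySelectorSketch<K, V>
    implements KeySelector<WindowedValue<KV<K, V>>, Object> {

  /** Stand-in for the patch's VoidValue singleton. */
  public static final class NullKey {
    public static final NullKey INSTANCE = new NullKey();
    private NullKey() {}
  }

  private final boolean keyIsVoid; // true when the key coder is a VoidCoder

  public NullKeySelectorSketch(boolean keyIsVoid) {
    this.keyIsVoid = keyIsVoid;
  }

  @Override
  public Object getKey(WindowedValue<KV<K, V>> value) throws Exception {
    // Substitute the sentinel for null keys; otherwise key on the KV's key.
    return keyIsVoid ? NullKey.INSTANCE : value.getValue().getKey();
  }
}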
- */ - -package org.apache.beam.runners.flink; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.AvroCoder; -import org.apache.beam.sdk.io.AvroIO; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; - -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - - -public class AvroITCase extends JavaProgramTestBase { - - protected String resultPath; - protected String tmpPath; - - public AvroITCase(){ - } - - static final String[] EXPECTED_RESULT = new String[] { - "Joe red 3", - "Mary blue 4", - "Mark green 1", - "Julia purple 5" - }; - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - tmpPath = getTempDirPath("tmp"); - - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath); - } - - @Override - protected void testProgram() throws Exception { - runProgram(tmpPath, resultPath); - } - - private static void runProgram(String tmpPath, String resultPath) { - Pipeline p = FlinkTestPipeline.createForBatch(); - - p - .apply(Create.of( - new User("Joe", 3, "red"), - new User("Mary", 4, "blue"), - new User("Mark", 1, "green"), - new User("Julia", 5, "purple")) - .withCoder(AvroCoder.of(User.class))) - - .apply(AvroIO.Write.to(tmpPath) - .withSchema(User.class)); - - p.run(); - - p = FlinkTestPipeline.createForBatch(); - - p - .apply(AvroIO.Read.from(tmpPath).withSchema(User.class).withoutValidation()) - - .apply(ParDo.of(new DoFn() { - @Override - public void processElement(ProcessContext c) throws Exception { - User u = c.element(); - String result = u.getName() + " " + u.getFavoriteColor() + " " + u.getFavoriteNumber(); - c.output(result); - } - })) - - .apply(TextIO.Write.to(resultPath)); - - p.run(); - } - - private static class User { - - private String name; - private int favoriteNumber; - private String favoriteColor; - - public User() {} - - public User(String name, int favoriteNumber, String favoriteColor) { - this.name = name; - this.favoriteNumber = favoriteNumber; - this.favoriteColor = favoriteColor; - } - - public String getName() { - return name; - } - - public String getFavoriteColor() { - return favoriteColor; - } - - public int getFavoriteNumber() { - return favoriteNumber; - } - } - -} - diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlattenizeITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlattenizeITCase.java deleted file mode 100644 index ac0a3d7d4d67..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlattenizeITCase.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.flink; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.Flatten; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionList; - -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - -public class FlattenizeITCase extends JavaProgramTestBase { - - private String resultPath; - private String resultPath2; - - private static final String[] words = {"hello", "this", "is", "a", "DataSet!"}; - private static final String[] words2 = {"hello", "this", "is", "another", "DataSet!"}; - private static final String[] words3 = {"hello", "this", "is", "yet", "another", "DataSet!"}; - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - resultPath2 = getTempDirPath("result2"); - } - - @Override - protected void postSubmit() throws Exception { - String join = Joiner.on('\n').join(words); - String join2 = Joiner.on('\n').join(words2); - String join3 = Joiner.on('\n').join(words3); - compareResultsByLinesInMemory(join + "\n" + join2, resultPath); - compareResultsByLinesInMemory(join + "\n" + join2 + "\n" + join3, resultPath2); - } - - - @Override - protected void testProgram() throws Exception { - Pipeline p = FlinkTestPipeline.createForBatch(); - - PCollection p1 = p.apply(Create.of(words)); - PCollection p2 = p.apply(Create.of(words2)); - - PCollectionList list = PCollectionList.of(p1).and(p2); - - list.apply(Flatten.pCollections()).apply(TextIO.Write.to(resultPath)); - - PCollection p3 = p.apply(Create.of(words3)); - - PCollectionList list2 = list.and(p3); - - list2.apply(Flatten.pCollections()).apply(TextIO.Write.to(resultPath2)); - - p.run(); - } - -} diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java index f015a6680568..edde925c330c 100644 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java +++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java @@ -60,7 +60,7 @@ public static FlinkTestPipeline createForStreaming() { * @return The Test Pipeline. */ private static FlinkTestPipeline create(boolean streaming) { - FlinkPipelineRunner flinkRunner = FlinkPipelineRunner.createForTest(streaming); + TestFlinkPipelineRunner flinkRunner = TestFlinkPipelineRunner.create(streaming); return new FlinkTestPipeline(flinkRunner, flinkRunner.getPipelineOptions()); } diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/JoinExamplesITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/JoinExamplesITCase.java deleted file mode 100644 index 47685b6be6f3..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/JoinExamplesITCase.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.flink; - -import org.apache.beam.runners.flink.util.JoinExamples; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.values.PCollection; - -import com.google.api.services.bigquery.model.TableRow; -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - -import java.util.Arrays; -import java.util.List; - - -/** - * Unfortunately we need to copy the code from the Dataflow SDK because it is not public there. - */ -public class JoinExamplesITCase extends JavaProgramTestBase { - - protected String resultPath; - - public JoinExamplesITCase(){ - } - - private static final TableRow row1 = new TableRow() - .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212") - .set("Actor1Name", "BANGKOK").set("SOURCEURL", "http://cnn.com"); - private static final TableRow row2 = new TableRow() - .set("ActionGeo_CountryCode", "VM").set("SQLDATE", "20141212") - .set("Actor1Name", "LAOS").set("SOURCEURL", "http://www.chicagotribune.com"); - private static final TableRow row3 = new TableRow() - .set("ActionGeo_CountryCode", "BE").set("SQLDATE", "20141213") - .set("Actor1Name", "AFGHANISTAN").set("SOURCEURL", "http://cnn.com"); - static final TableRow[] EVENTS = new TableRow[] { - row1, row2, row3 - }; - static final List EVENT_ARRAY = Arrays.asList(EVENTS); - - private static final TableRow cc1 = new TableRow() - .set("FIPSCC", "VM").set("HumanName", "Vietnam"); - private static final TableRow cc2 = new TableRow() - .set("FIPSCC", "BE").set("HumanName", "Belgium"); - static final TableRow[] CCS = new TableRow[] { - cc1, cc2 - }; - static final List CC_ARRAY = Arrays.asList(CCS); - - static final String[] JOINED_EVENTS = new String[] { - "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: LAOS, " - + "url: http://www.chicagotribune.com", - "Country code: VM, Country name: Vietnam, Event info: Date: 20141212, Actor1: BANGKOK, " - + "url: http://cnn.com", - "Country code: BE, Country name: Belgium, Event info: Date: 20141213, Actor1: AFGHANISTAN, " - + "url: http://cnn.com" - }; - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(JOINED_EVENTS), resultPath); - } - - @Override - protected void testProgram() throws Exception { - - Pipeline p = FlinkTestPipeline.createForBatch(); - - PCollection input1 = p.apply(Create.of(EVENT_ARRAY)); - PCollection input2 = p.apply(Create.of(CC_ARRAY)); - - PCollection output = JoinExamples.joinEvents(input1, input2); - - output.apply(TextIO.Write.to(resultPath)); - - p.run(); - } -} - diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/MaybeEmptyTestITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/MaybeEmptyTestITCase.java deleted file mode 100644 index 4d66fa421c5e..000000000000 --- 
a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/MaybeEmptyTestITCase.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.flink; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.VoidCoder; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; - -import org.apache.flink.test.util.JavaProgramTestBase; - -import java.io.Serializable; - -public class MaybeEmptyTestITCase extends JavaProgramTestBase implements Serializable { - - protected String resultPath; - - protected final String expected = "test"; - - public MaybeEmptyTestITCase() { - } - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(expected, resultPath); - } - - @Override - protected void testProgram() throws Exception { - - Pipeline p = FlinkTestPipeline.createForBatch(); - - p.apply(Create.of((Void) null)).setCoder(VoidCoder.of()) - .apply(ParDo.of( - new DoFn() { - @Override - public void processElement(DoFn.ProcessContext c) { - c.output(expected); - } - })).apply(TextIO.Write.to(resultPath)); - p.run(); - } - -} diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ParDoMultiOutputITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ParDoMultiOutputITCase.java deleted file mode 100644 index a2ef4e29f403..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ParDoMultiOutputITCase.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.flink; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TupleTagList; - -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - -import java.io.Serializable; - -public class ParDoMultiOutputITCase extends JavaProgramTestBase implements Serializable { - - private String resultPath; - - private static String[] expectedWords = {"MAAA", "MAAFOOO"}; - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on("\n").join(expectedWords), resultPath); - } - - @Override - protected void testProgram() throws Exception { - Pipeline p = FlinkTestPipeline.createForBatch(); - - PCollection words = p.apply(Create.of("Hello", "Whatupmyman", "hey", "SPECIALthere", "MAAA", "MAAFOOO")); - - // Select words whose length is below a cut off, - // plus the lengths of words that are above the cut off. - // Also select words starting with "MARKER". - final int wordLengthCutOff = 3; - // Create tags to use for the main and side outputs. - final TupleTag wordsBelowCutOffTag = new TupleTag(){}; - final TupleTag wordLengthsAboveCutOffTag = new TupleTag(){}; - final TupleTag markedWordsTag = new TupleTag(){}; - - PCollectionTuple results = - words.apply(ParDo - .withOutputTags(wordsBelowCutOffTag, TupleTagList.of(wordLengthsAboveCutOffTag) - .and(markedWordsTag)) - .of(new DoFn() { - final TupleTag specialWordsTag = new TupleTag() { - }; - - public void processElement(ProcessContext c) { - String word = c.element(); - if (word.length() <= wordLengthCutOff) { - c.output(word); - } else { - c.sideOutput(wordLengthsAboveCutOffTag, word.length()); - } - if (word.startsWith("MAA")) { - c.sideOutput(markedWordsTag, word); - } - - if (word.startsWith("SPECIAL")) { - c.sideOutput(specialWordsTag, word); - } - } - })); - - // Extract the PCollection results, by tag. - PCollection wordsBelowCutOff = results.get(wordsBelowCutOffTag); - PCollection wordLengthsAboveCutOff = results.get - (wordLengthsAboveCutOffTag); - PCollection markedWords = results.get(markedWordsTag); - - markedWords.apply(TextIO.Write.to(resultPath)); - - p.run(); - } -} diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java index 66c959eea90c..bb79b270945c 100644 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java +++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java @@ -28,6 +28,9 @@ import org.apache.flink.test.util.JavaProgramTestBase; +import java.io.File; +import java.net.URI; + /** * Reads from a bounded source in batch execution. 
*/ @@ -44,6 +47,13 @@ public ReadSourceITCase(){ @Override protected void preSubmit() throws Exception { resultPath = getTempDirPath("result"); + + // need to create the dir, otherwise Beam sinks don't + // work for these tests + + if (!new File(new URI(resultPath)).mkdirs()) { + throw new RuntimeException("Could not create output dir."); + } } @Override @@ -56,7 +66,7 @@ protected void testProgram() throws Exception { runProgram(resultPath); } - private static void runProgram(String resultPath) { + private static void runProgram(String resultPath) throws Exception { Pipeline p = FlinkTestPipeline.createForBatch(); @@ -69,7 +79,7 @@ public void processElement(ProcessContext c) throws Exception { } })); - result.apply(TextIO.Write.to(resultPath)); + result.apply(TextIO.Write.to(new URI(resultPath).getPath() + "/part")); p.run(); } diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesEmptyITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesEmptyITCase.java deleted file mode 100644 index 471d3262a36c..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesEmptyITCase.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
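The ReadSourceITCase change above notes that the Beam sinks do not work for these tests unless the output directory already exists: the file: URI returned for the temp path is turned into a File, created up front, and TextIO then writes its shards under a "part" prefix inside it. A standalone sketch of that setup (editor's illustration, not part of the patch; the path is hypothetical):

import java.io.File;
import java.net.URI;

public class OutputDirSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical stand-in for getTempDirPath("result"), which yields a file: URI.
    String resultPath = "file:///tmp/flink-readsource-result";

    // Same workaround as in ReadSourceITCase#preSubmit: create the directory
    // first, because the sink will not create it in these tests.
    if (!new File(new URI(resultPath)).mkdirs()) {
      throw new RuntimeException("Could not create output dir.");
    }

    // The sink then writes its output files under a "part" prefix inside it.
    String sinkPrefix = new URI(resultPath).getPath() + "/part";
    System.out.println("TextIO.Write target: " + sinkPrefix);
  }
}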
- */ -package org.apache.beam.runners.flink; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.RemoveDuplicates; -import org.apache.beam.sdk.values.PCollection; - -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - -import java.util.Collections; -import java.util.List; - - -public class RemoveDuplicatesEmptyITCase extends JavaProgramTestBase { - - protected String resultPath; - - public RemoveDuplicatesEmptyITCase(){ - } - - static final String[] EXPECTED_RESULT = new String[] {}; - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath); - } - - @Override - protected void testProgram() throws Exception { - - List strings = Collections.emptyList(); - - Pipeline p = FlinkTestPipeline.createForBatch(); - - PCollection input = - p.apply(Create.of(strings)) - .setCoder(StringUtf8Coder.of()); - - PCollection output = - input.apply(RemoveDuplicates.create()); - - output.apply(TextIO.Write.to(resultPath)); - p.run(); - } -} - diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesITCase.java deleted file mode 100644 index 0544f20eb310..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/RemoveDuplicatesITCase.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.flink; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.RemoveDuplicates; -import org.apache.beam.sdk.values.PCollection; - -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - -import java.util.Arrays; -import java.util.List; - - -public class RemoveDuplicatesITCase extends JavaProgramTestBase { - - protected String resultPath; - - public RemoveDuplicatesITCase(){ - } - - static final String[] EXPECTED_RESULT = new String[] { - "k1", "k5", "k2", "k3"}; - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath); - } - - @Override - protected void testProgram() throws Exception { - - List strings = Arrays.asList("k1", "k5", "k5", "k2", "k1", "k2", "k3"); - - Pipeline p = FlinkTestPipeline.createForBatch(); - - PCollection input = - p.apply(Create.of(strings)) - .setCoder(StringUtf8Coder.of()); - - PCollection output = - input.apply(RemoveDuplicates.create()); - - output.apply(TextIO.Write.to(resultPath)); - p.run(); - } -} - diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/SideInputITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/SideInputITCase.java deleted file mode 100644 index 2c7c65e8af3d..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/SideInputITCase.java +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.flink; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.View; -import org.apache.beam.sdk.values.PCollectionView; - -import org.apache.flink.test.util.JavaProgramTestBase; - -import java.io.Serializable; - -public class SideInputITCase extends JavaProgramTestBase implements Serializable { - - private static final String expected = "Hello!"; - - protected String resultPath; - - @Override - protected void testProgram() throws Exception { - - - Pipeline p = FlinkTestPipeline.createForBatch(); - - - final PCollectionView sidesInput = p - .apply(Create.of(expected)) - .apply(View.asSingleton()); - - p.apply(Create.of("bli")) - .apply(ParDo.of(new DoFn() { - @Override - public void processElement(ProcessContext c) throws Exception { - String s = c.sideInput(sidesInput); - c.output(s); - } - }).withSideInputs(sidesInput)).apply(TextIO.Write.to(resultPath)); - - p.run(); - } - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(expected, resultPath); - } -} diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/TfIdfITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/TfIdfITCase.java deleted file mode 100644 index 547f3c3a4660..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/TfIdfITCase.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.flink; - -import org.apache.beam.examples.complete.TfIdf; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.StringDelegateCoder; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.Keys; -import org.apache.beam.sdk.transforms.RemoveDuplicates; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; - -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - -import java.net.URI; - - -public class TfIdfITCase extends JavaProgramTestBase { - - protected String resultPath; - - public TfIdfITCase(){ - } - - static final String[] EXPECTED_RESULT = new String[] { - "a", "m", "n", "b", "c", "d"}; - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath); - } - - @Override - protected void testProgram() throws Exception { - - Pipeline pipeline = FlinkTestPipeline.createForBatch(); - - pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class)); - - PCollection>> wordToUriAndTfIdf = pipeline - .apply(Create.of( - KV.of(new URI("x"), "a b c d"), - KV.of(new URI("y"), "a b c"), - KV.of(new URI("z"), "a m n"))) - .apply(new TfIdf.ComputeTfIdf()); - - PCollection words = wordToUriAndTfIdf - .apply(Keys.create()) - .apply(RemoveDuplicates.create()); - - words.apply(TextIO.Write.to(resultPath)); - - pipeline.run(); - } -} - diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountITCase.java deleted file mode 100644 index 3254e7885db8..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountITCase.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.flink; - -import org.apache.beam.examples.WordCount; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.PCollection; - -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - -import java.util.Arrays; -import java.util.List; - - -public class WordCountITCase extends JavaProgramTestBase { - - protected String resultPath; - - public WordCountITCase(){ - } - - static final String[] WORDS_ARRAY = new String[] { - "hi there", "hi", "hi sue bob", - "hi sue", "", "bob hi"}; - - static final List WORDS = Arrays.asList(WORDS_ARRAY); - - static final String[] COUNTS_ARRAY = new String[] { - "hi: 5", "there: 1", "sue: 2", "bob: 2"}; - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(COUNTS_ARRAY), resultPath); - } - - @Override - protected void testProgram() throws Exception { - - Pipeline p = FlinkTestPipeline.createForBatch(); - - PCollection input = p.apply(Create.of(WORDS)).setCoder(StringUtf8Coder.of()); - - input - .apply(new WordCount.CountWords()) - .apply(MapElements.via(new WordCount.FormatAsTextFn())) - .apply(TextIO.Write.to(resultPath)); - - p.run(); - } -} - diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin2ITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin2ITCase.java deleted file mode 100644 index 6570e7df5508..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin2ITCase.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.beam.runners.flink; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.join.CoGbkResult; -import org.apache.beam.sdk.transforms.join.CoGroupByKey; -import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TupleTag; - -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - - -public class WordCountJoin2ITCase extends JavaProgramTestBase { - - static final String[] WORDS_1 = new String[] { - "hi there", "hi", "hi sue bob", - "hi sue", "", "bob hi"}; - - static final String[] WORDS_2 = new String[] { - "hi tim", "beauty", "hooray sue bob", - "hi there", "", "please say hi"}; - - static final String[] RESULTS = new String[] { - "beauty -> Tag1: Tag2: 1", - "bob -> Tag1: 2 Tag2: 1", - "hi -> Tag1: 5 Tag2: 3", - "hooray -> Tag1: Tag2: 1", - "please -> Tag1: Tag2: 1", - "say -> Tag1: Tag2: 1", - "sue -> Tag1: 2 Tag2: 1", - "there -> Tag1: 1 Tag2: 1", - "tim -> Tag1: Tag2: 1" - }; - - static final TupleTag tag1 = new TupleTag<>("Tag1"); - static final TupleTag tag2 = new TupleTag<>("Tag2"); - - protected String resultPath; - - @Override - protected void preSubmit() throws Exception { - resultPath = getTempDirPath("result"); - } - - @Override - protected void postSubmit() throws Exception { - compareResultsByLinesInMemory(Joiner.on('\n').join(RESULTS), resultPath); - } - - @Override - protected void testProgram() throws Exception { - Pipeline p = FlinkTestPipeline.createForBatch(); - - /* Create two PCollections and join them */ - PCollection> occurences1 = p.apply(Create.of(WORDS_1)) - .apply(ParDo.of(new ExtractWordsFn())) - .apply(Count.perElement()); - - PCollection> occurences2 = p.apply(Create.of(WORDS_2)) - .apply(ParDo.of(new ExtractWordsFn())) - .apply(Count.perElement()); - - /* CoGroup the two collections */ - PCollection> mergedOccurences = KeyedPCollectionTuple - .of(tag1, occurences1) - .and(tag2, occurences2) - .apply(CoGroupByKey.create()); - - /* Format output */ - mergedOccurences.apply(ParDo.of(new FormatCountsFn())) - .apply(TextIO.Write.named("test").to(resultPath)); - - p.run(); - } - - - static class ExtractWordsFn extends DoFn { - - @Override - public void startBundle(Context c) { - } - - @Override - public void processElement(ProcessContext c) { - // Split the line into words. - String[] words = c.element().split("[^a-zA-Z']+"); - - // Output each word encountered into the output PCollection. 
- for (String word : words) { - if (!word.isEmpty()) { - c.output(word); - } - } - } - } - - static class FormatCountsFn extends DoFn, String> { - @Override - public void processElement(ProcessContext c) { - CoGbkResult value = c.element().getValue(); - String key = c.element().getKey(); - String countTag1 = tag1.getId() + ": "; - String countTag2 = tag2.getId() + ": "; - for (Long count : value.getAll(tag1)) { - countTag1 += count + " "; - } - for (Long count : value.getAll(tag2)) { - countTag2 += count; - } - c.output(key + " -> " + countTag1 + countTag2); - } - } - - -} diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin3ITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin3ITCase.java deleted file mode 100644 index 60dc74af90b6..000000000000 --- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WordCountJoin3ITCase.java +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.runners.flink; - -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.join.CoGbkResult; -import org.apache.beam.sdk.transforms.join.CoGroupByKey; -import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TupleTag; - -import com.google.common.base.Joiner; - -import org.apache.flink.test.util.JavaProgramTestBase; - - -public class WordCountJoin3ITCase extends JavaProgramTestBase { - - static final String[] WORDS_1 = new String[] { - "hi there", "hi", "hi sue bob", - "hi sue", "", "bob hi"}; - - static final String[] WORDS_2 = new String[] { - "hi tim", "beauty", "hooray sue bob", - "hi there", "", "please say hi"}; - - static final String[] WORDS_3 = new String[] { - "hi stephan", "beauty", "hooray big fabian", - "hi yo", "", "please say hi"}; - - static final String[] RESULTS = new String[] { - "beauty -> Tag1: Tag2: 1 Tag3: 1", - "bob -> Tag1: 2 Tag2: 1 Tag3: ", - "hi -> Tag1: 5 Tag2: 3 Tag3: 3", - "hooray -> Tag1: Tag2: 1 Tag3: 1", - "please -> Tag1: Tag2: 1 Tag3: 1", - "say -> Tag1: Tag2: 1 Tag3: 1", - "sue -> Tag1: 2 Tag2: 1 Tag3: ", - "there -> Tag1: 1 Tag2: 1 Tag3: ", - "tim -> Tag1: Tag2: 1 Tag3: ", - "stephan -> Tag1: Tag2: Tag3: 1", - "yo -> Tag1: Tag2: Tag3: 1", - "fabian -> Tag1: Tag2: Tag3: 1", - "big -> Tag1: Tag2: Tag3: 1" - }; - - static final TupleTag tag1 = new TupleTag<>("Tag1"); - static final TupleTag tag2 = new 
-  static final TupleTag<Long> tag3 = new TupleTag<>("Tag3");
-
-  protected String resultPath;
-
-  @Override
-  protected void preSubmit() throws Exception {
-    resultPath = getTempDirPath("result");
-  }
-
-  @Override
-  protected void postSubmit() throws Exception {
-    compareResultsByLinesInMemory(Joiner.on('\n').join(RESULTS), resultPath);
-  }
-
-  @Override
-  protected void testProgram() throws Exception {
-
-    Pipeline p = FlinkTestPipeline.createForBatch();
-
-    /* Create two PCollections and join them */
-    PCollection<KV<String, Long>> occurences1 = p.apply(Create.of(WORDS_1))
-        .apply(ParDo.of(new ExtractWordsFn()))
-        .apply(Count.<String>perElement());
-
-    PCollection<KV<String, Long>> occurences2 = p.apply(Create.of(WORDS_2))
-        .apply(ParDo.of(new ExtractWordsFn()))
-        .apply(Count.<String>perElement());
-
-    PCollection<KV<String, Long>> occurences3 = p.apply(Create.of(WORDS_3))
-        .apply(ParDo.of(new ExtractWordsFn()))
-        .apply(Count.<String>perElement());
-
-    /* CoGroup the two collections */
-    PCollection<KV<String, CoGbkResult>> mergedOccurences = KeyedPCollectionTuple
-        .of(tag1, occurences1)
-        .and(tag2, occurences2)
-        .and(tag3, occurences3)
-        .apply(CoGroupByKey.<String>create());
-
-    /* Format output */
-    mergedOccurences.apply(ParDo.of(new FormatCountsFn()))
-        .apply(TextIO.Write.named("test").to(resultPath));
-
-    p.run();
-  }
-
-
-  static class ExtractWordsFn extends DoFn<String, String> {
-
-    @Override
-    public void startBundle(Context c) {
-    }
-
-    @Override
-    public void processElement(ProcessContext c) {
-      // Split the line into words.
-      String[] words = c.element().split("[^a-zA-Z']+");
-
-      // Output each word encountered into the output PCollection.
-      for (String word : words) {
-        if (!word.isEmpty()) {
-          c.output(word);
-        }
-      }
-    }
-  }
-
-  static class FormatCountsFn extends DoFn<KV<String, CoGbkResult>, String> {
-    @Override
-    public void processElement(ProcessContext c) {
-      CoGbkResult value = c.element().getValue();
-      String key = c.element().getKey();
-      String countTag1 = tag1.getId() + ": ";
-      String countTag2 = tag2.getId() + ": ";
-      String countTag3 = tag3.getId() + ": ";
-      for (Long count : value.getAll(tag1)) {
-        countTag1 += count + " ";
-      }
-      for (Long count : value.getAll(tag2)) {
-        countTag2 += count + " ";
-      }
-      for (Long count : value.getAll(tag3)) {
-        countTag3 += count;
-      }
-      c.output(key + " -> " + countTag1 + countTag2 + countTag3);
-    }
-  }
-
-}
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupAlsoByWindowTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupAlsoByWindowTest.java
index c76af657b9ab..3e5a17dbdfea 100644
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupAlsoByWindowTest.java
+++ b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupAlsoByWindowTest.java
@@ -44,6 +44,7 @@
 import org.apache.flink.streaming.api.watermark.Watermark;
 import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
 import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
+import org.apache.flink.streaming.util.StreamingMultipleProgramsTestBase;
 import org.apache.flink.streaming.util.TestHarnessUtil;
 import org.joda.time.Duration;
 import org.joda.time.Instant;
@@ -53,7 +54,7 @@
 import java.util.Comparator;
 import java.util.concurrent.ConcurrentLinkedQueue;
 
-public class GroupAlsoByWindowTest {
+public class GroupAlsoByWindowTest extends StreamingMultipleProgramsTestBase {
 
   private final Combine.CombineFn combiner = new Sum.SumIntegerFn();
 
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/util/JoinExamples.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/util/JoinExamples.java
deleted file mode 100644
index e6b7f64f69a1..000000000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/util/JoinExamples.java
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.util;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.BigQueryIO;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.options.Validation;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.join.CoGbkResult;
-import org.apache.beam.sdk.transforms.join.CoGroupByKey;
-import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.TupleTag;
-
-import com.google.api.services.bigquery.model.TableRow;
-
-/**
- * Copied from {@link org.apache.beam.examples.JoinExamples} because the code
- * is private there.
- */
-public class JoinExamples {
-
-  // A 1000-row sample of the GDELT data here: gdelt-bq:full.events.
-  private static final String GDELT_EVENTS_TABLE =
-      "clouddataflow-readonly:samples.gdelt_sample";
-  // A table that maps country codes to country names.
-  private static final String COUNTRY_CODES =
-      "gdelt-bq:full.crosswalk_geocountrycodetohuman";
-
-  /**
-   * Join two collections, using country code as the key.
-   */
-  public static PCollection<String> joinEvents(PCollection<TableRow> eventsTable,
-      PCollection<TableRow> countryCodes) throws Exception {
-
-    final TupleTag<String> eventInfoTag = new TupleTag<>();
-    final TupleTag<String> countryInfoTag = new TupleTag<>();
-
-    // transform both input collections to tuple collections, where the keys are country
-    // codes in both cases.
-    PCollection<KV<String, String>> eventInfo = eventsTable.apply(
-        ParDo.of(new ExtractEventDataFn()));
-    PCollection<KV<String, String>> countryInfo = countryCodes.apply(
-        ParDo.of(new ExtractCountryInfoFn()));
-
-    // country code 'key' -> CGBKR (<event info>, <country name>)
-    PCollection<KV<String, CoGbkResult>> kvpCollection = KeyedPCollectionTuple
-        .of(eventInfoTag, eventInfo)
-        .and(countryInfoTag, countryInfo)
-        .apply(CoGroupByKey.<String>create());
-
-    // Process the CoGbkResult elements generated by the CoGroupByKey transform.
-    // country code 'key' -> string of <event info>, <country name>
-    PCollection<KV<String, String>> finalResultCollection =
-        kvpCollection.apply(ParDo.of(new DoFn<KV<String, CoGbkResult>, KV<String, String>>() {
-          @Override
-          public void processElement(ProcessContext c) {
-            KV<String, CoGbkResult> e = c.element();
-            CoGbkResult val = e.getValue();
-            String countryCode = e.getKey();
-            String countryName;
-            countryName = e.getValue().getOnly(countryInfoTag, "Kostas");
-            for (String eventInfo : c.element().getValue().getAll(eventInfoTag)) {
-              // Generate a string that combines information from both collection values
-              c.output(KV.of(countryCode, "Country name: " + countryName
-                  + ", Event info: " + eventInfo));
-            }
-          }
-        }));
-
-    // write to GCS
-    return finalResultCollection
-        .apply(ParDo.of(new DoFn<KV<String, String>, String>() {
-          @Override
-          public void processElement(ProcessContext c) {
-            String outputstring = "Country code: " + c.element().getKey()
-                + ", " + c.element().getValue();
-            c.output(outputstring);
-          }
-        }));
-  }
-
-  /**
-   * Examines each row (event) in the input table. Output a KV with the key the country
-   * code of the event, and the value a string encoding event information.
-   */
-  static class ExtractEventDataFn extends DoFn<TableRow, KV<String, String>> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = c.element();
-      String countryCode = (String) row.get("ActionGeo_CountryCode");
-      String sqlDate = (String) row.get("SQLDATE");
-      String actor1Name = (String) row.get("Actor1Name");
-      String sourceUrl = (String) row.get("SOURCEURL");
-      String eventInfo = "Date: " + sqlDate + ", Actor1: " + actor1Name + ", url: " + sourceUrl;
-      c.output(KV.of(countryCode, eventInfo));
-    }
-  }
-
-
-  /**
-   * Examines each row (country info) in the input table. Output a KV with the key the country
-   * code, and the value the country name.
-   */
-  static class ExtractCountryInfoFn extends DoFn<TableRow, KV<String, String>> {
-    @Override
-    public void processElement(ProcessContext c) {
-      TableRow row = c.element();
-      String countryCode = (String) row.get("FIPSCC");
-      String countryName = (String) row.get("HumanName");
-      c.output(KV.of(countryCode, countryName));
-    }
-  }
-
-
-  /**
-   * Options supported by {@link JoinExamples}.
-   * <p>
-   * Inherits standard configuration options.
-   */
-  private interface Options extends PipelineOptions {
-    @Description("Path of the file to write to")
-    @Validation.Required
-    String getOutput();
-    void setOutput(String value);
-  }
-
-  public static void main(String[] args) throws Exception {
-    Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-    Pipeline p = Pipeline.create(options);
-    // the following two 'applys' create multiple inputs to our pipeline, one for each
-    // of our two input sources.
-    PCollection<TableRow> eventsTable = p.apply(BigQueryIO.Read.from(GDELT_EVENTS_TABLE));
-    PCollection<TableRow> countryCodes = p.apply(BigQueryIO.Read.from(COUNTRY_CODES));
-    PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes);
-    formattedResults.apply(TextIO.Write.to(options.getOutput()));
-    p.run();
-  }
-
-}
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/UnionCoder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/UnionCoder.java
index 2ca7014691af..29240e7bb863 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/UnionCoder.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/join/UnionCoder.java
@@ -35,7 +35,7 @@
 /**
  * A UnionCoder encodes RawUnionValues.
  */
-class UnionCoder extends StandardCoder<RawUnionValue> {
+public class UnionCoder extends StandardCoder<RawUnionValue> {
   // TODO: Think about how to integrate this with a schema object (i.e.
   // a tuple of tuple tags).
   /**