apache · pabloem · Jan 10, 2022 · Dec 7, 2021 · Dec 7, 2021 · Dec 8, 2021
diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java
@@ -17,6 +17,17 @@
  */
 package org.apache.beam.examples;
 
+// beam-playground:
+//   name: DebuggingWordCount
+//   description: An example that counts words in Shakespeare/kinglear.txt and includes a regex
+//     filter("Flourish|stomach").
+//   multifile: false
+//   pipeline_options: --output output.txt
+//   categories:
+//     - Debugging
+//     - Filtering
+//     - Options
+
 import java.util.Arrays;
 import java.util.List;
 import java.util.regex.Pattern;

diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java
@@ -17,6 +17,17 @@
  */
 package org.apache.beam.examples;
 
+// beam-playground:
+//   name: MinimalWordCount
+//   description: An example that counts words in King Lear, by William Shakespeare.
+//   multifile: false
+//   pipeline_options:
+//   categories:
+//     - Combiners
+//     - Filtering
+//     - IO
+//     - Core Transforms
+
 import java.util.Arrays;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.io.TextIO;
@@ -87,8 +98,8 @@ public static void main(String[] args) {
     // of input text files. TextIO.Read returns a PCollection where each element is one line from
     // the input text (a set of Shakespeare's texts).
 
-    // This example reads a public data set consisting of the complete works of Shakespeare.
-    p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
+    // This example reads from a public dataset containing the text of King Lear.
+    p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/kinglear.txt"))
 
         // Concept #2: Apply a FlatMapElements transform the PCollection of text lines.
         // This transform splits the lines in PCollection<String>, where each element is an

diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java
@@ -17,6 +17,15 @@
  */
 package org.apache.beam.examples;
 
+// beam-playground:
+//   name: WordCount
+//   description: An example that counts words in Shakespeare's works.
+//   multifile: false
+//   pipeline_options: --output output.txt
+//   categories:
+//     - Combiners
+//     - Options
+
 import org.apache.beam.examples.common.ExampleUtils;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.io.TextIO;

diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java
@@ -17,6 +17,18 @@
  */
 package org.apache.beam.examples.cookbook;
 
+// beam-playground:
+//   name: DistinctExample
+//   description: An example that uses the text of King Lear,
+//     by William Shakespeare, as plain-text input and removes
+//     duplicate lines from it.
+//   multifile: false
+//   pipeline_options: --output output.txt
+//   categories:
+//     - Filtering
+//     - Options
+//     - Core Transforms
+
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
 import org.apache.beam.sdk.io.TextIO;
@@ -28,8 +40,8 @@
 import org.apache.beam.sdk.transforms.Distinct;
 
 /**
- * This example uses as input Shakespeare's plays as plaintext files, and will remove any duplicate
- * lines across all the files. (The output does not preserve any input order).
+ * This example uses the text of King Lear, by William Shakespeare, as plain-text input and will
+ * remove any duplicate lines from this file. (The output does not preserve any input order).
  *
  * <p>Concepts: the Distinct transform, and how to wire transforms together. Demonstrates {@link
  * org.apache.beam.sdk.io.TextIO.Read}/ {@link Distinct}/{@link
@@ -46,7 +58,7 @@
  *
  * See examples/java/README.md for instructions about how to configure different runners.
  *
- * <p>The input defaults to {@code gs://apache-beam-samples/shakespeare/*} and can be overridden
+ * <p>The input defaults to {@code gs://apache-beam-samples/shakespeare/kinglear.txt} and can be overridden
  * with {@code --input}.
  */
 public class DistinctExample {
@@ -58,7 +70,7 @@ public class DistinctExample {
    */
   public interface Options extends PipelineOptions {
     @Description("Path to the directory or GCS prefix containing files to read from")
-    @Default.String("gs://apache-beam-samples/shakespeare/*")
+    @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt")
     String getInput();
 
     void setInput(String value);

diff --git a/playground/categories.yaml b/playground/categories.yaml
@@ -35,4 +35,5 @@ categories:
   - Branching
   - Flatten
   - Core Transforms
-  - Windowing
+  - Windowing
+  - Debugging
diff --git a/sdks/go/examples/contains/contains.go b/sdks/go/examples/contains/contains.go
@@ -15,6 +15,16 @@
 
 package main
 
+// beam-playground:
+//   name: Contains
+//   description: An example that counts a received substring in Shakespeare's works.
+//   multifile: false
+//   pipeline_options: --search king
+//   categories:
+//     - Filtering
+//     - Options
+//     - Debugging
+
 import (
 	"context"
 	"flag"

diff --git a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go
@@ -41,6 +41,16 @@
 // with --input.
 package main
 
+// beam-playground:
+//   name: DebuggingWordCount
+//   description: An example that counts words in Shakespeare's works and includes a regex filter("Flourish|stomach").
+//   multifile: false
+//   pipeline_options: --output output.txt
+//   categories:
+//     - Options
+//     - Filtering
+//     - Debugging
+
 import (
 	"context"
 	"flag"

diff --git a/sdks/go/examples/forest/forest.go b/sdks/go/examples/forest/forest.go
@@ -27,6 +27,20 @@
 // orders.
 package main
 
+// beam-playground:
+//   name: Forest
+//   description: An example that shows that pipeline construction is normal Go
+//     code -- the pipeline "forest" is created recursively and uses a global
+//     variable -- and that a pipeline may contain non-connected parts. The pipeline
+//     generated has the shape of a forest where the output of each singleton leaf
+//     is flattened together over several rounds. This is most clearly seen via a
+//     visual representation of the pipeline, such as the one produced by the 'dot' runner.
+//   multifile: false
+//   pipeline_options:
+//   categories:
+//     - Flatten
+//     - Branching
+
 import (
 	"context"
 	"flag"

diff --git a/sdks/go/examples/grades/grades.go b/sdks/go/examples/grades/grades.go
@@ -15,6 +15,16 @@
 
 package main
 
+// beam-playground:
+//   name: Grades
+//   description: An example that combines grades data.
+//   multifile: false
+//   pipeline_options:
+//   categories:
+//     - Debugging
+//     - Combiners
+//     - Filtering
+
 import (
 	"context"
 	"flag"

diff --git a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go
@@ -13,7 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// minimal_wordcount is an example that counts words in Shakespeare.
+// minimal_wordcount is an example that counts words in King Lear,
+// by William Shakespeare.
 //
 // This example is the first in a series of four successively more detailed
 // 'word count' examples. Here, for simplicity, we don't show any
@@ -36,6 +37,17 @@
 // "wordcounts.txt" in your current working directory.
 package main
 
+// beam-playground:
+//   name: MinimalWordCount
+//   description: An example that counts words in King Lear,
+//     by William Shakespeare.
+//   multifile: false
+//   pipeline_options:
+//   categories:
+//     - IO
+//     - Combiners
+//     - Core Transforms
+
 import (
 	"context"
 	"fmt"
@@ -67,9 +79,9 @@ func main() {
 	// PCollection where each element is one line from the input text
 	// (one of of Shakespeare's texts).
 
-	// This example reads a public data set consisting of the complete works
-	// of Shakespeare.
-	lines := textio.Read(s, "gs://apache-beam-samples/shakespeare/*")
+	// This example reads from a public dataset containing the text
+	// of King Lear.
+	lines := textio.Read(s, "gs://apache-beam-samples/shakespeare/kinglear.txt")
 
 	// Concept #2: Invoke a ParDo transform on our PCollection of text lines.
 	// This ParDo invokes a DoFn (defined in-line) on each element that

diff --git a/sdks/go/examples/multiout/multiout.go b/sdks/go/examples/multiout/multiout.go
@@ -17,6 +17,19 @@
 // and writes 2 output files.
 package main
 
+// beam-playground:
+//   name: MultiOut
+//   description: An example that counts words in Shakespeare's works and writes 2 output files,
+//     -- small - for small words,
+//     -- big - for big words.
+//   multifile: false
+//   pipeline_options: --small sOutput.txt --big bOutput.txt
+//   categories:
+//     - IO
+//     - Options
+//     - Branching
+//     - Multiple Outputs
+
 import (
 	"context"
 	"flag"

diff --git a/sdks/go/examples/stringsplit/stringsplit.go b/sdks/go/examples/stringsplit/stringsplit.go
@@ -36,6 +36,15 @@
 // phrase "StringSplit Output".
 package main
 
+// beam-playground:
+//   name: StringSplit
+//   description: An example of using a Splittable DoFn in the Go SDK with a portable runner.
+//   multifile: false
+//   pipeline_options:
+//   categories:
+//     - Debugging
+//     - Flatten
+
 import (
 	"context"
 	"flag"

diff --git a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go
@@ -34,6 +34,16 @@
 //  5. Accessing the window of an element
 package main
 
+// beam-playground:
+//   name: WindowedWordCount
+//   description: An example that counts words in text, and can run over either unbounded or bounded input collections.
+//   multifile: false
+//   pipeline_options: --output output.txt
+//   categories:
+//     - Windowing
+//     - Options
+//     - Combiners
+
 import (
 	"context"
 	"flag"

diff --git a/sdks/go/examples/wordcount/wordcount.go b/sdks/go/examples/wordcount/wordcount.go
@@ -55,6 +55,15 @@
 // with --input.
 package main
 
+// beam-playground:
+//   name: WordCount
+//   description: An example that counts words in Shakespeare's works.
+//   multifile: false
+//   pipeline_options: --output output.txt
+//   categories:
+//     - Combiners
+//     - Options
+
 import (
 	"context"
 	"flag"

diff --git a/sdks/go/examples/yatzy/yatzy.go b/sdks/go/examples/yatzy/yatzy.go
@@ -18,6 +18,16 @@
 // non-deterministic and produce different pipelines on each invocation.
 package main
 
+// beam-playground:
+//   name: Yatzy
+//   description: An example that shows that pipeline construction is normal Go code.
+//     It can even be non-deterministic and produce different pipelines on each invocation.
+//   multifile: false
+//   pipeline_options:
+//   categories:
+//     - IO
+//     - Side Input
+
 import (
 	"context"
 	"flag"

diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py
@@ -51,6 +51,19 @@
 
 # pytype: skip-file
 
+# beam-playground:
+#   name: MultipleOutputPardo
+#   description: This is a slightly modified version
+#     of the basic wordcount example. In this example words
+#     are divided into 2 buckets, short
+#     words (3 characters in length or less) and other words.
+#   multifile: false
+#   pipeline_options: --output output.txt
+#   categories:
+#     - IO
+#     - Options
+#     - Multiple Outputs
+
 import argparse
 import logging
 import re

diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py
@@ -19,6 +19,15 @@
 
 # pytype: skip-file
 
+# beam-playground:
+#   name: WordCount
+#   description: An example that counts words in Shakespeare's works.
+#   multifile: false
+#   pipeline_options: --output output.txt
+#   categories:
+#     - Combiners
+#     - Options
+
 import argparse
 import logging
 import re

diff --git a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py
@@ -42,6 +42,19 @@
 
 # pytype: skip-file
 
+# beam-playground:
+#   name: WordCountDebugging
+#   description: An example that counts words in Shakespeare's works
+#     and includes a regex filter("Flourish|stomach").
+#   multifile: false
+#   pipeline_options: --output output.txt
+#   categories:
+#     - Flatten
+#     - Debugging
+#     - Options
+#     - Combiners
+#     - Filtering
+
 import argparse
 import logging
 import re

diff --git a/sdks/python/apache_beam/examples/wordcount_minimal.py b/sdks/python/apache_beam/examples/wordcount_minimal.py
@@ -46,6 +46,18 @@
 
 # pytype: skip-file
 
+# beam-playground:
+#   name: WordCountMinimal
+#   description: An example that counts words in Shakespeare's works.
+#   multifile: false
+#   pipeline_options: --output output.txt
+#   categories:
+#     - IO
+#     - Core Transforms
+#     - Flatten
+#     - Options
+#     - Combiners
+
 import argparse
 import logging
 import re