diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java index 47725e0a9435..1d909fc86810 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java @@ -17,6 +17,17 @@ */ package org.apache.beam.examples; +// beam-playground: +// name: DebuggingWordCount +// description: An example that counts words in Shakespeare/kinglear.txt and includes a regex +// filter("Flourish|stomach"). +// multifile: false +// pipeline_options: --output output.txt +// categories: +// - Debugging +// - Filtering +// - Options + import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java index b807d678872f..d4a5a5655db8 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java @@ -17,6 +17,17 @@ */ package org.apache.beam.examples; +// beam-playground: +// name: MinimalWordCount +// description: An example that counts words in King Lear, by William Shakespeare. +// multifile: false +// pipeline_options: +// categories: +// - Combiners +// - Filtering +// - IO +// - Core Transforms + import java.util.Arrays; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; @@ -87,8 +98,8 @@ public static void main(String[] args) { // of input text files. TextIO.Read returns a PCollection where each element is one line from // the input text (a set of Shakespeare's texts). - // This example reads a public data set consisting of the complete works of Shakespeare. 
- p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) + // This example reads from a public dataset containing the text of King Lear. + p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/kinglear.txt")) // Concept #2: Apply a FlatMapElements transform the PCollection of text lines. // This transform splits the lines in PCollection, where each element is an diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java index 90b8dc3f7761..d02cb1225851 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java @@ -17,6 +17,15 @@ */ package org.apache.beam.examples; +// beam-playground: +// name: WordCount +// description: An example that counts words in Shakespeare's works. +// multifile: false +// pipeline_options: --output output.txt +// categories: +// - Combiners +// - Options + import org.apache.beam.examples.common.ExampleUtils; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index c6f0a2344a7e..5aa349e589ec 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -17,6 +17,18 @@ */ package org.apache.beam.examples.cookbook; +// beam-playground: +// name: DistinctExample +// description: An example that reads the text of King Lear, +// by William Shakespeare, as plain text and removes +// duplicate lines from it. 
+// multifile: false +// pipeline_options: --output output.txt +// categories: +// - Filtering +// - Options +// - Core Transforms + import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; import org.apache.beam.sdk.io.TextIO; @@ -28,8 +40,8 @@ import org.apache.beam.sdk.transforms.Distinct; /** - * This example uses as input Shakespeare's plays as plaintext files, and will remove any duplicate - * lines across all the files. (The output does not preserve any input order). + * This example uses as input text of King Lear, by William Shakespeare as plaintext files, and will + * remove any duplicate lines from this file. (The output does not preserve any input order). * *

Concepts: the Distinct transform, and how to wire transforms together. Demonstrates {@link * org.apache.beam.sdk.io.TextIO.Read}/ {@link Distinct}/{@link @@ -46,7 +58,7 @@ * * See examples/java/README.md for instructions about how to configure different runners. * - *

The input defaults to {@code gs://apache-beam-samples/shakespeare/*} and can be overridden + *

The input defaults to {@code gs://apache-beam-samples/shakespeare/kinglear.txt} and can be overridden * with {@code --input}. */ public class DistinctExample { @@ -58,7 +70,7 @@ public class DistinctExample { */ public interface Options extends PipelineOptions { @Description("Path to the directory or GCS prefix containing files to read from") - @Default.String("gs://apache-beam-samples/shakespeare/*") + @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt") String getInput(); void setInput(String value); diff --git a/playground/categories.yaml b/playground/categories.yaml index 91ef3345fdea..5d9e8f98e6a7 100644 --- a/playground/categories.yaml +++ b/playground/categories.yaml @@ -35,4 +35,5 @@ categories: - Branching - Flatten - Core Transforms - - Windowing \ No newline at end of file + - Windowing + - Debugging \ No newline at end of file diff --git a/sdks/go/examples/contains/contains.go b/sdks/go/examples/contains/contains.go index aaa554322294..1a352ceddf9c 100644 --- a/sdks/go/examples/contains/contains.go +++ b/sdks/go/examples/contains/contains.go @@ -15,6 +15,16 @@ package main +// beam-playground: +// name: Contains +// description: An example that counts matches of a given substring in Shakespeare's works. +// multifile: false +// pipeline_options: --search king +// categories: +// - Filtering +// - Options +// - Debugging + import ( "context" "flag" diff --git a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go index 399a991321f4..4c69f6b99acb 100644 --- a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go +++ b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go @@ -41,6 +41,16 @@ // with --input. package main +// beam-playground: +// name: DebuggingWordCount +// description: An example that counts words in Shakespeare's works and includes a regex filter("Flourish|stomach"). 
+// multifile: false +// pipeline_options: --output output.txt +// categories: +// - Options +// - Filtering +// - Debugging + import ( "context" "flag" diff --git a/sdks/go/examples/forest/forest.go b/sdks/go/examples/forest/forest.go index 5f6c55cd2d6e..9e0fe9de0fcb 100644 --- a/sdks/go/examples/forest/forest.go +++ b/sdks/go/examples/forest/forest.go @@ -27,6 +27,20 @@ // orders. package main +// beam-playground: +// name: Forest +// description: An example that shows that pipeline construction is normal Go +// code -- the pipeline "forest" is created recursively and uses a global +// variable -- and that a pipeline may contain non-connected parts. The pipeline +// generated has the shape of a forest where the output of each singleton leaf +// is flattened together over several rounds. This is most clearly seen via a +// visual representation of the pipeline, such as the one produced by the 'dot' runner. +// multifile: false +// pipeline_options: +// categories: +// - Flatten +// - Branching + import ( "context" "flag" diff --git a/sdks/go/examples/grades/grades.go b/sdks/go/examples/grades/grades.go index 38da4771c30f..f0fa3f1d7e4a 100644 --- a/sdks/go/examples/grades/grades.go +++ b/sdks/go/examples/grades/grades.go @@ -15,6 +15,16 @@ package main +// beam-playground: +// name: Grades +// description: An example that combines grades data. +// multifile: false +// pipeline_options: +// categories: +// - Debugging +// - Combiners +// - Filtering + import ( "context" "flag" diff --git a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go index b35aab2beacb..1607a7badd76 100644 --- a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go +++ b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go @@ -13,7 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// minimal_wordcount is an example that counts words in Shakespeare. 
+// minimal_wordcount is an example that counts words in King Lear, +// by William Shakespeare. // // This example is the first in a series of four successively more detailed // 'word count' examples. Here, for simplicity, we don't show any @@ -36,6 +37,17 @@ // "wordcounts.txt" in your current working directory. package main +// beam-playground: +// name: MinimalWordCount +// description: An example that counts words in King Lear, +// by William Shakespeare. +// multifile: false +// pipeline_options: +// categories: +// - IO +// - Combiners +// - Core Transforms + import ( "context" "fmt" @@ -67,9 +79,9 @@ func main() { // PCollection where each element is one line from the input text // (one of of Shakespeare's texts). - // This example reads a public data set consisting of the complete works - // of Shakespeare. - lines := textio.Read(s, "gs://apache-beam-samples/shakespeare/*") + // This example reads from a public dataset containing the text + // of King Lear. + lines := textio.Read(s, "gs://apache-beam-samples/shakespeare/kinglear.txt") // Concept #2: Invoke a ParDo transform on our PCollection of text lines. // This ParDo invokes a DoFn (defined in-line) on each element that diff --git a/sdks/go/examples/multiout/multiout.go b/sdks/go/examples/multiout/multiout.go index 141118fd6a1c..af26390db314 100644 --- a/sdks/go/examples/multiout/multiout.go +++ b/sdks/go/examples/multiout/multiout.go @@ -17,6 +17,19 @@ // and writes 2 output files. package main +// beam-playground: +// name: MultiOut +// description: An example that counts words in Shakespeare's works and writes 2 output files, +// -- small - for small words, +// -- big - for big words. 
+// multifile: false +// pipeline_options: --small sOutput.txt --big bOutput.txt +// categories: +// - IO +// - Options +// - Branching +// - Multiple Outputs + import ( "context" "flag" diff --git a/sdks/go/examples/stringsplit/stringsplit.go b/sdks/go/examples/stringsplit/stringsplit.go index 20450dccd3fc..4b51e2698466 100644 --- a/sdks/go/examples/stringsplit/stringsplit.go +++ b/sdks/go/examples/stringsplit/stringsplit.go @@ -36,6 +36,15 @@ // phrase "StringSplit Output". package main +// beam-playground: +// name: StringSplit +// description: An example of using a Splittable DoFn in the Go SDK with a portable runner. +// multifile: false +// pipeline_options: +// categories: +// - Debugging +// - Flatten + import ( "context" "flag" diff --git a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go index 040c659b85aa..2e04ec48414d 100644 --- a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go +++ b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go @@ -34,6 +34,16 @@ // 5. Accessing the window of an element package main +// beam-playground: +// name: WindowedWordCount +// description: An example that counts words in text, and can run over either unbounded or bounded input collections. +// multifile: false +// pipeline_options: --output output.txt +// categories: +// - Windowing +// - Options +// - Combiners + import ( "context" "flag" diff --git a/sdks/go/examples/wordcount/wordcount.go b/sdks/go/examples/wordcount/wordcount.go index 55855d7712c3..d34bd8c8d59c 100644 --- a/sdks/go/examples/wordcount/wordcount.go +++ b/sdks/go/examples/wordcount/wordcount.go @@ -55,6 +55,15 @@ // with --input. package main +// beam-playground: +// name: WordCount +// description: An example that counts words in Shakespeare's works. 
+// multifile: false +// pipeline_options: --output output.txt +// categories: +// - Combiners +// - Options + import ( "context" "flag" diff --git a/sdks/go/examples/yatzy/yatzy.go b/sdks/go/examples/yatzy/yatzy.go index e75dfe1abbb3..86d9e52fd10a 100644 --- a/sdks/go/examples/yatzy/yatzy.go +++ b/sdks/go/examples/yatzy/yatzy.go @@ -18,6 +18,16 @@ // non-deterministic and produce different pipelines on each invocation. package main +// beam-playground: +// name: Yatzy +// description: An example that shows that pipeline construction is normal Go code. +// It can even be non-deterministic and produce different pipelines on each invocation. +// multifile: false +// pipeline_options: +// categories: +// - IO +// - Side Input + import ( "context" "flag" diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py index 1dde20a49d09..5e373ca6328c 100644 --- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py +++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py @@ -51,6 +51,19 @@ # pytype: skip-file +# beam-playground: +# name: MultipleOutputPardo +# description: This is a slightly modified version +# of the basic wordcount example. In this example words +# are divided into 2 buckets as short +# words (3 characters in length or less) and words (other). +# multifile: false +# pipeline_options: --output output.txt +# categories: +# - IO +# - Options +# - Multiple Outputs + import argparse import logging import re diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py index b59baa61a469..b32e41bc377a 100644 --- a/sdks/python/apache_beam/examples/wordcount.py +++ b/sdks/python/apache_beam/examples/wordcount.py @@ -19,6 +19,15 @@ # pytype: skip-file +# beam-playground: +# name: WordCount +# description: An example that counts words in Shakespeare's works. 
+# multifile: false +# pipeline_options: --output output.txt +# categories: +# - Combiners +# - Options + import argparse import logging import re diff --git a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py index 404c123161ea..859124041586 100644 --- a/sdks/python/apache_beam/examples/wordcount_debugging.py +++ b/sdks/python/apache_beam/examples/wordcount_debugging.py @@ -42,6 +42,19 @@ # pytype: skip-file +# beam-playground: +# name: WordCountDebugging +# description: An example that counts words in Shakespeare's works +# and includes a regex filter("Flourish|stomach"). +# multifile: false +# pipeline_options: --output output.txt +# categories: +# - Flatten +# - Debugging +# - Options +# - Combiners +# - Filtering + import argparse import logging import re diff --git a/sdks/python/apache_beam/examples/wordcount_minimal.py b/sdks/python/apache_beam/examples/wordcount_minimal.py index f259bb0236bc..4f1f40169102 100644 --- a/sdks/python/apache_beam/examples/wordcount_minimal.py +++ b/sdks/python/apache_beam/examples/wordcount_minimal.py @@ -46,6 +46,18 @@ # pytype: skip-file +# beam-playground: +# name: WordCountMinimal +# description: An example that counts words in Shakespeare's works. +# multifile: false +# pipeline_options: --output output.txt +# categories: +# - IO +# - Core Transforms +# - Flatten +# - Options +# - Combiners + import argparse import logging import re diff --git a/sdks/python/apache_beam/examples/wordcount_with_metrics.py b/sdks/python/apache_beam/examples/wordcount_with_metrics.py index 8e1dd057fc41..65f40d8cbade 100644 --- a/sdks/python/apache_beam/examples/wordcount_with_metrics.py +++ b/sdks/python/apache_beam/examples/wordcount_with_metrics.py @@ -19,6 +19,16 @@ # pytype: skip-file +# beam-playground: +# name: WordCountWithMetrics +# description: A word-counting workflow with metrics. 
+# multifile: false +# pipeline_options: --output output.txt +# categories: +# - Combiners +# - Options +# - Metrics + import argparse import logging import re