From d1fc4c68cc00d6e8bf427fc4cc9bdccc9deda48a Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Tue, 7 Dec 2021 13:14:55 +0300 Subject: [PATCH 01/13] Tag examples --- .../org/apache/beam/examples/DebuggingWordCount.java | 9 +++++++++ .../org/apache/beam/examples/MinimalWordCount.java | 9 +++++++++ .../main/java/org/apache/beam/examples/WordCount.java | 9 +++++++++ .../apache/beam/examples/cookbook/DistinctExample.java | 9 +++++++++ sdks/go/examples/contains/contains.go | 8 ++++++++ .../debugging_wordcount/debugging_wordcount.go | 8 ++++++++ sdks/go/examples/forest/forest.go | 8 ++++++++ sdks/go/examples/grades/grades.go | 6 ++++++ .../go/examples/minimal_wordcount/minimal_wordcount.go | 7 +++++++ sdks/go/examples/multiout/multiout.go | 10 ++++++++++ sdks/go/examples/stringsplit/stringsplit.go | 6 ++++++ .../examples/windowed_wordcount/windowed_wordcount.go | 8 ++++++++ sdks/go/examples/wordcount/wordcount.go | 8 ++++++++ sdks/go/examples/yatzy/yatzy.go | 8 ++++++++ sdks/python/apache_beam/examples/avro_bitcoin.py | 9 +++++++++ .../examples/cookbook/multiple_output_pardo.py | 10 ++++++++++ sdks/python/apache_beam/examples/wordcount.py | 9 +++++++++ .../python/apache_beam/examples/wordcount_debugging.py | 9 +++++++++ sdks/python/apache_beam/examples/wordcount_minimal.py | 8 ++++++++ .../apache_beam/examples/wordcount_with_metrics.py | 8 ++++++++ .../python/apache_beam/examples/wordcount_xlang_sql.py | 8 ++++++++ 21 files changed, 174 insertions(+) diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java index 47725e0a9435..81d1ea711a4b 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java @@ -17,6 +17,15 @@ */ package org.apache.beam.examples; +/* beam-playground: + * name: DebuggingWordCount + * description: An example that counts words in 
Shakespeare/kinglear.txt includes regax filter("Flourish|stomach"). + * pipeline_options: --output output.txt + * categories: + * - IO + * - Options + */ + import java.util.Arrays; import java.util.List; import java.util.regex.Pattern; diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java index b807d678872f..054fbc62004f 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java @@ -17,6 +17,15 @@ */ package org.apache.beam.examples; +/* beam-playground: + * name: MinimalWordCount + * description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. + * pipeline_options: + * categories: + * - IO + * - Options + */ + import java.util.Arrays; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java index 90b8dc3f7761..302a6e4fed5b 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java @@ -17,6 +17,15 @@ */ package org.apache.beam.examples; +/* beam-playground: + * name: WordCount + * description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. 
+ * pipeline_options: --output output.txt + * categories: + * - IO + * - Options + */ + import org.apache.beam.examples.common.ExampleUtils; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.io.TextIO; diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index c6f0a2344a7e..1740ed1f6946 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -17,6 +17,15 @@ */ package org.apache.beam.examples.cookbook; +/* beam-playground: + * name: DistinctExample + * description: An example that uses Shakespeare's plays as plain text files, and removes duplicate lines across all the files. + * pipeline_options: --output output.txt + * categories: + * - IO + * - Options + */ + import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; import org.apache.beam.sdk.io.TextIO; diff --git a/sdks/go/examples/contains/contains.go b/sdks/go/examples/contains/contains.go index aaa554322294..625ce1365623 100644 --- a/sdks/go/examples/contains/contains.go +++ b/sdks/go/examples/contains/contains.go @@ -15,6 +15,14 @@ package main +// beam-playground: +// name: Contains +// description: An example counts received substring in Shakespeare/kinglear.txt +// pipeline_options: --search king +// categories: +// - IO +// - Options + import ( "context" "flag" diff --git a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go index 399a991321f4..f900ae0cc123 100644 --- a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go +++ b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go @@ -41,6 +41,14 @@ // with --input. 
package main +// beam-playground: +// name: DebuggingWordCount +// description: An example that counts words in Shakespeare/kinglear.txt includes regax filter("Flourish|stomach"). +// pipeline_options: --output output.txt +// categories: +// - IO +// - Options + import ( "context" "flag" diff --git a/sdks/go/examples/forest/forest.go b/sdks/go/examples/forest/forest.go index 5f6c55cd2d6e..8be2d21c7843 100644 --- a/sdks/go/examples/forest/forest.go +++ b/sdks/go/examples/forest/forest.go @@ -27,6 +27,14 @@ // orders. package main +// beam-playground: +// name: Forest +// description: An example that shows that pipeline construction is normal Go code +// -- the pipeline "forest" is created recursively and uses a global variable +// -- and that a pipeline may contain non-connected parts. +// pipeline_options: +// categories: + import ( "context" "flag" diff --git a/sdks/go/examples/grades/grades.go b/sdks/go/examples/grades/grades.go index 38da4771c30f..fb8265cdf05c 100644 --- a/sdks/go/examples/grades/grades.go +++ b/sdks/go/examples/grades/grades.go @@ -15,6 +15,12 @@ package main +// beam-playground: +// name: Grades +// description: +// pipeline_options: +// categories: + import ( "context" "flag" diff --git a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go index b35aab2beacb..18f89dbd557c 100644 --- a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go +++ b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go @@ -36,6 +36,13 @@ // "wordcounts.txt" in your current working directory. package main +// beam-playground: +// name: MinimalWordCount +// description: An example that counts words in Shakespeare and includes Beam best practices. 
+// pipeline_options: +// categories: +// - IO + import ( "context" "fmt" diff --git a/sdks/go/examples/multiout/multiout.go b/sdks/go/examples/multiout/multiout.go index 141118fd6a1c..3b7e1f03afe4 100644 --- a/sdks/go/examples/multiout/multiout.go +++ b/sdks/go/examples/multiout/multiout.go @@ -17,6 +17,16 @@ // and writes 2 output files. package main +// beam-playground: +// name: MultiOut +// description: An example that counts words in Shakespeare/kinglear.txt and writes 2 output files: +// -- small: file for small words +// -- big: file for big words +// pipeline_options: --small sOutput.txt --big bOutput.txt +// categories: +// - IO +// - Options + import ( "context" "flag" diff --git a/sdks/go/examples/stringsplit/stringsplit.go b/sdks/go/examples/stringsplit/stringsplit.go index 20450dccd3fc..bd12ddaa5fa6 100644 --- a/sdks/go/examples/stringsplit/stringsplit.go +++ b/sdks/go/examples/stringsplit/stringsplit.go @@ -36,6 +36,12 @@ // phrase "StringSplit Output". package main +// beam-playground: +// name: StringsSplit +// description: An example of using a Splittable DoFn in the Go SDK with a portable runner. +// pipeline_options: +// categories: + import ( "context" "flag" diff --git a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go index 040c659b85aa..357ce81e4bae 100644 --- a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go +++ b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go @@ -34,6 +34,14 @@ // 5. Accessing the window of an element package main +// beam-playground: +// name: WindowedWordCount +// description: An example that counts words in text, and can run over either unbounded or bounded input collections. 
+// pipeline_options: --output output.txt +// categories: +// - IO +// - Options + import ( "context" "flag" diff --git a/sdks/go/examples/wordcount/wordcount.go b/sdks/go/examples/wordcount/wordcount.go index 55855d7712c3..09360d3ed8f1 100644 --- a/sdks/go/examples/wordcount/wordcount.go +++ b/sdks/go/examples/wordcount/wordcount.go @@ -55,6 +55,14 @@ // with --input. package main +// beam-playground: +// name: WordCount +// description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. +// pipeline_options: --output output.txt +// categories: +// - IO +// - Options + import ( "context" "flag" diff --git a/sdks/go/examples/yatzy/yatzy.go b/sdks/go/examples/yatzy/yatzy.go index e75dfe1abbb3..21f4df02a4c7 100644 --- a/sdks/go/examples/yatzy/yatzy.go +++ b/sdks/go/examples/yatzy/yatzy.go @@ -18,6 +18,14 @@ // non-deterministic and produce different pipelines on each invocation. package main +// beam-playground: +// name: Yatzy +// description: An examples shows that pipeline construction is normal Go code. +// It can even be non-deterministic and produce different pipelines on each invocation. 
+// pipeline_options: +// categories: +// - IO + import ( "context" "flag" diff --git a/sdks/python/apache_beam/examples/avro_bitcoin.py b/sdks/python/apache_beam/examples/avro_bitcoin.py index 0b4c555548eb..be1879f722bf 100644 --- a/sdks/python/apache_beam/examples/avro_bitcoin.py +++ b/sdks/python/apache_beam/examples/avro_bitcoin.py @@ -26,6 +26,15 @@ # pytype: skip-file +# beam-playground: +# name: AvroBitcoin +# description: An example that collect statistics on transactions +# in a public bitcoin dataset that was exported to avro +# pipeline_options: --output output.txt +# categories: +# - IO +# - Options + import argparse import logging diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py index 1dde20a49d09..b2367ce4ff4a 100644 --- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py +++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py @@ -51,6 +51,16 @@ # pytype: skip-file +# beam-playground: +# name: MultipleOutputPardo +# description: This is a slightly modified version of the +# basic wordcount example. In this example words are divided into 2 buckets +# as shorts words (3 characters in length or less) and words (other). +# pipeline_options: --output output.txt +# categories: +# - IO +# - Options + import argparse import logging import re diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py index b59baa61a469..a11326f13541 100644 --- a/sdks/python/apache_beam/examples/wordcount.py +++ b/sdks/python/apache_beam/examples/wordcount.py @@ -19,6 +19,15 @@ # pytype: skip-file +# beam-playground: +# name: WordCount +# description: An example that counts words in Shakespeare/kinglear.txt +# and includes Beam best practices. 
+# pipeline_options: --output output.txt +# categories: +# - IO +# - Options + import argparse import logging import re diff --git a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py index 404c123161ea..939671696a4a 100644 --- a/sdks/python/apache_beam/examples/wordcount_debugging.py +++ b/sdks/python/apache_beam/examples/wordcount_debugging.py @@ -42,6 +42,15 @@ # pytype: skip-file +# beam-playground: +# name: WordCountDebugging +# description: An example that counts words in Shakespeare/kinglear.txt +# includes regax filter("Flourish|stomach"). +# pipeline_options: --output output.txt +# categories: +# - IO +# - Options + import argparse import logging import re diff --git a/sdks/python/apache_beam/examples/wordcount_minimal.py b/sdks/python/apache_beam/examples/wordcount_minimal.py index f259bb0236bc..833a635aa69f 100644 --- a/sdks/python/apache_beam/examples/wordcount_minimal.py +++ b/sdks/python/apache_beam/examples/wordcount_minimal.py @@ -46,6 +46,14 @@ # pytype: skip-file +# beam-playground: +# name: WordCountMinimal +# description: An example that counts words in Shakespeare/kinglear.txt. +# pipeline_options: --output output.txt +# categories: +# - IO +# - Options + import argparse import logging import re diff --git a/sdks/python/apache_beam/examples/wordcount_with_metrics.py b/sdks/python/apache_beam/examples/wordcount_with_metrics.py index 8e1dd057fc41..7c19473cf1bf 100644 --- a/sdks/python/apache_beam/examples/wordcount_with_metrics.py +++ b/sdks/python/apache_beam/examples/wordcount_with_metrics.py @@ -19,6 +19,14 @@ # pytype: skip-file +# beam-playground: +# name: WordCountWithMetrics +# description: A word-counting workflow with metrics. 
+# pipeline_options: --output output.txt +# categories: +# - IO +# - Options + import argparse import logging import re diff --git a/sdks/python/apache_beam/examples/wordcount_xlang_sql.py b/sdks/python/apache_beam/examples/wordcount_xlang_sql.py index 97a43d386c75..71247148b587 100644 --- a/sdks/python/apache_beam/examples/wordcount_xlang_sql.py +++ b/sdks/python/apache_beam/examples/wordcount_xlang_sql.py @@ -22,6 +22,14 @@ Docker must also be available to run this pipeline locally. """ +# beam-playground: +# name: WordCountXLangSQL +# description: A word-counting workflow that uses the SQL transform. +# pipeline_options: --output output.txt +# categories: +# - IO +# - Options + import argparse import logging import re From 5857448a7e4570f1420181e95f3845b4738a872a Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Tue, 7 Dec 2021 14:39:11 +0300 Subject: [PATCH 02/13] Refactoring tags --- .../apache_beam/examples/cookbook/multiple_output_pardo.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py index b2367ce4ff4a..0d6b38622b93 100644 --- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py +++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py @@ -53,9 +53,10 @@ # beam-playground: # name: MultipleOutputPardo -# description: This is a slightly modified version of the -# basic wordcount example. In this example words are divided into 2 buckets -# as shorts words (3 characters in length or less) and words (other). +# description: This is a slightly modified version +# of the basic wordcount example. In this example words +# are divided into 2 buckets as shorts +# words (3 characters in length or less) and words (other). 
# pipeline_options: --output output.txt # categories: # - IO From 30976c3d4e6417b986a1b6a410aefc5d5f075ae6 Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Wed, 8 Dec 2021 14:59:40 +0300 Subject: [PATCH 03/13] Add multifile to tag --- .../main/java/org/apache/beam/examples/DebuggingWordCount.java | 1 + .../main/java/org/apache/beam/examples/MinimalWordCount.java | 1 + .../java/src/main/java/org/apache/beam/examples/WordCount.java | 1 + .../org/apache/beam/examples/cookbook/DistinctExample.java | 1 + sdks/go/examples/contains/contains.go | 1 + sdks/go/examples/debugging_wordcount/debugging_wordcount.go | 1 + sdks/go/examples/forest/forest.go | 1 + sdks/go/examples/grades/grades.go | 1 + sdks/go/examples/minimal_wordcount/minimal_wordcount.go | 1 + sdks/go/examples/multiout/multiout.go | 1 + sdks/go/examples/stringsplit/stringsplit.go | 1 + sdks/go/examples/windowed_wordcount/windowed_wordcount.go | 1 + sdks/go/examples/wordcount/wordcount.go | 1 + sdks/go/examples/yatzy/yatzy.go | 1 + sdks/python/apache_beam/examples/avro_bitcoin.py | 3 ++- .../apache_beam/examples/cookbook/multiple_output_pardo.py | 1 + sdks/python/apache_beam/examples/wordcount.py | 1 + sdks/python/apache_beam/examples/wordcount_debugging.py | 1 + sdks/python/apache_beam/examples/wordcount_minimal.py | 1 + sdks/python/apache_beam/examples/wordcount_with_metrics.py | 2 ++ sdks/python/apache_beam/examples/wordcount_xlang_sql.py | 1 + 21 files changed, 23 insertions(+), 1 deletion(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java index 81d1ea711a4b..a774a4bdb5dd 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java @@ -20,6 +20,7 @@ /* beam-playground: * name: DebuggingWordCount * description: An example that counts words in Shakespeare/kinglear.txt includes 
regax filter("Flourish|stomach"). + * multifile: false * pipeline_options: --output output.txt * categories: * - IO diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java index 054fbc62004f..ef1fbb1aecb7 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java @@ -20,6 +20,7 @@ /* beam-playground: * name: MinimalWordCount * description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. + * multifile: false * pipeline_options: * categories: * - IO diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java index 302a6e4fed5b..72629424aa27 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java @@ -20,6 +20,7 @@ /* beam-playground: * name: WordCount * description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. + * multifile: false * pipeline_options: --output output.txt * categories: * - IO diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index 1740ed1f6946..1e9873674a21 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -20,6 +20,7 @@ /* beam-playground: * name: DistinctExample * description: An example that uses Shakespeare's plays as plain text files, and removes duplicate lines across all the files. 
+ * multifile: false * pipeline_options: --output output.txt * categories: * - IO diff --git a/sdks/go/examples/contains/contains.go b/sdks/go/examples/contains/contains.go index 625ce1365623..e871dabad2dd 100644 --- a/sdks/go/examples/contains/contains.go +++ b/sdks/go/examples/contains/contains.go @@ -18,6 +18,7 @@ package main // beam-playground: // name: Contains // description: An example counts received substring in Shakespeare/kinglear.txt +// multifile: false // pipeline_options: --search king // categories: // - IO diff --git a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go index f900ae0cc123..73430bd513fb 100644 --- a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go +++ b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go @@ -44,6 +44,7 @@ package main // beam-playground: // name: DebuggingWordCount // description: An example that counts words in Shakespeare/kinglear.txt includes regax filter("Flourish|stomach"). +// multifile: false // pipeline_options: --output output.txt // categories: // - IO diff --git a/sdks/go/examples/forest/forest.go b/sdks/go/examples/forest/forest.go index 8be2d21c7843..f47ffce5b9eb 100644 --- a/sdks/go/examples/forest/forest.go +++ b/sdks/go/examples/forest/forest.go @@ -32,6 +32,7 @@ package main // description: An example that shows that pipeline construction is normal Go code // -- the pipeline "forest" is created recursively and uses a global variable // -- and that a pipeline may contain non-connected parts. 
+// multifile: false // pipeline_options: // categories: diff --git a/sdks/go/examples/grades/grades.go b/sdks/go/examples/grades/grades.go index fb8265cdf05c..6a22020c20c8 100644 --- a/sdks/go/examples/grades/grades.go +++ b/sdks/go/examples/grades/grades.go @@ -18,6 +18,7 @@ package main // beam-playground: // name: Grades // description: +// multifile: false // pipeline_options: // categories: diff --git a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go index 18f89dbd557c..48c310275756 100644 --- a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go +++ b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go @@ -39,6 +39,7 @@ package main // beam-playground: // name: MinimalWordCount // description: An example that counts words in Shakespeare and includes Beam best practices. +// multifile: false // pipeline_options: // categories: // - IO diff --git a/sdks/go/examples/multiout/multiout.go b/sdks/go/examples/multiout/multiout.go index 3b7e1f03afe4..4ccee5e7516c 100644 --- a/sdks/go/examples/multiout/multiout.go +++ b/sdks/go/examples/multiout/multiout.go @@ -22,6 +22,7 @@ package main // description: An example that counts words in Shakespeare/kinglear.txt and writes 2 output files: // -- small: file for small words // -- big: file for big words +// multifile: false // pipeline_options: --small sOutput.txt --big bOutput.txt // categories: // - IO diff --git a/sdks/go/examples/stringsplit/stringsplit.go b/sdks/go/examples/stringsplit/stringsplit.go index bd12ddaa5fa6..7d53c18770b6 100644 --- a/sdks/go/examples/stringsplit/stringsplit.go +++ b/sdks/go/examples/stringsplit/stringsplit.go @@ -39,6 +39,7 @@ package main // beam-playground: // name: StringsSplit // description: An example of using a Splittable DoFn in the Go SDK with a portable runner. 
+// multifile: false // pipeline_options: // categories: diff --git a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go index 357ce81e4bae..87f840948433 100644 --- a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go +++ b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go @@ -37,6 +37,7 @@ package main // beam-playground: // name: WindowedWordCount // description: An example that counts words in text, and can run over either unbounded or bounded input collections. +// multifile: false // pipeline_options: --output output.txt // categories: // - IO diff --git a/sdks/go/examples/wordcount/wordcount.go b/sdks/go/examples/wordcount/wordcount.go index 09360d3ed8f1..aae99a967f50 100644 --- a/sdks/go/examples/wordcount/wordcount.go +++ b/sdks/go/examples/wordcount/wordcount.go @@ -58,6 +58,7 @@ package main // beam-playground: // name: WordCount // description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. +// multifile: false // pipeline_options: --output output.txt // categories: // - IO diff --git a/sdks/go/examples/yatzy/yatzy.go b/sdks/go/examples/yatzy/yatzy.go index 21f4df02a4c7..cf44b20983ce 100644 --- a/sdks/go/examples/yatzy/yatzy.go +++ b/sdks/go/examples/yatzy/yatzy.go @@ -22,6 +22,7 @@ package main // name: Yatzy // description: An examples shows that pipeline construction is normal Go code. // It can even be non-deterministic and produce different pipelines on each invocation. 
+// multifile: false // pipeline_options: // categories: // - IO diff --git a/sdks/python/apache_beam/examples/avro_bitcoin.py b/sdks/python/apache_beam/examples/avro_bitcoin.py index be1879f722bf..baa4ba3861fd 100644 --- a/sdks/python/apache_beam/examples/avro_bitcoin.py +++ b/sdks/python/apache_beam/examples/avro_bitcoin.py @@ -29,7 +29,8 @@ # beam-playground: # name: AvroBitcoin # description: An example that collect statistics on transactions -# in a public bitcoin dataset that was exported to avro +# in a public bitcoin dataset that was exported to avro. +# multifile: false # pipeline_options: --output output.txt # categories: # - IO diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py index 0d6b38622b93..2bdb16cf1bbc 100644 --- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py +++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py @@ -57,6 +57,7 @@ # of the basic wordcount example. In this example words # are divided into 2 buckets as shorts # words (3 characters in length or less) and words (other). +# multifile: false # pipeline_options: --output output.txt # categories: # - IO diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py index a11326f13541..8bb5ae2a7bd4 100644 --- a/sdks/python/apache_beam/examples/wordcount.py +++ b/sdks/python/apache_beam/examples/wordcount.py @@ -23,6 +23,7 @@ # name: WordCount # description: An example that counts words in Shakespeare/kinglear.txt # and includes Beam best practices. 
+# multifile: false # pipeline_options: --output output.txt # categories: # - IO diff --git a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py index 939671696a4a..d979cf0e9f0c 100644 --- a/sdks/python/apache_beam/examples/wordcount_debugging.py +++ b/sdks/python/apache_beam/examples/wordcount_debugging.py @@ -46,6 +46,7 @@ # name: WordCountDebugging # description: An example that counts words in Shakespeare/kinglear.txt # includes regax filter("Flourish|stomach"). +# multifile: false # pipeline_options: --output output.txt # categories: # - IO diff --git a/sdks/python/apache_beam/examples/wordcount_minimal.py b/sdks/python/apache_beam/examples/wordcount_minimal.py index 833a635aa69f..9b277d02d613 100644 --- a/sdks/python/apache_beam/examples/wordcount_minimal.py +++ b/sdks/python/apache_beam/examples/wordcount_minimal.py @@ -49,6 +49,7 @@ # beam-playground: # name: WordCountMinimal # description: An example that counts words in Shakespeare/kinglear.txt. +# multifile: false # pipeline_options: --output output.txt # categories: # - IO diff --git a/sdks/python/apache_beam/examples/wordcount_with_metrics.py b/sdks/python/apache_beam/examples/wordcount_with_metrics.py index 7c19473cf1bf..693b6bdf1be4 100644 --- a/sdks/python/apache_beam/examples/wordcount_with_metrics.py +++ b/sdks/python/apache_beam/examples/wordcount_with_metrics.py @@ -22,10 +22,12 @@ # beam-playground: # name: WordCountWithMetrics # description: A word-counting workflow with metrics. 
+# multifile: false # pipeline_options: --output output.txt # categories: # - IO # - Options +# - Metrics import argparse import logging diff --git a/sdks/python/apache_beam/examples/wordcount_xlang_sql.py b/sdks/python/apache_beam/examples/wordcount_xlang_sql.py index 71247148b587..b50872ec1170 100644 --- a/sdks/python/apache_beam/examples/wordcount_xlang_sql.py +++ b/sdks/python/apache_beam/examples/wordcount_xlang_sql.py @@ -25,6 +25,7 @@ # beam-playground: # name: WordCountXLangSQL # description: A word-counting workflow that uses the SQL transform. +# multifile: false # pipeline_options: --output output.txt # categories: # - IO From 09e0678fce9eb18b5636e767e3f6c63d241ca230 Mon Sep 17 00:00:00 2001 From: Artur Khanin Date: Wed, 8 Dec 2021 17:03:48 +0300 Subject: [PATCH 04/13] Updated categories of some examples --- .../java/org/apache/beam/examples/DebuggingWordCount.java | 2 +- playground/categories.yaml | 1 + sdks/go/examples/contains/contains.go | 2 +- sdks/go/examples/debugging_wordcount/debugging_wordcount.go | 2 +- sdks/go/examples/forest/forest.go | 1 + sdks/go/examples/grades/grades.go | 4 +++- sdks/go/examples/multiout/multiout.go | 1 + sdks/go/examples/stringsplit/stringsplit.go | 3 ++- sdks/go/examples/windowed_wordcount/windowed_wordcount.go | 3 ++- sdks/go/examples/yatzy/yatzy.go | 3 ++- sdks/python/apache_beam/examples/avro_bitcoin.py | 1 + .../apache_beam/examples/cookbook/multiple_output_pardo.py | 1 + sdks/python/apache_beam/examples/wordcount_debugging.py | 2 +- sdks/python/apache_beam/examples/wordcount_xlang_sql.py | 1 + 14 files changed, 19 insertions(+), 8 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java index a774a4bdb5dd..814a3a146a34 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java @@ 
-19,7 +19,7 @@ /* beam-playground: * name: DebuggingWordCount - * description: An example that counts words in Shakespeare/kinglear.txt includes regax filter("Flourish|stomach"). + * description: An example that counts words in Shakespeare/kinglear.txt includes regex filter("Flourish|stomach"). * multifile: false * pipeline_options: --output output.txt * categories: diff --git a/playground/categories.yaml b/playground/categories.yaml index b5cce06a0094..fbc7cb5050d1 100644 --- a/playground/categories.yaml +++ b/playground/categories.yaml @@ -30,3 +30,4 @@ categories: - Options - Coders - Stateful Processing + - Beam SQL diff --git a/sdks/go/examples/contains/contains.go b/sdks/go/examples/contains/contains.go index e871dabad2dd..ea15df9fc229 100644 --- a/sdks/go/examples/contains/contains.go +++ b/sdks/go/examples/contains/contains.go @@ -22,7 +22,7 @@ package main // pipeline_options: --search king // categories: // - IO -// - Options +// - Options import ( "context" diff --git a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go index 73430bd513fb..edc9e2ede534 100644 --- a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go +++ b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go @@ -43,7 +43,7 @@ package main // beam-playground: // name: DebuggingWordCount -// description: An example that counts words in Shakespeare/kinglear.txt includes regax filter("Flourish|stomach"). +// description: An example that counts words in Shakespeare/kinglear.txt includes regex filter("Flourish|stomach"). 
// multifile: false // pipeline_options: --output output.txt // categories: diff --git a/sdks/go/examples/forest/forest.go b/sdks/go/examples/forest/forest.go index f47ffce5b9eb..f2dd61ca8063 100644 --- a/sdks/go/examples/forest/forest.go +++ b/sdks/go/examples/forest/forest.go @@ -35,6 +35,7 @@ package main // multifile: false // pipeline_options: // categories: +// - Testing import ( "context" diff --git a/sdks/go/examples/grades/grades.go b/sdks/go/examples/grades/grades.go index 6a22020c20c8..158f37946804 100644 --- a/sdks/go/examples/grades/grades.go +++ b/sdks/go/examples/grades/grades.go @@ -17,10 +17,12 @@ package main // beam-playground: // name: Grades -// description: +// description: An example that combines grades data // multifile: false // pipeline_options: // categories: +// - Testing +// - Combiners import ( "context" diff --git a/sdks/go/examples/multiout/multiout.go b/sdks/go/examples/multiout/multiout.go index 4ccee5e7516c..1b9bacfaf87d 100644 --- a/sdks/go/examples/multiout/multiout.go +++ b/sdks/go/examples/multiout/multiout.go @@ -27,6 +27,7 @@ package main // categories: // - IO // - Options +// - Multiple Outputs import ( "context" diff --git a/sdks/go/examples/stringsplit/stringsplit.go b/sdks/go/examples/stringsplit/stringsplit.go index 7d53c18770b6..a1335efb02d5 100644 --- a/sdks/go/examples/stringsplit/stringsplit.go +++ b/sdks/go/examples/stringsplit/stringsplit.go @@ -37,11 +37,12 @@ package main // beam-playground: -// name: StringsSplit +// name: StringSplit // description: An example of using a Splittable DoFn in the Go SDK with a portable runner. 
// multifile: false // pipeline_options: // categories: +// - Testing import ( "context" diff --git a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go index 87f840948433..98ebfe96548d 100644 --- a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go +++ b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go @@ -41,7 +41,8 @@ package main // pipeline_options: --output output.txt // categories: // - IO -// - Options +// - Options +// - Combiners import ( "context" diff --git a/sdks/go/examples/yatzy/yatzy.go b/sdks/go/examples/yatzy/yatzy.go index cf44b20983ce..fb0973d87181 100644 --- a/sdks/go/examples/yatzy/yatzy.go +++ b/sdks/go/examples/yatzy/yatzy.go @@ -20,12 +20,13 @@ package main // beam-playground: // name: Yatzy -// description: An examples shows that pipeline construction is normal Go code. +// description: An example shows that pipeline construction is a normal Go code. // It can even be non-deterministic and produce different pipelines on each invocation. 
// multifile: false // pipeline_options: // categories: // - IO +// - Side Input import ( "context" diff --git a/sdks/python/apache_beam/examples/avro_bitcoin.py b/sdks/python/apache_beam/examples/avro_bitcoin.py index baa4ba3861fd..1b4a1d5728c3 100644 --- a/sdks/python/apache_beam/examples/avro_bitcoin.py +++ b/sdks/python/apache_beam/examples/avro_bitcoin.py @@ -35,6 +35,7 @@ # categories: # - IO # - Options +# - Schemas import argparse import logging diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py index 2bdb16cf1bbc..9d74a7cec258 100644 --- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py +++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py @@ -62,6 +62,7 @@ # categories: # - IO # - Options +# - Multiple Outputs import argparse import logging diff --git a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py index d979cf0e9f0c..ef7a41ae45d0 100644 --- a/sdks/python/apache_beam/examples/wordcount_debugging.py +++ b/sdks/python/apache_beam/examples/wordcount_debugging.py @@ -45,7 +45,7 @@ # beam-playground: # name: WordCountDebugging # description: An example that counts words in Shakespeare/kinglear.txt -# includes regax filter("Flourish|stomach"). +# includes regex filter("Flourish|stomach"). 
# multifile: false # pipeline_options: --output output.txt # categories: diff --git a/sdks/python/apache_beam/examples/wordcount_xlang_sql.py b/sdks/python/apache_beam/examples/wordcount_xlang_sql.py index b50872ec1170..bbbeb452464a 100644 --- a/sdks/python/apache_beam/examples/wordcount_xlang_sql.py +++ b/sdks/python/apache_beam/examples/wordcount_xlang_sql.py @@ -30,6 +30,7 @@ # categories: # - IO # - Options +# - Beam SQL import argparse import logging From f6e4d8324f400ca7c606ff6b4c761a4255a18ad8 Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Mon, 13 Dec 2021 18:26:12 +0300 Subject: [PATCH 05/13] Edit tags --- sdks/go/examples/forest/forest.go | 6 +++--- sdks/go/examples/multiout/multiout.go | 6 +++--- sdks/go/examples/yatzy/yatzy.go | 4 ++-- sdks/python/apache_beam/examples/avro_bitcoin.py | 11 ----------- .../apache_beam/examples/wordcount_xlang_sql.py | 10 ---------- 5 files changed, 8 insertions(+), 29 deletions(-) diff --git a/sdks/go/examples/forest/forest.go b/sdks/go/examples/forest/forest.go index f2dd61ca8063..f84da506d443 100644 --- a/sdks/go/examples/forest/forest.go +++ b/sdks/go/examples/forest/forest.go @@ -29,9 +29,9 @@ package main // beam-playground: // name: Forest -// description: An example that shows that pipeline construction is normal Go code -// -- the pipeline "forest" is created recursively and uses a global variable -// -- and that a pipeline may contain non-connected parts. +// description: An example that shows that pipeline construction is normal Go code, +// -- the pipeline "forest" is created recursively and uses a global variable +// -- and that a pipeline may contain non-connected parts. 
// multifile: false // pipeline_options: // categories: diff --git a/sdks/go/examples/multiout/multiout.go b/sdks/go/examples/multiout/multiout.go index 1b9bacfaf87d..930e10efe670 100644 --- a/sdks/go/examples/multiout/multiout.go +++ b/sdks/go/examples/multiout/multiout.go @@ -19,9 +19,9 @@ package main // beam-playground: // name: MultiOut -// description: An example that counts words in Shakespeare/kinglear.txt and writes 2 output files: -// -- small: file for small words -// -- big: file for big words +// description: An example that counts words in Shakespeare/kinglear.txt and writes 2 output files, +// -- big - for small words, +// -- small - for big words. // multifile: false // pipeline_options: --small sOutput.txt --big bOutput.txt // categories: diff --git a/sdks/go/examples/yatzy/yatzy.go b/sdks/go/examples/yatzy/yatzy.go index fb0973d87181..86d9e52fd10a 100644 --- a/sdks/go/examples/yatzy/yatzy.go +++ b/sdks/go/examples/yatzy/yatzy.go @@ -20,8 +20,8 @@ package main // beam-playground: // name: Yatzy -// description: An example shows that pipeline construction is a normal Go code. -// It can even be non-deterministic and produce different pipelines on each invocation. +// description: An examples shows that pipeline construction is normal Go code. +// It can even be non-deterministic and produce different pipelines on each invocation. // multifile: false // pipeline_options: // categories: diff --git a/sdks/python/apache_beam/examples/avro_bitcoin.py b/sdks/python/apache_beam/examples/avro_bitcoin.py index 1b4a1d5728c3..0b4c555548eb 100644 --- a/sdks/python/apache_beam/examples/avro_bitcoin.py +++ b/sdks/python/apache_beam/examples/avro_bitcoin.py @@ -26,17 +26,6 @@ # pytype: skip-file -# beam-playground: -# name: AvroBitcoin -# description: An example that collect statistics on transactions -# in a public bitcoin dataset that was exported to avro. 
-# multifile: false -# pipeline_options: --output output.txt -# categories: -# - IO -# - Options -# - Schemas - import argparse import logging diff --git a/sdks/python/apache_beam/examples/wordcount_xlang_sql.py b/sdks/python/apache_beam/examples/wordcount_xlang_sql.py index bbbeb452464a..97a43d386c75 100644 --- a/sdks/python/apache_beam/examples/wordcount_xlang_sql.py +++ b/sdks/python/apache_beam/examples/wordcount_xlang_sql.py @@ -22,16 +22,6 @@ Docker must also be available to run this pipeline locally. """ -# beam-playground: -# name: WordCountXLangSQL -# description: A word-counting workflow that uses the SQL transform. -# multifile: false -# pipeline_options: --output output.txt -# categories: -# - IO -# - Options -# - Beam SQL - import argparse import logging import re From d44a2515bea7fe4e4d1436b757b48cbb71ed9d23 Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Tue, 14 Dec 2021 11:49:12 +0300 Subject: [PATCH 06/13] Edit tags --- .../beam/examples/DebuggingWordCount.java | 18 +++++++++--------- .../apache/beam/examples/MinimalWordCount.java | 18 +++++++++--------- .../org/apache/beam/examples/WordCount.java | 18 +++++++++--------- .../examples/cookbook/DistinctExample.java | 18 +++++++++--------- .../examples/cookbook/multiple_output_pardo.py | 6 +++--- sdks/python/apache_beam/examples/wordcount.py | 2 +- .../examples/wordcount_debugging.py | 2 +- 7 files changed, 41 insertions(+), 41 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java index 814a3a146a34..a08382d2548c 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java @@ -17,15 +17,15 @@ */ package org.apache.beam.examples; -/* beam-playground: - * name: DebuggingWordCount - * description: An example that counts words in Shakespeare/kinglear.txt includes 
regex filter("Flourish|stomach"). - * multifile: false - * pipeline_options: --output output.txt - * categories: - * - IO - * - Options - */ +// beam-playground: +// name: DebuggingWordCount +// description: An example that counts words in Shakespeare/kinglear.txt includes regex filter("Flourish|stomach"). +// multifile: false +// pipeline_options: --output output.txt +// categories: +// - IO +// - Options +// import java.util.Arrays; import java.util.List; diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java index ef1fbb1aecb7..351126df85b6 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java @@ -17,15 +17,15 @@ */ package org.apache.beam.examples; -/* beam-playground: - * name: MinimalWordCount - * description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. - * multifile: false - * pipeline_options: - * categories: - * - IO - * - Options - */ +// beam-playground: +// name: MinimalWordCount +// description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. +// multifile: false +// pipeline_options: +// categories: +// - IO +// - Options +// import java.util.Arrays; import org.apache.beam.sdk.Pipeline; diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java index 72629424aa27..fd7a8e568d16 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java @@ -17,15 +17,15 @@ */ package org.apache.beam.examples; -/* beam-playground: - * name: WordCount - * description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. 
- * multifile: false - * pipeline_options: --output output.txt - * categories: - * - IO - * - Options - */ +// beam-playground: +// name: WordCount +// description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. +// multifile: false +// pipeline_options: --output output.txt +// categories: +// - IO +// - Options +// import org.apache.beam.examples.common.ExampleUtils; import org.apache.beam.sdk.Pipeline; diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index 1e9873674a21..434acb5598cb 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -17,15 +17,15 @@ */ package org.apache.beam.examples.cookbook; -/* beam-playground: - * name: DistinctExample - * description: An example that uses Shakespeare's plays as plain text files, and removes duplicate lines across all the files. - * multifile: false - * pipeline_options: --output output.txt - * categories: - * - IO - * - Options - */ +// beam-playground: +// name: DistinctExample +// description: An example that uses Shakespeare's plays as plain text files, and removes duplicate lines across all the files. 
+// multifile: false +// pipeline_options: --output output.txt +// categories: +// - IO +// - Options +// import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; diff --git a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py index 9d74a7cec258..5e373ca6328c 100644 --- a/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py +++ b/sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py @@ -54,9 +54,9 @@ # beam-playground: # name: MultipleOutputPardo # description: This is a slightly modified version -# of the basic wordcount example. In this example words -# are divided into 2 buckets as shorts -# words (3 characters in length or less) and words (other). +# of the basic wordcount example. In this example words +# are divided into 2 buckets as shorts +# words (3 characters in length or less) and words (other). # multifile: false # pipeline_options: --output output.txt # categories: diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py index 8bb5ae2a7bd4..a23d6b47d3ff 100644 --- a/sdks/python/apache_beam/examples/wordcount.py +++ b/sdks/python/apache_beam/examples/wordcount.py @@ -22,7 +22,7 @@ # beam-playground: # name: WordCount # description: An example that counts words in Shakespeare/kinglear.txt -# and includes Beam best practices. +# and includes Beam best practices. 
# multifile: false # pipeline_options: --output output.txt # categories: diff --git a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py index ef7a41ae45d0..345aa894411e 100644 --- a/sdks/python/apache_beam/examples/wordcount_debugging.py +++ b/sdks/python/apache_beam/examples/wordcount_debugging.py @@ -45,7 +45,7 @@ # beam-playground: # name: WordCountDebugging # description: An example that counts words in Shakespeare/kinglear.txt -# includes regex filter("Flourish|stomach"). +# includes regex filter("Flourish|stomach"). # multifile: false # pipeline_options: --output output.txt # categories: From 181882eb1ecc455f3364b4eec4ffcd0ed29aa2e3 Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Tue, 14 Dec 2021 15:28:16 +0300 Subject: [PATCH 07/13] Update tags --- .../java/org/apache/beam/examples/MinimalWordCount.java | 2 +- .../main/java/org/apache/beam/examples/WordCount.java | 2 +- .../apache/beam/examples/cookbook/DistinctExample.java | 2 +- sdks/go/examples/forest/forest.go | 9 ++++++--- sdks/go/examples/grades/grades.go | 2 +- sdks/go/examples/minimal_wordcount/minimal_wordcount.go | 2 +- sdks/go/examples/wordcount/wordcount.go | 2 +- sdks/python/apache_beam/examples/wordcount.py | 3 +-- 8 files changed, 13 insertions(+), 11 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java index 351126df85b6..7d7a3830fe63 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java @@ -19,7 +19,7 @@ // beam-playground: // name: MinimalWordCount -// description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. +// description: An example that counts words in Shakespeare/kinglear.txt. 
// multifile: false // pipeline_options: // categories: diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java index fd7a8e568d16..d6875b52469b 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java @@ -19,7 +19,7 @@ // beam-playground: // name: WordCount -// description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. +// description: An example that counts words in Shakespeare/kinglear.txt. // multifile: false // pipeline_options: --output output.txt // categories: diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index 434acb5598cb..8d3a3fd56ffc 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -19,7 +19,7 @@ // beam-playground: // name: DistinctExample -// description: An example that uses Shakespeare's plays as plain text files, and removes duplicate lines across all the files. +// description: An example uses Shakespeare's plays as plain text files and removes duplicate lines across all the files. // multifile: false // pipeline_options: --output output.txt // categories: diff --git a/sdks/go/examples/forest/forest.go b/sdks/go/examples/forest/forest.go index f84da506d443..bde0fa1d6cc7 100644 --- a/sdks/go/examples/forest/forest.go +++ b/sdks/go/examples/forest/forest.go @@ -29,9 +29,12 @@ package main // beam-playground: // name: Forest -// description: An example that shows that pipeline construction is normal Go code, -// -- the pipeline "forest" is created recursively and uses a global variable -// -- and that a pipeline may contain non-connected parts. 
+// description: An example that shows that pipeline construction is normal Go +// code -- the pipeline "forest" is created recursively and uses a global +// variable -- and that a pipeline may contain non-connected parts. The pipeline +// generated has the shape of a forest where the output of each singleton leaf +// is flattened together over several rounds. This is most clearly seen via a +// visual representation of the pipeline, such as the one produced by the 'dot' runner. // multifile: false // pipeline_options: // categories: diff --git a/sdks/go/examples/grades/grades.go b/sdks/go/examples/grades/grades.go index 158f37946804..7ebc1227cd2a 100644 --- a/sdks/go/examples/grades/grades.go +++ b/sdks/go/examples/grades/grades.go @@ -17,7 +17,7 @@ package main // beam-playground: // name: Grades -// description: An example that combines grades data +// description: An example that combines grades data. // multifile: false // pipeline_options: // categories: diff --git a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go index 48c310275756..822f028d4344 100644 --- a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go +++ b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go @@ -38,7 +38,7 @@ package main // beam-playground: // name: MinimalWordCount -// description: An example that counts words in Shakespeare and includes Beam best practices. +// description: An example that counts words in Shakespeare. // multifile: false // pipeline_options: // categories: diff --git a/sdks/go/examples/wordcount/wordcount.go b/sdks/go/examples/wordcount/wordcount.go index aae99a967f50..16e2ebec6109 100644 --- a/sdks/go/examples/wordcount/wordcount.go +++ b/sdks/go/examples/wordcount/wordcount.go @@ -57,7 +57,7 @@ package main // beam-playground: // name: WordCount -// description: An example that counts words in Shakespeare/kinglear.txt and includes Beam best practices. 
+// description: An example that counts words in Shakespeare/kinglear.txt. // multifile: false // pipeline_options: --output output.txt // categories: diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py index a23d6b47d3ff..9b192631b3ae 100644 --- a/sdks/python/apache_beam/examples/wordcount.py +++ b/sdks/python/apache_beam/examples/wordcount.py @@ -21,8 +21,7 @@ # beam-playground: # name: WordCount -# description: An example that counts words in Shakespeare/kinglear.txt -# and includes Beam best practices. +# description: An example that counts words in Shakespeare/kinglear.txt. # multifile: false # pipeline_options: --output output.txt # categories: From f364ecd0f89b77a2e38fd585f0fcfb378437b917 Mon Sep 17 00:00:00 2001 From: daria-malkova Date: Mon, 20 Dec 2021 15:50:08 +0300 Subject: [PATCH 08/13] fix spotless check --- .../main/java/org/apache/beam/examples/DebuggingWordCount.java | 3 ++- .../org/apache/beam/examples/cookbook/DistinctExample.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java index a08382d2548c..a210cffe765d 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java @@ -19,7 +19,8 @@ // beam-playground: // name: DebuggingWordCount -// description: An example that counts words in Shakespeare/kinglear.txt includes regex filter("Flourish|stomach"). +// description: An example that counts words in Shakespeare/kinglear.txt includes regex +// filter("Flourish|stomach"). 
// multifile: false // pipeline_options: --output output.txt // categories: diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index 8d3a3fd56ffc..14af58f75684 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -19,7 +19,8 @@ // beam-playground: // name: DistinctExample -// description: An example uses Shakespeare's plays as plain text files and removes duplicate lines across all the files. +// description: An example uses Shakespeare's plays as plain text files and removes duplicate +// lines across all the files. // multifile: false // pipeline_options: --output output.txt // categories: From 48653b6559cdf187923af13a4524d1087676e144 Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Mon, 20 Dec 2021 18:56:56 +0300 Subject: [PATCH 09/13] Update tags --- .../main/java/org/apache/beam/examples/DebuggingWordCount.java | 2 +- .../main/java/org/apache/beam/examples/MinimalWordCount.java | 2 +- .../java/src/main/java/org/apache/beam/examples/WordCount.java | 2 +- sdks/go/examples/contains/contains.go | 2 +- sdks/go/examples/debugging_wordcount/debugging_wordcount.go | 2 +- sdks/go/examples/minimal_wordcount/minimal_wordcount.go | 2 +- sdks/go/examples/multiout/multiout.go | 2 +- sdks/go/examples/wordcount/wordcount.go | 2 +- sdks/python/apache_beam/examples/wordcount.py | 2 +- sdks/python/apache_beam/examples/wordcount_debugging.py | 2 +- sdks/python/apache_beam/examples/wordcount_minimal.py | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java index a08382d2548c..ae2a8ef7de70 100644 --- 
a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java @@ -19,7 +19,7 @@ // beam-playground: // name: DebuggingWordCount -// description: An example that counts words in Shakespeare/kinglear.txt includes regex filter("Flourish|stomach"). +// description: An example that counts words in Shakespeare's works includes regex filter("Flourish|stomach"). // multifile: false // pipeline_options: --output output.txt // categories: diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java index 7d7a3830fe63..cbbfae57547d 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java @@ -19,7 +19,7 @@ // beam-playground: // name: MinimalWordCount -// description: An example that counts words in Shakespeare/kinglear.txt. +// description: An example that counts words in Shakespeare's works. // multifile: false // pipeline_options: // categories: diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java index d6875b52469b..fc4c9b570a51 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java @@ -19,7 +19,7 @@ // beam-playground: // name: WordCount -// description: An example that counts words in Shakespeare/kinglear.txt. +// description: An example that counts words in Shakespeare's works. 
// multifile: false // pipeline_options: --output output.txt // categories: diff --git a/sdks/go/examples/contains/contains.go b/sdks/go/examples/contains/contains.go index ea15df9fc229..63742c20a665 100644 --- a/sdks/go/examples/contains/contains.go +++ b/sdks/go/examples/contains/contains.go @@ -17,7 +17,7 @@ package main // beam-playground: // name: Contains -// description: An example counts received substring in Shakespeare/kinglear.txt +// description: An example counts received substring in Shakespeare's works. // multifile: false // pipeline_options: --search king // categories: diff --git a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go index edc9e2ede534..856a9cbdf8f8 100644 --- a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go +++ b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go @@ -43,7 +43,7 @@ package main // beam-playground: // name: DebuggingWordCount -// description: An example that counts words in Shakespeare/kinglear.txt includes regex filter("Flourish|stomach"). +// description: An example that counts words in Shakespeare's works includes regex filter("Flourish|stomach"). // multifile: false // pipeline_options: --output output.txt // categories: diff --git a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go index 822f028d4344..7617bb139e5a 100644 --- a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go +++ b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go @@ -38,7 +38,7 @@ package main // beam-playground: // name: MinimalWordCount -// description: An example that counts words in Shakespeare. +// description: An example that counts words in Shakespeare's works. 
// multifile: false // pipeline_options: // categories: diff --git a/sdks/go/examples/multiout/multiout.go b/sdks/go/examples/multiout/multiout.go index 930e10efe670..e222f8378baf 100644 --- a/sdks/go/examples/multiout/multiout.go +++ b/sdks/go/examples/multiout/multiout.go @@ -19,7 +19,7 @@ package main // beam-playground: // name: MultiOut -// description: An example that counts words in Shakespeare/kinglear.txt and writes 2 output files, +// description: An example that counts words in Shakespeare's works and writes 2 output files, // -- big - for small words, // -- small - for big words. // multifile: false diff --git a/sdks/go/examples/wordcount/wordcount.go b/sdks/go/examples/wordcount/wordcount.go index 16e2ebec6109..649c463eb3d9 100644 --- a/sdks/go/examples/wordcount/wordcount.go +++ b/sdks/go/examples/wordcount/wordcount.go @@ -57,7 +57,7 @@ package main // beam-playground: // name: WordCount -// description: An example that counts words in Shakespeare/kinglear.txt. +// description: An example that counts words in Shakespeare's works. // multifile: false // pipeline_options: --output output.txt // categories: diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py index 9b192631b3ae..d9add5c23a06 100644 --- a/sdks/python/apache_beam/examples/wordcount.py +++ b/sdks/python/apache_beam/examples/wordcount.py @@ -21,7 +21,7 @@ # beam-playground: # name: WordCount -# description: An example that counts words in Shakespeare/kinglear.txt. +# description: An example that counts words in Shakespeare's works. 
# multifile: false # pipeline_options: --output output.txt # categories: diff --git a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py index 345aa894411e..c581c19ccdbe 100644 --- a/sdks/python/apache_beam/examples/wordcount_debugging.py +++ b/sdks/python/apache_beam/examples/wordcount_debugging.py @@ -44,7 +44,7 @@ # beam-playground: # name: WordCountDebugging -# description: An example that counts words in Shakespeare/kinglear.txt +# description: An example that counts words in Shakespeare's works. # includes regex filter("Flourish|stomach"). # multifile: false # pipeline_options: --output output.txt diff --git a/sdks/python/apache_beam/examples/wordcount_minimal.py b/sdks/python/apache_beam/examples/wordcount_minimal.py index 9b277d02d613..d3335bb425c3 100644 --- a/sdks/python/apache_beam/examples/wordcount_minimal.py +++ b/sdks/python/apache_beam/examples/wordcount_minimal.py @@ -48,7 +48,7 @@ # beam-playground: # name: WordCountMinimal -# description: An example that counts words in Shakespeare/kinglear.txt. +# description: An example that counts words in Shakespeare's works. 
# multifile: false # pipeline_options: --output output.txt # categories: From bae4a6eb7be004eb64327b63a39867198f75b03a Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Thu, 23 Dec 2021 17:50:57 +0300 Subject: [PATCH 10/13] MinimalWordCount.java and DistinctExample.java reads only one file --- .../java/org/apache/beam/examples/MinimalWordCount.java | 4 ++-- .../org/apache/beam/examples/cookbook/DistinctExample.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java index cbbfae57547d..be5b3d5d6ab2 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java @@ -97,8 +97,8 @@ public static void main(String[] args) { // of input text files. TextIO.Read returns a PCollection where each element is one line from // the input text (a set of Shakespeare's texts). - // This example reads a public data set consisting of the complete works of Shakespeare. - p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*")) + // This example reads from a public dataset containing the text of King Lear. + p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/kinglear.txt")) // Concept #2: Apply a FlatMapElements transform the PCollection of text lines. 
// This transform splits the lines in PCollection, where each element is an diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index 14af58f75684..603ced45701c 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -39,8 +39,8 @@ import org.apache.beam.sdk.transforms.Distinct; /** - * This example uses as input Shakespeare's plays as plaintext files, and will remove any duplicate - * lines across all the files. (The output does not preserve any input order). + * This example uses as input text of King Lear, by William Shakespeare as plaintext files, and will remove any duplicate + * lines from this file. (The output does not preserve any input order). * *

Concepts: the Distinct transform, and how to wire transforms together. Demonstrates {@link * org.apache.beam.sdk.io.TextIO.Read}/ {@link Distinct}/{@link @@ -69,7 +69,7 @@ public class DistinctExample { */ public interface Options extends PipelineOptions { @Description("Path to the directory or GCS prefix containing files to read from") - @Default.String("gs://apache-beam-samples/shakespeare/*") + @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt") String getInput(); void setInput(String value); From 95b601f0ec9fab2075cb799807bf58004c2d5b03 Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Thu, 23 Dec 2021 18:33:09 +0300 Subject: [PATCH 11/13] Fix checks --- .../org/apache/beam/examples/cookbook/DistinctExample.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index 603ced45701c..b87fc2b4f7ec 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -39,8 +39,8 @@ import org.apache.beam.sdk.transforms.Distinct; /** - * This example uses as input text of King Lear, by William Shakespeare as plaintext files, and will remove any duplicate - * lines from this file. (The output does not preserve any input order). + * This example takes the text of King Lear, by William Shakespeare, as plaintext input and will + * remove any duplicate lines from that file. (The output does not preserve any input order). * *

Concepts: the Distinct transform, and how to wire transforms together. Demonstrates {@link * org.apache.beam.sdk.io.TextIO.Read}/ {@link Distinct}/{@link From fdbc523328e03561b992b23446c1eb08784e873d Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Mon, 27 Dec 2021 16:30:18 +0300 Subject: [PATCH 12/13] Change examples which read * from bucket to read one file --- .../org/apache/beam/examples/DebuggingWordCount.java | 1 - .../org/apache/beam/examples/MinimalWordCount.java | 1 - .../java/org/apache/beam/examples/WordCount.java | 1 - .../beam/examples/cookbook/DistinctExample.java | 7 ++++--- .../examples/minimal_wordcount/minimal_wordcount.go | 12 +++++++----- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java index a210cffe765d..5641dccbd7ed 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java @@ -26,7 +26,6 @@ // categories: // - IO // - Options -// import java.util.Arrays; import java.util.List; diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java index be5b3d5d6ab2..96aad0b28a4a 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java @@ -25,7 +25,6 @@ // categories: // - IO // - Options -// import java.util.Arrays; import org.apache.beam.sdk.Pipeline; diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java index fc4c9b570a51..8f56e979b0fc 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java +++ 
b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java @@ -25,7 +25,6 @@ // categories: // - IO // - Options -// import org.apache.beam.examples.common.ExampleUtils; import org.apache.beam.sdk.Pipeline; diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index b87fc2b4f7ec..5b19539ce1db 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -19,8 +19,9 @@ // beam-playground: // name: DistinctExample -// description: An example uses Shakespeare's plays as plain text files and removes duplicate -// lines across all the files. +// description: An example that uses the text of King Lear, +// by William Shakespeare, as a plain text file and removes +// duplicate lines from that file. // multifile: false // pipeline_options: --output output.txt // categories: @@ -57,7 +58,7 @@ * * See examples/java/README.md for instructions about how to configure different runners. * - *

The input defaults to {@code gs://apache-beam-samples/shakespeare/*} and can be overridden + *

The input defaults to {@code gs://apache-beam-samples/shakespeare/kinglear.txt} and can be overridden * with {@code --input}. */ public class DistinctExample { diff --git a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go index 7617bb139e5a..618ea12cd346 100644 --- a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go +++ b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go @@ -13,7 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// minimal_wordcount is an example that counts words in Shakespeare. +// minimal_wordcount is an example that counts words in King Lear, +// by William Shakespeare. // // This example is the first in a series of four successively more detailed // 'word count' examples. Here, for simplicity, we don't show any @@ -38,7 +39,8 @@ package main // beam-playground: // name: MinimalWordCount -// description: An example that counts words in Shakespeare's works. +// description: An example that counts words in King Lear, +// by William Shakespeare. // multifile: false // pipeline_options: // categories: @@ -75,9 +77,9 @@ func main() { // PCollection where each element is one line from the input text // (one of of Shakespeare's texts). - // This example reads a public data set consisting of the complete works - // of Shakespeare. - lines := textio.Read(s, "gs://apache-beam-samples/shakespeare/*") + // This example reads from a public dataset containing the text + // of King Lear. + lines := textio.Read(s, "gs://apache-beam-samples/shakespeare/kinglear.txt") // Concept #2: Invoke a ParDo transform on our PCollection of text lines. 
// This ParDo invokes a DoFn (defined in-line) on each element that From 3a3e076557df13af9aa2c4f25798fcf9291518bd Mon Sep 17 00:00:00 2001 From: Pavel Avilov Date: Tue, 28 Dec 2021 16:08:05 +0300 Subject: [PATCH 13/13] Update tags; --- .../java/org/apache/beam/examples/DebuggingWordCount.java | 3 ++- .../main/java/org/apache/beam/examples/MinimalWordCount.java | 4 +++- .../src/main/java/org/apache/beam/examples/WordCount.java | 2 +- .../org/apache/beam/examples/cookbook/DistinctExample.java | 4 ++-- playground/categories.yaml | 4 ++++ sdks/go/examples/contains/contains.go | 3 ++- sdks/go/examples/debugging_wordcount/debugging_wordcount.go | 5 +++-- sdks/go/examples/forest/forest.go | 3 ++- sdks/go/examples/grades/grades.go | 3 ++- sdks/go/examples/minimal_wordcount/minimal_wordcount.go | 2 ++ sdks/go/examples/multiout/multiout.go | 1 + sdks/go/examples/stringsplit/stringsplit.go | 3 ++- sdks/go/examples/windowed_wordcount/windowed_wordcount.go | 2 +- sdks/go/examples/wordcount/wordcount.go | 2 +- sdks/python/apache_beam/examples/wordcount.py | 2 +- sdks/python/apache_beam/examples/wordcount_debugging.py | 5 ++++- sdks/python/apache_beam/examples/wordcount_minimal.py | 3 +++ sdks/python/apache_beam/examples/wordcount_with_metrics.py | 2 +- 18 files changed, 37 insertions(+), 16 deletions(-) diff --git a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java index 5641dccbd7ed..1d909fc86810 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/DebuggingWordCount.java @@ -24,7 +24,8 @@ // multifile: false // pipeline_options: --output output.txt // categories: -// - IO +// - Debugging +// - Filtering // - Options import java.util.Arrays; diff --git a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java 
b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java index 96aad0b28a4a..d4a5a5655db8 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/MinimalWordCount.java @@ -23,8 +23,10 @@ // multifile: false // pipeline_options: // categories: +// - Combiners +// - Filtering // - IO -// - Options +// - Core Transforms import java.util.Arrays; import org.apache.beam.sdk.Pipeline; diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java index 8f56e979b0fc..d02cb1225851 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java +++ b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java @@ -23,7 +23,7 @@ // multifile: false // pipeline_options: --output output.txt // categories: -// - IO +// - Combiners // - Options import org.apache.beam.examples.common.ExampleUtils; diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java index 5b19539ce1db..5aa349e589ec 100644 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java +++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DistinctExample.java @@ -25,9 +25,9 @@ // multifile: false // pipeline_options: --output output.txt // categories: -// - IO +// - Filtering // - Options -// +// - Core Transforms import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath; diff --git a/playground/categories.yaml b/playground/categories.yaml index fbc7cb5050d1..bff77b2db7ae 100644 --- a/playground/categories.yaml +++ b/playground/categories.yaml @@ -31,3 +31,7 @@ categories: - Coders - Stateful Processing - Beam SQL + - Debugging + - Filtering + - Flatten + - Core Transforms diff --git 
a/sdks/go/examples/contains/contains.go b/sdks/go/examples/contains/contains.go index 63742c20a665..1a352ceddf9c 100644 --- a/sdks/go/examples/contains/contains.go +++ b/sdks/go/examples/contains/contains.go @@ -21,8 +21,9 @@ package main // multifile: false // pipeline_options: --search king // categories: -// - IO +// - Filtering // - Options +// - Debugging import ( "context" diff --git a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go index 856a9cbdf8f8..4c69f6b99acb 100644 --- a/sdks/go/examples/debugging_wordcount/debugging_wordcount.go +++ b/sdks/go/examples/debugging_wordcount/debugging_wordcount.go @@ -47,8 +47,9 @@ package main // multifile: false // pipeline_options: --output output.txt // categories: -// - IO -// - Options +// - Options +// - Filtering +// - Debugging import ( "context" diff --git a/sdks/go/examples/forest/forest.go b/sdks/go/examples/forest/forest.go index bde0fa1d6cc7..9e0fe9de0fcb 100644 --- a/sdks/go/examples/forest/forest.go +++ b/sdks/go/examples/forest/forest.go @@ -38,7 +38,8 @@ package main // multifile: false // pipeline_options: // categories: -// - Testing +// - Flatten +// - Branching import ( "context" diff --git a/sdks/go/examples/grades/grades.go b/sdks/go/examples/grades/grades.go index 7ebc1227cd2a..f0fa3f1d7e4a 100644 --- a/sdks/go/examples/grades/grades.go +++ b/sdks/go/examples/grades/grades.go @@ -21,8 +21,9 @@ package main // multifile: false // pipeline_options: // categories: -// - Testing +// - Debugging // - Combiners +// - Filtering import ( "context" diff --git a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go index 618ea12cd346..1607a7badd76 100644 --- a/sdks/go/examples/minimal_wordcount/minimal_wordcount.go +++ b/sdks/go/examples/minimal_wordcount/minimal_wordcount.go @@ -45,6 +45,8 @@ package main // pipeline_options: // categories: // - IO +// - Combiners +// - Core 
Transforms import ( "context" diff --git a/sdks/go/examples/multiout/multiout.go b/sdks/go/examples/multiout/multiout.go index e222f8378baf..af26390db314 100644 --- a/sdks/go/examples/multiout/multiout.go +++ b/sdks/go/examples/multiout/multiout.go @@ -27,6 +27,7 @@ package main // categories: // - IO // - Options +// - Branching // - Multiple Outputs import ( diff --git a/sdks/go/examples/stringsplit/stringsplit.go b/sdks/go/examples/stringsplit/stringsplit.go index a1335efb02d5..4b51e2698466 100644 --- a/sdks/go/examples/stringsplit/stringsplit.go +++ b/sdks/go/examples/stringsplit/stringsplit.go @@ -42,7 +42,8 @@ package main // multifile: false // pipeline_options: // categories: -// - Testing +// - Debugging +// - Flatten import ( "context" diff --git a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go index 98ebfe96548d..2e04ec48414d 100644 --- a/sdks/go/examples/windowed_wordcount/windowed_wordcount.go +++ b/sdks/go/examples/windowed_wordcount/windowed_wordcount.go @@ -40,7 +40,7 @@ package main // multifile: false // pipeline_options: --output output.txt // categories: -// - IO +// - Windowing // - Options // - Combiners diff --git a/sdks/go/examples/wordcount/wordcount.go b/sdks/go/examples/wordcount/wordcount.go index 649c463eb3d9..d34bd8c8d59c 100644 --- a/sdks/go/examples/wordcount/wordcount.go +++ b/sdks/go/examples/wordcount/wordcount.go @@ -61,7 +61,7 @@ package main // multifile: false // pipeline_options: --output output.txt // categories: -// - IO +// - Combiners // - Options import ( diff --git a/sdks/python/apache_beam/examples/wordcount.py b/sdks/python/apache_beam/examples/wordcount.py index d9add5c23a06..b32e41bc377a 100644 --- a/sdks/python/apache_beam/examples/wordcount.py +++ b/sdks/python/apache_beam/examples/wordcount.py @@ -25,7 +25,7 @@ # multifile: false # pipeline_options: --output output.txt # categories: -# - IO +# - Combiners # - Options import argparse diff --git 
a/sdks/python/apache_beam/examples/wordcount_debugging.py b/sdks/python/apache_beam/examples/wordcount_debugging.py index c581c19ccdbe..859124041586 100644 --- a/sdks/python/apache_beam/examples/wordcount_debugging.py +++ b/sdks/python/apache_beam/examples/wordcount_debugging.py @@ -49,8 +49,11 @@ # multifile: false # pipeline_options: --output output.txt # categories: -# - IO +# - Flatten +# - Debugging # - Options +# - Combiners +# - Filtering import argparse import logging diff --git a/sdks/python/apache_beam/examples/wordcount_minimal.py b/sdks/python/apache_beam/examples/wordcount_minimal.py index d3335bb425c3..4f1f40169102 100644 --- a/sdks/python/apache_beam/examples/wordcount_minimal.py +++ b/sdks/python/apache_beam/examples/wordcount_minimal.py @@ -53,7 +53,10 @@ # pipeline_options: --output output.txt # categories: # - IO +# - Core Transforms +# - Flatten # - Options +# - Combiners import argparse import logging diff --git a/sdks/python/apache_beam/examples/wordcount_with_metrics.py b/sdks/python/apache_beam/examples/wordcount_with_metrics.py index 693b6bdf1be4..65f40d8cbade 100644 --- a/sdks/python/apache_beam/examples/wordcount_with_metrics.py +++ b/sdks/python/apache_beam/examples/wordcount_with_metrics.py @@ -25,7 +25,7 @@ # multifile: false # pipeline_options: --output output.txt # categories: -# - IO +# - Combiners # - Options # - Metrics