Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@
*/
package org.apache.beam.examples;

// beam-playground:
// name: DebuggingWordCount
// description: An example that counts words in Shakespeare/kinglear.txt and includes a regex
// filter("Flourish|stomach").
// multifile: false
// pipeline_options: --output output.txt
// categories:
// - Debugging
// - Filtering
// - Options

import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@
*/
package org.apache.beam.examples;

// beam-playground:
// name: MinimalWordCount
// description: An example that counts words in Shakespeare's works.
// multifile: false
// pipeline_options:
// categories:
// - Combiners
// - Filtering
// - IO
// - Core Transforms

import java.util.Arrays;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
Expand Down Expand Up @@ -87,8 +98,8 @@ public static void main(String[] args) {
// of input text files. TextIO.Read returns a PCollection where each element is one line from
// the input text (a set of Shakespeare's texts).

// This example reads a public data set consisting of the complete works of Shakespeare.
p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/*"))
// This example reads from a public dataset containing the text of King Lear.
p.apply(TextIO.read().from("gs://apache-beam-samples/shakespeare/kinglear.txt"))

// Concept #2: Apply a FlatMapElements transform the PCollection of text lines.
// This transform splits the lines in PCollection<String>, where each element is an
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@
*/
package org.apache.beam.examples;

// beam-playground:
// name: WordCount
// description: An example that counts words in Shakespeare's works.
// multifile: false
// pipeline_options: --output output.txt
// categories:
// - Combiners
// - Options

import org.apache.beam.examples.common.ExampleUtils;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@
*/
package org.apache.beam.examples.cookbook;

// beam-playground:
// name: DistinctExample
// description: An example that uses the text of King Lear,
// by William Shakespeare, as a plain text input file and removes
// duplicate lines from that file.
// multifile: false
// pipeline_options: --output output.txt
// categories:
// - Filtering
// - Options
// - Core Transforms

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath;
import org.apache.beam.sdk.io.TextIO;
Expand All @@ -28,8 +40,8 @@
import org.apache.beam.sdk.transforms.Distinct;

/**
* This example uses as input Shakespeare's plays as plaintext files, and will remove any duplicate
* lines across all the files. (The output does not preserve any input order).
* This example uses the text of King Lear, by William Shakespeare, as its plaintext input and will
* remove any duplicate lines from this file. (The output does not preserve any input order).
*
* <p>Concepts: the Distinct transform, and how to wire transforms together. Demonstrates {@link
* org.apache.beam.sdk.io.TextIO.Read}/ {@link Distinct}/{@link
Expand All @@ -46,7 +58,7 @@
*
* See examples/java/README.md for instructions about how to configure different runners.
*
* <p>The input defaults to {@code gs://apache-beam-samples/shakespeare/*} and can be overridden
* <p>The input defaults to {@code gs://apache-beam-samples/shakespeare/kinglear.txt} and can be overridden
* with {@code --input}.
*/
public class DistinctExample {
Expand All @@ -58,7 +70,7 @@ public class DistinctExample {
*/
public interface Options extends PipelineOptions {
@Description("Path to the directory or GCS prefix containing files to read from")
@Default.String("gs://apache-beam-samples/shakespeare/*")
@Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt")
String getInput();

void setInput(String value);
Expand Down
3 changes: 2 additions & 1 deletion playground/categories.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ categories:
- Branching
- Flatten
- Core Transforms
- Windowing
- Windowing
- Debugging
10 changes: 10 additions & 0 deletions sdks/go/examples/contains/contains.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@

package main

// beam-playground:
// name: Contains
// description: An example that counts occurrences of a given substring in Shakespeare's works.
// multifile: false
// pipeline_options: --search king
// categories:
// - Filtering
// - Options
// - Debugging

import (
"context"
"flag"
Expand Down
10 changes: 10 additions & 0 deletions sdks/go/examples/debugging_wordcount/debugging_wordcount.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@
// with --input.
package main

// beam-playground:
// name: DebuggingWordCount
// description: An example that counts words in Shakespeare's works and includes a regex filter("Flourish|stomach").
// multifile: false
// pipeline_options: --output output.txt
// categories:
// - Options
// - Filtering
// - Debugging

import (
"context"
"flag"
Expand Down
14 changes: 14 additions & 0 deletions sdks/go/examples/forest/forest.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,20 @@
// orders.
package main

// beam-playground:
// name: Forest
// description: An example that shows that pipeline construction is normal Go
// code -- the pipeline "forest" is created recursively and uses a global
// variable -- and that a pipeline may contain non-connected parts. The pipeline
// generated has the shape of a forest where the output of each singleton leaf
// is flattened together over several rounds. This is most clearly seen via a
// visual representation of the pipeline, such as the one produced by the 'dot' runner.
// multifile: false
// pipeline_options:
// categories:
// - Flatten
// - Branching

import (
"context"
"flag"
Expand Down
10 changes: 10 additions & 0 deletions sdks/go/examples/grades/grades.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@

package main

// beam-playground:
// name: Grades
// description: An example that combines grades data.
// multifile: false
// pipeline_options:
// categories:
// - Debugging
// - Combiners
// - Filtering

import (
"context"
"flag"
Expand Down
20 changes: 16 additions & 4 deletions sdks/go/examples/minimal_wordcount/minimal_wordcount.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

// minimal_wordcount is an example that counts words in Shakespeare.
// minimal_wordcount is an example that counts words in King Lear,
// by William Shakespeare.
//
// This example is the first in a series of four successively more detailed
// 'word count' examples. Here, for simplicity, we don't show any
Expand All @@ -36,6 +37,17 @@
// "wordcounts.txt" in your current working directory.
package main

// beam-playground:
// name: MinimalWordCount
// description: An example that counts words in King Lear,
// by William Shakespeare.
// multifile: false
// pipeline_options:
// categories:
// - IO
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// - IO
// - IO
// - Combiners
// - Core Transforms

// - Combiners
// - Core Transforms

import (
"context"
"fmt"
Expand Down Expand Up @@ -67,9 +79,9 @@ func main() {
// PCollection where each element is one line from the input text
// (one of Shakespeare's texts).

// This example reads a public data set consisting of the complete works
// of Shakespeare.
lines := textio.Read(s, "gs://apache-beam-samples/shakespeare/*")
// This example reads from a public dataset containing the text
// of King Lear.
lines := textio.Read(s, "gs://apache-beam-samples/shakespeare/kinglear.txt")

// Concept #2: Invoke a ParDo transform on our PCollection of text lines.
// This ParDo invokes a DoFn (defined in-line) on each element that
Expand Down
13 changes: 13 additions & 0 deletions sdks/go/examples/multiout/multiout.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,19 @@
// and writes 2 output files.
package main

// beam-playground:
// name: MultiOut
// description: An example that counts words in Shakespeare's works and writes 2 output files,
// -- big - for big words,
// -- small - for small words.
// multifile: false
// pipeline_options: --small sOutput.txt --big bOutput.txt
// categories:
// - IO
// - Options
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// - Options
// - Options
// - Branching

// - Branching
// - Multiple Outputs

import (
"context"
"flag"
Expand Down
9 changes: 9 additions & 0 deletions sdks/go/examples/stringsplit/stringsplit.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@
// phrase "StringSplit Output".
package main

// beam-playground:
// name: StringSplit
// description: An example of using a Splittable DoFn in the Go SDK with a portable runner.
// multifile: false
// pipeline_options:
// categories:
// - Debugging
// - Flatten

import (
"context"
"flag"
Expand Down
10 changes: 10 additions & 0 deletions sdks/go/examples/windowed_wordcount/windowed_wordcount.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@
// 5. Accessing the window of an element
package main

// beam-playground:
// name: WindowedWordCount
// description: An example that counts words in text, and can run over either unbounded or bounded input collections.
// multifile: false
// pipeline_options: --output output.txt
// categories:
// - Windowing
// - Options
// - Combiners

import (
"context"
"flag"
Expand Down
9 changes: 9 additions & 0 deletions sdks/go/examples/wordcount/wordcount.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@
// with --input.
package main

// beam-playground:
// name: WordCount
// description: An example that counts words in Shakespeare's works.
// multifile: false
// pipeline_options: --output output.txt
// categories:
// - Combiners
// - Options

import (
"context"
"flag"
Expand Down
10 changes: 10 additions & 0 deletions sdks/go/examples/yatzy/yatzy.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@
// non-deterministic and produce different pipelines on each invocation.
package main

// beam-playground:
// name: Yatzy
// description: An example that shows that pipeline construction is normal Go code.
// It can even be non-deterministic and produce different pipelines on each invocation.
// multifile: false
// pipeline_options:
// categories:
// - IO
// - Side Input

import (
"context"
"flag"
Expand Down
13 changes: 13 additions & 0 deletions sdks/python/apache_beam/examples/cookbook/multiple_output_pardo.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,19 @@

# pytype: skip-file

# beam-playground:
# name: MultipleOutputPardo
# description: This is a slightly modified version
# of the basic wordcount example. In this example words
# are divided into 2 buckets: short
# words (3 characters in length or less) and other words.
# multifile: false
# pipeline_options: --output output.txt
# categories:
# - IO
# - Options
# - Multiple Outputs

import argparse
import logging
import re
Expand Down
9 changes: 9 additions & 0 deletions sdks/python/apache_beam/examples/wordcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,15 @@

# pytype: skip-file

# beam-playground:
# name: WordCount
# description: An example that counts words in Shakespeare's works.
# multifile: false
# pipeline_options: --output output.txt
# categories:
# - Combiners
# - Options

import argparse
import logging
import re
Expand Down
13 changes: 13 additions & 0 deletions sdks/python/apache_beam/examples/wordcount_debugging.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@

# pytype: skip-file

# beam-playground:
# name: WordCountDebugging
# description: An example that counts words in Shakespeare's works and
# includes a regex filter("Flourish|stomach").
# multifile: false
# pipeline_options: --output output.txt
# categories:
# - Flatten
# - Debugging
# - Options
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# - Options
# - Options
# - Combiners
# - Filtering

# - Combiners
# - Filtering

import argparse
import logging
import re
Expand Down
12 changes: 12 additions & 0 deletions sdks/python/apache_beam/examples/wordcount_minimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@

# pytype: skip-file

# beam-playground:
# name: WordCountMinimal
# description: An example that counts words in Shakespeare's works.
# multifile: false
# pipeline_options: --output output.txt
# categories:
# - IO
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# - IO
# - IO
# - Core Transforms
# - Flatten

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think, we should also add Combiners

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

# - Core Transforms
# - Flatten
# - Options
# - Combiners

import argparse
import logging
import re
Expand Down
Loading