diff --git a/.travis.yml b/.travis.yml
index 09f916d..1348cf0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,5 +2,4 @@ language: ruby
rvm:
- jruby-18mode
notifications:
- recipients:
- - mwalker@etsy.com
+ irc: "irc.freenode.org#etsydoop"
diff --git a/Gemfile b/Gemfile
index a65514d..ae50466 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,6 +1,6 @@
-source :rubygems
+source 'https://rubygems.org'
group :test do
- gem 'rake', '0.8.7'
+ gem 'rake', '10.0.3'
gem 'rspec', '1.1.11'
end
diff --git a/Gemfile.lock b/Gemfile.lock
index bda8983..7b6363d 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,12 +1,12 @@
GEM
- remote: http://rubygems.org/
+ remote: https://rubygems.org/
specs:
- rake (0.8.7)
+ rake (10.0.3)
rspec (1.1.11)
PLATFORMS
java
DEPENDENCIES
- rake (= 0.8.7)
+ rake (= 10.0.3)
rspec (= 1.1.11)
diff --git a/LICENSE.txt b/LICENSE.txt
index fc8a5de..331d4b0 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,165 +1,18 @@
- GNU LESSER GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
+License:
+ Project and contact information: http://github.com/mrwalker/cascading.jruby
- Copyright (C) 2007 Free Software Foundation, Inc.
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
- This version of the GNU Lesser General Public License incorporates
-the terms and conditions of version 3 of the GNU General Public
-License, supplemented by the additional permissions listed below.
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
- 0. Additional Definitions.
+Third-party Licenses:
- As used herein, "this License" refers to version 3 of the GNU Lesser
-General Public License, and the "GNU GPL" refers to version 3 of the GNU
-General Public License.
-
- "The Library" refers to a covered work governed by this License,
-other than an Application or a Combined Work as defined below.
-
- An "Application" is any work that makes use of an interface provided
-by the Library, but which is not otherwise based on the Library.
-Defining a subclass of a class defined by the Library is deemed a mode
-of using an interface provided by the Library.
-
- A "Combined Work" is a work produced by combining or linking an
-Application with the Library. The particular version of the Library
-with which the Combined Work was made is also called the "Linked
-Version".
-
- The "Minimal Corresponding Source" for a Combined Work means the
-Corresponding Source for the Combined Work, excluding any source code
-for portions of the Combined Work that, considered in isolation, are
-based on the Application, and not on the Linked Version.
-
- The "Corresponding Application Code" for a Combined Work means the
-object code and/or source code for the Application, including any data
-and utility programs needed for reproducing the Combined Work from the
-Application, but excluding the System Libraries of the Combined Work.
-
- 1. Exception to Section 3 of the GNU GPL.
-
- You may convey a covered work under sections 3 and 4 of this License
-without being bound by section 3 of the GNU GPL.
-
- 2. Conveying Modified Versions.
-
- If you modify a copy of the Library, and, in your modifications, a
-facility refers to a function or data to be supplied by an Application
-that uses the facility (other than as an argument passed when the
-facility is invoked), then you may convey a copy of the modified
-version:
-
- a) under this License, provided that you make a good faith effort to
- ensure that, in the event an Application does not supply the
- function or data, the facility still operates, and performs
- whatever part of its purpose remains meaningful, or
-
- b) under the GNU GPL, with none of the additional permissions of
- this License applicable to that copy.
-
- 3. Object Code Incorporating Material from Library Header Files.
-
- The object code form of an Application may incorporate material from
-a header file that is part of the Library. You may convey such object
-code under terms of your choice, provided that, if the incorporated
-material is not limited to numerical parameters, data structure
-layouts and accessors, or small macros, inline functions and templates
-(ten or fewer lines in length), you do both of the following:
-
- a) Give prominent notice with each copy of the object code that the
- Library is used in it and that the Library and its use are
- covered by this License.
-
- b) Accompany the object code with a copy of the GNU GPL and this license
- document.
-
- 4. Combined Works.
-
- You may convey a Combined Work under terms of your choice that,
-taken together, effectively do not restrict modification of the
-portions of the Library contained in the Combined Work and reverse
-engineering for debugging such modifications, if you also do each of
-the following:
-
- a) Give prominent notice with each copy of the Combined Work that
- the Library is used in it and that the Library and its use are
- covered by this License.
-
- b) Accompany the Combined Work with a copy of the GNU GPL and this license
- document.
-
- c) For a Combined Work that displays copyright notices during
- execution, include the copyright notice for the Library among
- these notices, as well as a reference directing the user to the
- copies of the GNU GPL and this license document.
-
- d) Do one of the following:
-
- 0) Convey the Minimal Corresponding Source under the terms of this
- License, and the Corresponding Application Code in a form
- suitable for, and under terms that permit, the user to
- recombine or relink the Application with a modified version of
- the Linked Version to produce a modified Combined Work, in the
- manner specified by section 6 of the GNU GPL for conveying
- Corresponding Source.
-
- 1) Use a suitable shared library mechanism for linking with the
- Library. A suitable mechanism is one that (a) uses at run time
- a copy of the Library already present on the user's computer
- system, and (b) will operate properly with a modified version
- of the Library that is interface-compatible with the Linked
- Version.
-
- e) Provide Installation Information, but only if you would otherwise
- be required to provide such information under section 6 of the
- GNU GPL, and only to the extent that such information is
- necessary to install and execute a modified version of the
- Combined Work produced by recombining or relinking the
- Application with a modified version of the Linked Version. (If
- you use option 4d0, the Installation Information must accompany
- the Minimal Corresponding Source and Corresponding Application
- Code. If you use option 4d1, you must provide the Installation
- Information in the manner specified by section 6 of the GNU GPL
- for conveying Corresponding Source.)
-
- 5. Combined Libraries.
-
- You may place library facilities that are a work based on the
-Library side by side in a single library together with other library
-facilities that are not Applications and are not covered by this
-License, and convey such a combined library under terms of your
-choice, if you do both of the following:
-
- a) Accompany the combined library with a copy of the same work based
- on the Library, uncombined with any other library facilities,
- conveyed under the terms of this License.
-
- b) Give prominent notice with the combined library that part of it
- is a work based on the Library, and explaining where to find the
- accompanying uncombined form of the same work.
-
- 6. Revised Versions of the GNU Lesser General Public License.
-
- The Free Software Foundation may publish revised and/or new versions
-of the GNU Lesser General Public License from time to time. Such new
-versions will be similar in spirit to the present version, but may
-differ in detail to address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Library as you received it specifies that a certain numbered version
-of the GNU Lesser General Public License "or any later version"
-applies to it, you have the option of following the terms and
-conditions either of that published version or of any later version
-published by the Free Software Foundation. If the Library as you
-received it does not specify a version number of the GNU Lesser
-General Public License, you may choose any version of the GNU Lesser
-General Public License ever published by the Free Software Foundation.
-
- If the Library as you received it specifies that a proxy can decide
-whether future versions of the GNU Lesser General Public License shall
-apply, that proxy's public statement of acceptance of any version is
-permanent authorization for you to choose that version for the
-Library.
+ All third-party dependencies are listed in ivy.xml.
diff --git a/README.md b/README.md
index c6bf9c2..85ef09c 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ cascade 'wordcount', :mode => :local do
source 'input', tap(input_path)
assembly 'input' do
- split_rows 'line', 'word', :pattern => /[.,]*\s+/, :output => 'word'
+ split_rows 'line', /[.,]*\s+/, 'word', :output => 'word'
group_by 'word' do
count
end
@@ -28,8 +28,8 @@ end.complete
cascading.jruby provides a clean Ruby interface to Cascading, but doesn't attempt to add abstractions on top of it. Therefore, you should be acquainted with the [Cascading](http://docs.cascading.org/cascading/2.0/userguide/html/) [API](http://docs.cascading.org/cascading/2.0/javadoc/) before you begin.
-For operations you can apply to your dataflow within a pipe assembly, see the [Assembly](http://rubydoc.info/gems/cascading.jruby/0.0.10/Cascading/Assembly) class. For operations available within a block passed to a group_by, union, or join, see the [Aggregations](http://rubydoc.info/gems/cascading.jruby/0.0.10/Cascading/Aggregations) class.
+For operations you can apply to your dataflow within a pipe assembly, see the [Assembly](http://rubydoc.info/gems/cascading.jruby/1.0.0/Cascading/Assembly) class. For operations available within a block passed to a group_by, union, or join, see the [Aggregations](http://rubydoc.info/gems/cascading.jruby/1.0.0/Cascading/Aggregations) class.
-Note that the Ruby code you write merely constructs a Cascading job, so no JRuby runtime is required on your cluster. This stands in contrast with writing [Hadoop streaming jobs in Ruby](http://www.quora.com/How-do-the-different-options-for-Ruby-on-Hadoop-compare). To run cascading.jruby applications on a Hadoop cluster, you must use [Jading](https://github.com/etsy/jading) to package them into a job jar.
+Note that the Ruby code you write merely constructs a Cascading job, so no JRuby runtime is required on your cluster. This stands in contrast with writing [Hadoop streaming jobs in Ruby](http://www.quora.com/How-do-the-different-options-for-Ruby-on-Hadoop-compare). To run cascading.jruby applications on a Hadoop cluster, you must use [Jading](https://github.com/mrwalker/jading) to package them into a job jar.
-cascading.jruby has been tested on JRuby versions 1.2.0, 1.4.0, 1.5.3, 1.6.5, and 1.6.7.2.
+cascading.jruby has been tested on JRuby versions 1.2.0, 1.4.0, 1.5.3, 1.6.5, 1.6.7.2, 1.7.0, and 1.7.3.
diff --git a/cascading.jruby.gemspec b/cascading.jruby.gemspec
index a9c27c2..10057fa 100644
--- a/cascading.jruby.gemspec
+++ b/cascading.jruby.gemspec
@@ -1,24 +1,25 @@
# -*- encoding: utf-8 -*-
+#$: << File.join(File.dirname(__FILE__), '..', 'lib')
+#require 'cascading'
Gem::Specification.new do |s|
s.name = "cascading.jruby"
- s.version = "0.0.10"
-
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+ # TODO: in 2.0.0, Job will encapsulate Cascading module, so we can directly
+ # grab the version from there; for now, just hack it
+ #s.version = Cascading::VERSION
+ s.version = '1.0.0'
+ s.date = Time.now.strftime('%Y-%m-%d')
+ s.summary = "A JRuby DSL for Cascading"
+ s.homepage = "http://github.com/mrwalker/cascading.jruby"
+ s.email = "matt.r.walker@gmail.com"
s.authors = ["Matt Walker", "Gr\303\251goire Marabout"]
- s.description = "cascading.jruby is a small DSL above Cascading, written in JRuby"
- s.email = "mwalker@etsy.com"
- s.extra_rdoc_files = ["LICENSE.txt"]
- s.files = ["lib/cascading.rb", "lib/cascading/aggregations.rb", "lib/cascading/assembly.rb", "lib/cascading/base.rb", "lib/cascading/cascade.rb", "lib/cascading/cascading.rb", "lib/cascading/cascading_exception.rb", "lib/cascading/expr_stub.rb", "lib/cascading/ext/array.rb", "lib/cascading/flow.rb", "lib/cascading/mode.rb", "lib/cascading/operations.rb", "lib/cascading/scope.rb", "lib/cascading/sub_assembly.rb", "lib/cascading/tap.rb"]
- s.homepage = "http://github.com/etsy/cascading.jruby"
- s.rdoc_options = ["--main", "README.md"]
+
+ s.files = Dir.glob("lib/**/*.rb")
+ s.test_files = Dir.glob("test/**/*.rb")
s.require_paths = ["lib"]
- s.rubyforge_project = "cascading.jruby"
- s.rubygems_version = "1.8.21"
- s.summary = "A JRuby DSL for Cascading"
- s.test_files = ["test/test_aggregations.rb", "test/test_assembly.rb", "test/test_cascade.rb", "test/test_cascading.rb", "test/test_exceptions.rb", "test/test_flow.rb", "test/test_local_execution.rb", "test/test_operations.rb"]
- if s.respond_to? :specification_version then
- s.specification_version = 3
- end
+ s.rdoc_options = ["--main", "README.md"]
+ s.extra_rdoc_files = ["README.md", "LICENSE.txt"]
+
+ s.description = "cascading.jruby is a small DSL above Cascading, written in JRuby"
end
diff --git a/lib/cascading.rb b/lib/cascading.rb
index 64e61b3..9d19936 100644
--- a/lib/cascading.rb
+++ b/lib/cascading.rb
@@ -2,20 +2,26 @@
module Cascading
# :stopdoc:
- VERSION = '0.0.10'
+ VERSION = '1.0.0'
end
+require 'cascading/aggregations'
require 'cascading/assembly'
require 'cascading/base'
require 'cascading/cascade'
require 'cascading/cascading'
require 'cascading/cascading_exception'
require 'cascading/expr_stub'
+require 'cascading/filter_operations'
require 'cascading/flow'
+require 'cascading/identity_operations'
require 'cascading/mode'
require 'cascading/operations'
+require 'cascading/regex_operations'
require 'cascading/scope'
+require 'cascading/sub_assembly'
require 'cascading/tap'
+require 'cascading/text_operations'
-# include module to make them available at top package
+# include module to make it available at top level
include Cascading
diff --git a/lib/cascading/aggregations.rb b/lib/cascading/aggregations.rb
index 2980748..fb7f2a9 100644
--- a/lib/cascading/aggregations.rb
+++ b/lib/cascading/aggregations.rb
@@ -1,28 +1,39 @@
-require 'cascading/operations'
require 'cascading/scope'
require 'cascading/ext/array'
module Cascading
+ # Aggregations is the context available to you within the block of a group_by,
+ # union, or join that allows you to apply Every pipes to the result of those
+ # operations. You may apply aggregators and buffers within this context
+ # subject to several rules laid out by Cascading.
+ #
# Rules enforced by Aggregations:
# * Contains either 1 Buffer or >= 1 Aggregator (explicitly checked)
- # * No GroupBys, CoGroups, Joins, or Merges (methods for these pipes do not
- # exist on Aggregations)
+ # * No GroupBys, CoGroups, Joins, or Merges (methods for these pipes do not exist on Aggregations)
# * No Eaches (Aggregations#each does not exist)
# * Aggregations may not branch (Aggregations#branch does not exist)
#
# Externally enforced rules:
# * May be empty (in which case, Aggregations is not instantiated)
- # * Must follow a GroupBy or CoGroup (not a Join or Merge)
+ # * Must follow a GroupBy or CoGroup (not a HashJoin or Merge)
#
# Optimizations:
- # * If the leading Group is a GroupBy and all subsequent Everies are
- # Aggregators that have a corresponding AggregateBy, Aggregations can replace
- # the GroupBy/Aggregator pipe with a single composite AggregateBy
+ # * If the leading Group is a GroupBy and all subsequent Everies are Aggregators that have a corresponding AggregateBy, Aggregations can replace the GroupBy/Aggregator pipe with a single composite AggregateBy
+ #
+ # Aggregator and buffer DSL standard optional parameter names:
+ # [input] c.p.Every argument selector
+ # [into] c.o.Operation field declaration
+ # [output] c.p.Every output selector
class Aggregations
- include Operations
-
attr_reader :assembly, :tail_pipe, :scope, :aggregate_bys
+ # Do not use this constructor directly; instead, pass a block containing
+ # the desired aggregations to a group_by, union, or join and it will be
+ # instantiated for you.
+ #
+ # Builds the context in which a sequence of Every aggregations may be
+ # evaluated in the given assembly appended to the given group pipe and with
+ # the given incoming_scopes.
def initialize(assembly, group, incoming_scopes)
@assembly = assembly
@tail_pipe = group
@@ -32,23 +43,14 @@ def initialize(assembly, group, incoming_scopes)
@aggregate_bys = tail_pipe.is_group_by ? [] : nil
end
+ # Prints information about the scope of these Aggregations at the point at
+ # which it is called. This allows you to trace the propagation of field
+ # names through your job and is handy for debugging. See Scope for
+ # details.
def debug_scope
puts "Current scope of aggregations for '#{assembly.name}':\n #{scope}\n----------\n"
end
- def make_pipe(type, parameters)
- pipe = type.new(*parameters)
-
- # Enforce 1 Buffer or >= 1 Aggregator rule
- if tail_pipe.kind_of?(Java::CascadingPipe::Every)
- raise 'Buffer must be sole aggregation' if tail_pipe.buffer? || (tail_pipe.aggregator? && pipe.buffer?)
- end
-
- @tail_pipe = pipe
- @scope = Scope.outgoing_scope(tail_pipe, [scope])
- end
- private :make_pipe
-
# We can replace these aggregations with the corresponding composite
# AggregateBy if the leading Group was a GroupBy and all subsequent
# Aggregators had a corresponding AggregateBy (which we've encoded in the
@@ -69,13 +71,27 @@ def finalize
# Builds an every pipe and adds it to the current list of aggregations.
# Note that this list may be either exactly 1 Buffer or any number of
- # Aggregators.
- def every(*args)
- options = args.extract_options!
-
- in_fields = fields(args)
+ # Aggregators. Exactly one of :aggregator or :buffer must be specified and
+ # :aggregator may be accompanied by a corresponding :aggregate_by.
+ #
+ # The named options are:
+ # [aggregator] A Cascading Aggregator, mutually exclusive with :buffer.
+ # [aggregate_by] A Cascading AggregateBy that corresponds to the given
+ # :aggregator. Only makes sense with the :aggregator option
+ # and does not exist for all Aggregators. Providing nothing
+ # or nil will cause all Aggregations to operate as normal,
+ # without being compiled into a composite AggregateBy.
+ # [buffer] A Cascading Buffer, mutually exclusive with :aggregator.
+ # [output] c.p.Every output selector.
+ #
+ # Example:
+ # every 'field1', 'field2', :aggregator => sum_aggregator, :aggregate_by => sum_by, :output => all_fields
+ # every fields(input_fields), :buffer => Java::SomePackage::SomeBuffer.new, :output => all_fields
+ def every(*args_with_options)
+ options, in_fields = args_with_options.extract_options!, fields(args_with_options)
out_fields = fields(options[:output])
operation = options[:aggregator] || options[:buffer]
+ raise 'every requires either :aggregator or :buffer' unless operation
if options[:aggregate_by] && aggregate_bys
aggregate_bys << options[:aggregate_by]
@@ -84,71 +100,152 @@ def every(*args)
end
parameters = [tail_pipe, in_fields, operation, out_fields].compact
- make_pipe(Java::CascadingPipe::Every, parameters)
- end
+ every = make_pipe(Java::CascadingPipe::Every, parameters)
+ raise ':aggregator specified but c.o.Buffer provided' if options[:aggregator] && every.is_buffer
+ raise ':buffer specified but c.o.Aggregator provided' if options[:buffer] && every.is_aggregator
- def assert_group(*args)
- options = args.extract_options!
+ every
+ end
- assertion = args[0]
+ # Builds an every assertion pipe given a c.o.a.Assertion and adds it to the
+ # current list of aggregations. Note this breaks a chain of AggregateBys.
+ #
+ # The named options are:
+ # [level] The assertion level; defaults to strict.
+ def assert_group(assertion, options = {})
assertion_level = options[:level] || Java::CascadingOperation::AssertionLevel::STRICT
parameters = [tail_pipe, assertion_level, assertion]
make_pipe(Java::CascadingPipe::Every, parameters)
end
- def assert_group_size_equals(*args)
- options = args.extract_options!
-
- assertion = Java::CascadingOperationAssertion::AssertGroupSizeEquals.new(args[0])
+ # Builds a pipe that asserts the size of the current group is the specified
+ # size for all groups.
+ def assert_group_size_equals(size, options = {})
+ assertion = Java::CascadingOperationAssertion::AssertGroupSizeEquals.new(size)
assert_group(assertion, options)
end
- # Builds a series of every pipes for aggregation.
+ # Computes the minima of the specified fields within each group. Fields
+ # may be a list or a map for renaming. Note that fields are sorted by
+ # input name when a map is provided.
#
- # Args can either be a list of fields to aggregate and an options hash or
- # a hash that maps input field name to output field name (similar to
- # insert) and an options hash.
+ # The named options are:
+ # [ignore] Java Array of Objects of values to be ignored.
#
- # Options include:
- # * :ignore a Java Array of Objects (for min and max) or Tuples
- # (for first and last) of values for the aggregator to ignore
- # * function is a symbol that is the method to call to construct
- # the Cascading Aggregator.
- def composite_aggregator(args, function)
- field_map, options = extract_field_map(args)
+ # Examples:
+ # assembly 'aggregate' do
+ # ...
+ # insert 'const' => 1
+ # group_by 'const' do
+ # min 'field1', 'field2'
+ # min 'field3' => 'fieldA', 'field4' => 'fieldB'
+ # end
+ # discard 'const'
+ # end
+ def min(*args_with_options)
+ composite_aggregator(args_with_options, Java::CascadingOperationAggregator::Min)
+ end
- field_map.each do |in_field, out_field|
- agg = self.send(function, out_field, options)
- every(in_field, :aggregator => agg, :output => all_fields)
- end
- raise "Composite aggregator '#{function.to_s.gsub('_function', '')}' invoked on 0 fields" if field_map.empty?
+ # Computes the maxima of the specified fields within each group. Fields
+ # may be a list or a map for renaming. Note that fields are sorted by
+ # input name when a map is provided.
+ #
+ # The named options are:
+ # [ignore] Java Array of Objects of values to be ignored.
+ #
+ # Examples:
+ # assembly 'aggregate' do
+ # ...
+ # insert 'const' => 1
+ # group_by 'const' do
+ # max 'field1', 'field2'
+ # max 'field3' => 'fieldA', 'field4' => 'fieldB'
+ # end
+ # discard 'const'
+ # end
+ def max(*args_with_options)
+ composite_aggregator(args_with_options, Java::CascadingOperationAggregator::Max)
+ end
+
+ # Returns the first value within each group for the specified fields.
+ # Fields may be a list or a map for renaming. Note that fields are sorted
+ # by input name when a map is provided.
+ #
+ # The named options are:
+ # [ignore] Java Array of Tuples which should be ignored
+ #
+ # Examples:
+ # assembly 'aggregate' do
+ # ...
+ # group_by 'key1', 'key2' do
+ # first 'field1', 'field2'
+ # first 'field3' => 'fieldA', 'field4' => 'fieldB'
+ # end
+ # end
+ def first(*args_with_options)
+ composite_aggregator(args_with_options, Java::CascadingOperationAggregator::First)
end
- def min(*args); composite_aggregator(args, :min_function); end
- def max(*args); composite_aggregator(args, :max_function); end
- def first(*args); composite_aggregator(args, :first_function); end
- def last(*args); composite_aggregator(args, :last_function); end
+ # Returns the last value within each group for the specified fields.
+ # Fields may be a list or a map for renaming. Note that fields are sorted
+ # by input name when a map is provided.
+ #
+ # The named options are:
+ # [ignore] Java Array of Tuples which should be ignored
+ #
+ # Examples:
+ # assembly 'aggregate' do
+ # ...
+ # group_by 'key1', 'key2' do
+ # last 'field1', 'field2'
+ # last 'field3' => 'fieldA', 'field4' => 'fieldB'
+ # end
+ # end
+ def last(*args_with_options)
+ composite_aggregator(args_with_options, Java::CascadingOperationAggregator::Last)
+ end
- # Counts elements of a group. May optionally specify the name of the
- # output count field (defaults to 'count').
+ # Counts elements of each group. May optionally specify the name of the
+ # output count field, which defaults to 'count'.
+ #
+ # Examples:
+ # assembly 'aggregate' do
+ # ...
+ # group_by 'key1', 'key2' do
+ # count
+ # count 'key1_key2_count'
+ # end
+ # end
def count(name = 'count')
count_aggregator = Java::CascadingOperationAggregator::Count.new(fields(name))
count_by = Java::CascadingPipeAssembly::CountBy.new(fields(name))
every(last_grouping_fields, :aggregator => count_aggregator, :output => all_fields, :aggregate_by => count_by)
end
- # Sums one or more fields. Fields to be summed may either be provided as
- # the arguments to sum (in which case they will be aggregated into a field
- # of the same name in the given order), or via a hash using the :mapping
- # parameter (in which case they will be aggregated from the field named by
- # the key into the field named by the value after being sorted). The type
- # of the output sum may be controlled with the :type parameter.
- def sum(*args)
- options = args.extract_options!
+ # Sums the specified fields within each group. Fields may be a list or
+ # provided through the :mapping option for renaming. Note that fields are
+ # sorted by name when a map is provided.
+ #
+ # The named options are:
+ # [mapping] Map of input to output field names if renaming is desired.
+ # Results in output fields sorted by input field.
+ # [type] Controls the type of the output, specified using values from the
+ # Cascading::JAVA_TYPE_MAP as in Janino expressions (:double, :long, etc.)
+ #
+ # Examples:
+ # assembly 'aggregate' do
+ # ...
+ # group_by 'key1', 'key2' do
+ # sum 'field1', 'field2', :type => :long
+ # sum :mapping => { 'field3' => 'fieldA', 'field4' => 'fieldB' }, :type => :double
+ # end
+ # end
+ def sum(*args_with_options)
+ options, in_fields = args_with_options.extract_options!, args_with_options
type = JAVA_TYPE_MAP[options[:type]]
- mapping = options[:mapping] ? options[:mapping].sort : args.zip(args)
+ mapping = options[:mapping] ? options[:mapping].sort : in_fields.zip(in_fields)
mapping.each do |in_field, out_field|
sum_aggregator = Java::CascadingOperationAggregator::Sum.new(*[fields(out_field), type].compact)
# NOTE: SumBy requires a type in wip-286, unlike Sum (see Sum.java line 42 for default)
@@ -158,10 +255,22 @@ def sum(*args)
raise "sum invoked on 0 fields (note :mapping must be provided to explicitly rename fields)" if mapping.empty?
end
- # Averages one or more fields. The contract of average is identical to
- # that of other composite aggregators, but it accepts no options.
- def average(*args)
- field_map, _ = extract_field_map(args)
+ # Averages the specified fields within each group. Fields may be a list or
+ # a map for renaming. Note that fields are sorted by input name when a map
+ # is provided.
+ #
+ # Examples:
+ # assembly 'aggregate' do
+ # ...
+ # insert 'const' => 1
+ # group_by 'const' do
+ # average 'field1', 'field2'
+ # average 'field3' => 'fieldA', 'field4' => 'fieldB'
+ # end
+ # discard 'const'
+ # end
+ def average(*fields_or_field_map)
+ field_map, _ = extract_field_map(fields_or_field_map)
field_map.each do |in_field, out_field|
average_aggregator = Java::CascadingOperationAggregator::Average.new(fields(out_field))
@@ -173,6 +282,42 @@ def average(*args)
private
+ def make_pipe(type, parameters)
+ pipe = type.new(*parameters)
+
+ # Enforce 1 Buffer or >= 1 Aggregator rule
+ if tail_pipe.kind_of?(Java::CascadingPipe::Every)
+ raise 'Buffer must be sole aggregation' if tail_pipe.buffer? || (tail_pipe.aggregator? && pipe.buffer?)
+ end
+
+ @tail_pipe = pipe
+ @scope = Scope.outgoing_scope(tail_pipe, [scope])
+
+ tail_pipe
+ end
+
+ # Builds a series of every pipes for aggregation.
+ #
+ # Args can either be a list of fields to aggregate and an options hash or
+ # a hash that maps input field name to output field name (similar to
+ # insert) and an options hash.
+ #
+ # The named options are:
+ # [ignore] Java Array of Objects (for min and max) or Tuples (for first and
+ # last) of values for the aggregator to ignore.
+ def composite_aggregator(args, aggregator)
+ field_map, options = extract_field_map(args)
+
+ field_map.each do |in_field, out_field|
+ every(
+ in_field,
+ :aggregator => aggregator.new(*[fields(out_field), options[:ignore]].compact),
+ :output => all_fields
+ )
+ end
+ raise "Composite aggregator '#{aggregator}' invoked on 0 fields" if field_map.empty?
+ end
+
# Extracts a field mapping, input field => output field, by accepting a
# hash in the first argument. If no hash is provided, then maps arguments
# onto themselves which names outputs the same as inputs. Additionally
diff --git a/lib/cascading/assembly.rb b/lib/cascading/assembly.rb
index 684b4c1..eb4e032 100644
--- a/lib/cascading/assembly.rb
+++ b/lib/cascading/assembly.rb
@@ -1,15 +1,50 @@
require 'cascading/base'
require 'cascading/operations'
+require 'cascading/identity_operations'
+require 'cascading/filter_operations'
+require 'cascading/regex_operations'
+require 'cascading/text_operations'
require 'cascading/aggregations'
require 'cascading/sub_assembly'
require 'cascading/ext/array'
module Cascading
+ # An Assembly is a sequence of Cascading pipes (Each, GroupBy, CoGroup,
+ # Every, and SubAssembly). This class will serve as your primary mechanism
+ # for doing work within a flow and contains all the functions and filters you
+ # will apply to a pipe (Eaches), as well as group_by, union, and join. For
+ # aggregators and buffers, please see Aggregations.
+ #
+ # Function and filter DSL rules:
+ # * Use positional arguments for required parameters
+ # * Use options = {} for optional parameters
+ # * Use *args sparingly, specifically when you need to accept a varying length list of fields
+ # * If you require both a varying length list of fields and optional parameters, then see the Array#extract_options! extension
+ # * If you choose to name a required parameter, add it to options = {} and throw an exception if the caller does not provide it
+ # * If you have a required parameter that is provided by one of a set of option names, throw an exception if the caller does not provide at least one value (see :function and :filter in Assembly#each for an example)
+ #
+ # Function and filter DSL standard optional parameter names:
+ # [input] c.p.Each argument selector
+ # [into] c.o.Operation field declaration
+ # [output] c.p.Each output selector
+ #
+ # A note on aliases: when a DSL method uniquely wraps a single Cascading
+ # operation, we attempt to provide an alias that matches the Cascading
+ # operation. However, Cascading operations are often nouns rather than verbs,
+ # and the latter are preferable for a dataflow DSL.
class Assembly < Cascading::Node
- include Operations
-
attr_reader :head_pipe, :tail_pipe
+ # Do not use this constructor directly; instead, use Flow#assembly or
+ # Assembly#branch to build assemblies.
+ #
+ # Builds an Assembly given a name, parent, and optional outgoing_scopes
+ # (necessary only for branching).
+ #
+ # An assembly's name is quite important as it will determine:
+ # * The sources from which it will read, if any
+ # * The name to be used in joins or unions downstream
+ # * The name to be used to sink the output of the assembly downstream
def initialize(name, parent, outgoing_scopes = {})
super(name, parent)
@@ -27,6 +62,11 @@ def initialize(name, parent, outgoing_scopes = {})
@incoming_scopes = [scope]
end
+ # Produces a textual description of this Assembly. The description details
+ # the structure of the Assembly, its input and output fields and any
+ # children (branches). The offset parameter allows for this describe to be
+ # nested within a calling context, which lets us indent the structural
+ # hierarchy of a job.
def describe(offset = '')
incoming_scopes_desc = "#{@incoming_scopes.map{ |incoming_scope| incoming_scope.values_fields.to_a.inspect }.join(', ')}"
incoming_scopes_desc = "(#{incoming_scopes_desc})" unless @incoming_scopes.size == 1
@@ -35,199 +75,231 @@ def describe(offset = '')
description
end
+ # Rather than the immediate parent, this method returns the parent flow of
+ # this Assembly. If this is a branch, we must traverse the parents of
+ # parent assemblies.
def parent_flow
return parent if parent.kind_of?(Flow)
parent.parent_flow
end
+ # Accesses the outgoing scope of this Assembly at the point at which it is
+ # called. This is useful for grabbing the values_fields at any point in
+ # the construction of the Assembly. See Scope for details.
def scope
@outgoing_scopes[name]
end
+ # Prints information about the scope of this Assembly at the point at which
+ # it is called. This allows you to trace the propagation of field names
+ # through your job and is handy for debugging. See Scope for details.
def debug_scope
puts "Current scope for '#{name}':\n #{scope}\n----------\n"
end
- def make_pipe(type, parameters)
- @tail_pipe = type.new(*parameters)
- @outgoing_scopes[name] = Scope.outgoing_scope(tail_pipe, [scope])
-
- tail_pipe
- end
- private :make_pipe
-
- def populate_incoming_scopes(assembly_names, group_fields_args = {})
- # NOTE: this overrides the existing incoming_scopes, which changes the
- # way describe will function on this assembly
- pipes, @incoming_scopes, group_fields = [], [], []
- assembly_names.each do |assembly_name|
- assembly = parent_flow.find_child(assembly_name)
- raise "Could not find assembly '#{assembly_name}' from '#{name}'" unless assembly
-
- pipes << assembly.tail_pipe
- @incoming_scopes << assembly.scope
- group_fields << fields(group_fields_args[assembly_name]) if group_fields_args[assembly_name]
- end
- [pipes, group_fields]
- end
- private :populate_incoming_scopes
-
- def apply_aggregations(group, incoming_scopes, &block)
- aggregations = Aggregations.new(self, group, incoming_scopes)
- aggregations.instance_eval(&block) if block_given?
-
- # Sorting of any type means that we cannot use the AggregateBy optimization
- if aggregations.can_aggregate_by? && !group.is_sorted && !group.is_sort_reversed
- grouping_fields = group.key_selectors.values.first
- group.key_selectors.values.each do |key_fields|
- raise "Grouping fields mismatch: #{grouping_fields} expected; #{key_fields} found from #{group.key_selectors}" unless key_fields == grouping_fields
- end
-
- aggregate_by = sub_assembly(Java::CascadingPipeAssembly::AggregateBy.new(
- name,
- group.previous,
- grouping_fields,
- aggregations.aggregate_bys.to_java(Java::CascadingPipeAssembly::AggregateBy)
- ), group.previous, incoming_scopes)
-
- aggregate_by
- else
- aggregations.finalize if block_given?
- @tail_pipe = aggregations.tail_pipe
- @outgoing_scopes[name] = aggregations.scope
-
- group
- end
- end
- private :apply_aggregations
-
+ # Prints detail about this Assembly including its name, head pipe, and tail
+ # pipe.
def to_s
"#{name} : head pipe : #{head_pipe} - tail pipe: #{tail_pipe}"
end
- def prepare_join(*args, &block)
- options = args.extract_options!
-
- pipes, _ = populate_incoming_scopes(args)
-
- group_fields_args = options[:on]
- raise 'join requires :on parameter' unless group_fields_args
-
- if group_fields_args.kind_of?(String)
- group_fields_args = [group_fields_args]
- end
-
- group_fields = []
- if group_fields_args.kind_of?(Array)
- pipes.size.times do
- group_fields << fields(group_fields_args)
- end
- elsif group_fields_args.kind_of?(Hash)
- pipes, group_fields = populate_incoming_scopes(group_fields_args.keys.sort, group_fields_args)
- else
- raise "Unsupported data type for :on in join: '#{group_fields_args.class}'"
- end
-
- raise 'join requires non-empty :on parameter' if group_fields_args.empty?
- group_fields = group_fields.to_java(Java::CascadingTuple::Fields)
- incoming_fields = @incoming_scopes.map{ |s| s.values_fields }
- declared_fields = fields(options[:declared_fields] || dedup_fields(*incoming_fields))
- joiner = options[:joiner]
- is_hash_join = options[:hash] || false
-
- case joiner
- when :inner, 'inner', nil
- joiner = Java::CascadingPipeJoiner::InnerJoin.new
- when :left, 'left'
- joiner = Java::CascadingPipeJoiner::LeftJoin.new
- when :right, 'right'
- joiner = Java::CascadingPipeJoiner::RightJoin.new
- when :outer, 'outer'
- joiner = Java::CascadingPipeJoiner::OuterJoin.new
- when Array
- joiner = joiner.map do |t|
- case t
- when true, 1, :inner then true
- when false, 0, :outer then false
- else fail "invalid mixed joiner entry: #{t}"
- end
- end
- joiner = Java::CascadingPipeJoiner::MixedJoin.new(joiner.to_java(:boolean))
- end
-
- if is_hash_join
- raise ArgumentError, "hash joins don't support aggregations" if block_given?
- parameters = [
- pipes.to_java(Java::CascadingPipe::Pipe),
- group_fields,
- declared_fields,
- joiner
- ]
- group_assembly = Java::CascadingPipe::HashJoin.new(*parameters)
- else
- result_group_fields = dedup_fields(*group_fields)
- parameters = [
- pipes.to_java(Java::CascadingPipe::Pipe),
- group_fields,
- declared_fields,
- result_group_fields,
- joiner
- ]
- group_assembly = Java::CascadingPipe::CoGroup.new(*parameters)
- end
- apply_aggregations(group_assembly, @incoming_scopes, &block)
- end
- private :prepare_join
-
# Builds a HashJoin pipe. This should be used carefully, as the right side
- # of the join is accumulated entirely in memory. Requires a list of assembly
- # names to join and :on to specify the join_fields.
- def hash_join(*args, &block)
- options = args.extract_options!
+ # of the join is accumulated entirely in memory. Requires a list of
+ # assembly names to join and :on to specify the join_fields. Note that a
+ # hash_join "takes over" the Assembly in which it is built, so it is
+ # typically the first statement within the block of the assembly or branch.
+ # Additionally, a hash join does not accept a block for aggregations like
+ # other joins; this restriction is enforced here, but comes directly from
+ # Cascading.
+ #
+ # The named options are:
+ # [on] The keys of the join, an array of strings if they are the same in
+ # all inputs, or a hash mapping assembly names to key names if they
+ # differ across inputs.
+ # [declared_fields] By default, a deduplicated array of incoming field
+ # names (see Cascading::dedup_fields). Specifies the
+ # names of the fields that will be available to
+ # aggregations or post-join if no aggregations are
+ # specified.
+ # [joiner] A specification of the c.p.j.Joiner to use. Values like :inner
+ # and 'inner', :right and 'right' are accepted, as well as an
+ # array specifying mixed joins. Typically, this is not provided,
+ # but one of the higher level join methods on Assembly is used
+ # directly (like Assembly#inner_join or Assembly#right_join).
+ #
+ # Example:
+ # assembly 'join_left_right' do
+ # hash_join 'left', 'right', :on => ['key1', 'key2'], :joiner => :inner
+ # end
+ def hash_join(*args_with_options)
+ raise ArgumentError, "HashJoin doesn't support aggregations so the block provided to hash_join will be ignored" if block_given?
+
+ options, assembly_names = args_with_options.extract_options!, args_with_options
options[:hash] = true
- args << options
- prepare_join(*args, &block)
+ prepare_join(assembly_names, options)
end
# Builds a join (CoGroup) pipe. Requires a list of assembly names to join
- # and :on to specify the group_fields.
- def join(*args, &block)
- options = args.extract_options!
+ # and :on to specify the group_fields. Note that a join "takes over" the
+ # Assembly in which it is built, so it is typically the first statement
+ # within the block of the assembly or branch. The block passed to this
+ # method will be evaluated in the context of Aggregations, not Assembly.
+ #
+ # The named options are:
+ # [on] The keys of the join, an array of strings if they are the same in
+ # all inputs, or a hash mapping assembly names to key names if they
+ # differ across inputs.
+ # [declared_fields] By default, a deduplicated array of incoming field
+ # names (see Cascading::dedup_fields). Specifies the
+ # names of the fields that will be available to
+ # aggregations or post-join if no aggregations are
+ # specified.
+ # [joiner] A specification of the c.p.j.Joiner to use. Values like :inner
+ # and 'inner', :right and 'right' are accepted, as well as an
+ # array specifying mixed joins. Typically, this is not provided,
+ # but one of the higher level join methods on Assembly is used
+ # directly (like Assembly#inner_join or Assembly#right_join).
+ #
+ # Example:
+ # assembly 'join_left_right' do
+ # join 'left', 'right', :on => ['key1', 'key2'], :joiner => :inner do
+ # sum 'val1', 'val2', :type => :long
+ # end
+ # end
+ def join(*args_with_options, &block)
+ options, assembly_names = args_with_options.extract_options!, args_with_options
options[:hash] = false
- args << options
- prepare_join(*args, &block)
+ prepare_join(assembly_names, options, &block)
end
alias co_group join
- def inner_join(*args, &block)
- options = args.extract_options!
+ # Builds an inner join (CoGroup) pipe. Requires a list of assembly names to
+ # join and :on to specify the group_fields.
+ #
+ # The named options are:
+ # [on] The keys of the join, an array of strings if they are the same in
+ # all inputs, or a hash mapping assembly names to key names if they
+ # differ across inputs.
+ # [declared_fields] By default, a deduplicated array of incoming field
+ # names (see Cascading::dedup_fields). Specifies the
+ # names of the fields that will be available to
+ # aggregations or post-join if no aggregations are
+ # specified.
+ #
+ # Example:
+ # assembly 'join_left_right' do
+  #     inner_join 'left', 'right', :on => ['key1', 'key2'] do
+ # sum 'val1', 'val2', :type => :long
+ # end
+ # end
+ def inner_join(*args_with_options, &block)
+ options = args_with_options.extract_options!
options[:joiner] = :inner
- args << options
- join(*args, &block)
+ args_with_options << options
+ join(*args_with_options, &block)
end
- def left_join(*args, &block)
- options = args.extract_options!
+ # Builds a left join (CoGroup) pipe. Requires a list of assembly names to
+ # join and :on to specify the group_fields.
+ #
+ # The named options are:
+ # [on] The keys of the join, an array of strings if they are the same in
+ # all inputs, or a hash mapping assembly names to key names if they
+ # differ across inputs.
+ # [declared_fields] By default, a deduplicated array of incoming field
+ # names (see Cascading::dedup_fields). Specifies the
+ # names of the fields that will be available to
+ # aggregations or post-join if no aggregations are
+ # specified.
+ #
+ # Example:
+ # assembly 'join_left_right' do
+ # left_join 'left', 'right', :on => ['key1', 'key2'] do
+ # sum 'val1', 'val2', :type => :long
+ # end
+ # end
+ def left_join(*args_with_options, &block)
+ options = args_with_options.extract_options!
options[:joiner] = :left
- args << options
- join(*args, &block)
+ args_with_options << options
+ join(*args_with_options, &block)
end
- def right_join(*args, &block)
- options = args.extract_options!
+ # Builds a right join (CoGroup) pipe. Requires a list of assembly names to
+ # join and :on to specify the group_fields.
+ #
+ # The named options are:
+ # [on] The keys of the join, an array of strings if they are the same in
+ # all inputs, or a hash mapping assembly names to key names if they
+ # differ across inputs.
+ # [declared_fields] By default, a deduplicated array of incoming field
+ # names (see Cascading::dedup_fields). Specifies the
+ # names of the fields that will be available to
+ # aggregations or post-join if no aggregations are
+ # specified.
+ #
+ # Example:
+ # assembly 'join_left_right' do
+ # right_join 'left', 'right', :on => ['key1', 'key2'] do
+ # sum 'val1', 'val2', :type => :long
+ # end
+ # end
+ def right_join(*args_with_options, &block)
+ options = args_with_options.extract_options!
options[:joiner] = :right
- args << options
- join(*args, &block)
+ args_with_options << options
+ join(*args_with_options, &block)
end
- def outer_join(*args, &block)
- options = args.extract_options!
+ # Builds an outer join (CoGroup) pipe. Requires a list of assembly names to
+ # join and :on to specify the group_fields.
+ #
+ # The named options are:
+ # [on] The keys of the join, an array of strings if they are the same in
+ # all inputs, or a hash mapping assembly names to key names if they
+ # differ across inputs.
+ # [declared_fields] By default, a deduplicated array of incoming field
+ # names (see Cascading::dedup_fields). Specifies the
+ # names of the fields that will be available to
+ # aggregations or post-join if no aggregations are
+ # specified.
+ #
+ # Example:
+ # assembly 'join_left_right' do
+ # outer_join 'left', 'right', :on => ['key1', 'key2'] do
+ # sum 'val1', 'val2', :type => :long
+ # end
+ # end
+ def outer_join(*args_with_options, &block)
+ options = args_with_options.extract_options!
options[:joiner] = :outer
- args << options
- join(*args, &block)
+ args_with_options << options
+ join(*args_with_options, &block)
end
- # Builds a new branch.
+ # Builds a child Assembly that branches this Assembly given a name and
+ # block.
+ #
+ # An assembly's name is quite important as it will determine:
+ # * The sources from which it will read, if any
+ # * The name to be used in joins or unions downstream
+ # * The name to be used to sink the output of the assembly downstream
+ #
+ # Many branches may be built within an assembly. The result of a branch is
+ # the same as the Flow#assembly constructor, an Assembly object.
+ #
+ # Example:
+ # assembly 'some_work' do
+ # ...
+ #
+ # branch 'more_work' do
+ # ...
+ # end
+ #
+ # branch 'yet_more_work' do
+ # ...
+ # end
+ # end
def branch(name, &block)
raise "Could not build branch '#{name}'; block required" unless block_given?
assembly = Assembly.new(name, self, @outgoing_scopes)
@@ -236,11 +308,27 @@ def branch(name, &block)
assembly
end
- # Builds a new GroupBy pipe that groups on the fields given in args.
- # Any block passed to this method should contain only Everies.
- def group_by(*args, &block)
- options = args.extract_options!
- group_fields = fields(args)
+ # Builds a new GroupBy pipe that groups on the fields given in
+ # args_with_options. The block passed to this method will be evaluated in
+ # the context of Aggregations, not Assembly.
+ #
+ # The named options are:
+ # [sort_by] Optional keys for within-group sort.
+ # [reverse] Boolean that can reverse the order of within-group sorting
+ # (only makes sense given :sort_by keys).
+ #
+ # Example:
+ # assembly 'total' do
+ # ...
+ # insert 'const' => 1
+ # group_by 'const' do
+ # count
+ # sum 'val1', 'val2', :type => :long
+ # end
+ # discard 'const'
+ # end
+ def group_by(*args_with_options, &block)
+ options, group_fields = args_with_options.extract_options!, fields(args_with_options)
sort_fields = fields(options[:sort_by])
reverse = options[:reverse]
@@ -251,16 +339,31 @@ def group_by(*args, &block)
# Unifies multiple incoming pipes sharing the same field structure using a
# GroupBy. Accepts :on like join and :sort_by and :reverse like group_by,
# as well as a block which may be used for a sequence of Every
- # aggregations.
+ # aggregations. The block passed to this method will be evaluated in the
+ # context of Aggregations, not Assembly.
#
# By default, groups only on the first field (see line 189 of GroupBy.java)
- def union(*args, &block)
- options = args.extract_options!
+ #
+ # The named options are:
+ # [on] The keys of the union, which defaults to the first field in the
+ # first input assembly.
+ # [sort_by] Optional keys for sorting.
+ # [reverse] Boolean that can reverse the order of sorting
+ # (only makes sense given :sort_by keys).
+ #
+ # Example:
+ # assembly 'union_left_right' do
+ # union 'left', 'right' do
+ # sum 'val1', 'val2', :type => :long
+ # end
+ # end
+ def union(*args_with_options, &block)
+ options, assembly_names = args_with_options.extract_options!, args_with_options
group_fields = fields(options[:on])
sort_fields = fields(options[:sort_by])
reverse = options[:reverse]
- pipes, _ = populate_incoming_scopes(args)
+ pipes, _ = populate_incoming_scopes(assembly_names)
# Must provide group_fields to ensure field name propagation
group_fields = fields(@incoming_scopes.first.values_fields.get(0)) unless group_fields
@@ -273,10 +376,15 @@ def union(*args, &block)
end
alias :union_pipes :union
- # Allows you to plugin c.p.SubAssemblies to a cascading.jruby Assembly
- # under certain assumptions. Note the default is to extend the tail pipe
- # of this Assembly using a linear SubAssembly. See SubAssembly class for
- # details.
+ # Allows you to plugin c.p.SubAssemblies to an Assembly under certain
+ # assumptions. Note the default is to extend the tail pipe of this
+ # Assembly using a linear SubAssembly. See SubAssembly class for details.
+ #
+ # Example:
+ # assembly 'id_rows' do
+ # ...
+ # sub_assembly Java::CascadingPipeAssembly::Discard.new(tail_pipe, fields('id'))
+ # end
def sub_assembly(sub_assembly, pipes = [tail_pipe], incoming_scopes = [scope])
sub_assembly = SubAssembly.new(self, sub_assembly)
sub_assembly.finalize(pipes, incoming_scopes)
@@ -287,17 +395,24 @@ def sub_assembly(sub_assembly, pipes = [tail_pipe], incoming_scopes = [scope])
sub_assembly
end
- # Builds a basic _each_ pipe, and adds it to the current assembly.
- # --
+ # Builds a basic each pipe and adds it to the current Assembly. Default
+ # arguments are all_fields, a default inherited from c.o.Each. Exactly one
+ # of :function and :filter must be specified and filters do not support an
+ # :output selector.
+ #
+ # The named options are:
+ # [filter] A Cascading Filter, mutually exclusive with :function.
+ # [function] A Cascading Function, mutually exclusive with :filter.
+ # [output] c.p.Each output selector, only valid with :function.
+ #
# Example:
- # each 'line', :function => regex_splitter(['name', 'val1', 'val2', 'id'], :pattern => /[.,]*\s+/), :output => ['id', 'name', 'val1', 'val2']
- def each(*args)
- options = args.extract_options!
-
- in_fields = fields(args)
- out_fields = fields(options[:output])
-
+ # each fields(input_fields), :function => Java::CascadingOperation::Identity.new
+ # each 'field1', 'field2', :function => Java::CascadingOperation::Identity.new
+ def each(*args_with_options)
+ options, in_fields = args_with_options.extract_options!, fields(args_with_options)
+ out_fields = fields(options[:output]) # Default Fields.RESULTS from c.o.Each
operation = options[:filter] || options[:function]
+ raise 'each requires either :filter or :function' unless operation
raise 'c.p.Each does not support applying an output selector to a c.o.Filter' if options[:filter] && options[:output]
parameters = [tail_pipe, in_fields, operation, out_fields].compact
@@ -308,468 +423,156 @@ def each(*args)
each
end
- # Restricts the current assembly to the specified fields.
- # --
- # Example:
- # project "field1", "field2"
- def project(*args)
- each fields(args), :function => Java::CascadingOperation::Identity.new
- end
-
- # Removes the specified fields from the current assembly.
- # --
- # Example:
- # discard "field1", "field2"
- def discard(*args)
- discard_fields = fields(args)
- keep_fields = difference_fields(scope.values_fields, discard_fields)
- project(*keep_fields.to_a)
- end
-
- # Renames fields according to the mapping provided.
- # --
- # Example:
- # rename "old_name" => "new_name"
- def rename(name_map)
- old_names = scope.values_fields.to_a
- new_names = old_names.map{ |name| name_map[name] || name }
- invalid = name_map.keys.sort - old_names
- raise "invalid names: #{invalid.inspect}" unless invalid.empty?
-
- each all_fields, :function => Java::CascadingOperation::Identity.new(fields(new_names))
- end
-
- def cast(type_map)
- names = type_map.keys.sort
- types = JAVA_TYPE_MAP.values_at(*type_map.values_at(*names))
- fields = fields(names)
- types = types.to_java(java.lang.Class)
- each fields, :function => Java::CascadingOperation::Identity.new(fields, types)
- end
-
- def copy(*args)
- options = args.extract_options!
- from = args[0] || all_fields
- into = args[1] || options[:into] || all_fields
- each fields(from), :function => Java::CascadingOperation::Identity.new(fields(into)), :output => all_fields
- end
-
- # A pipe that does nothing.
- def pass(*args)
- each all_fields, :function => Java::CascadingOperation::Identity.new
- end
+ include Operations
+ include IdentityOperations
+ include FilterOperations
+ include RegexOperations
+ include TextOperations
- def assert(*args)
- options = args.extract_options!
- assertion = args[0]
+ # Builds an each assertion pipe given a c.o.a.Assertion and adds it to the
+ # current Assembly.
+ #
+ # The named options are:
+ # [level] The assertion level; defaults to strict.
+ def assert(assertion, options = {})
assertion_level = options[:level] || Java::CascadingOperation::AssertionLevel::STRICT
parameters = [tail_pipe, assertion_level, assertion]
make_pipe(Java::CascadingPipe::Each, parameters)
end
- # Builds a debugging pipe.
- #
- # Without arguments, it generate a simple debug pipe, that prints all tuple to the standard
- # output.
- #
- # The other named options are:
- # * :print_fields a boolean. If is set to true, then it prints every 10 tuples.
- #
- def debug(*args)
- options = args.extract_options!
- print_fields = options[:print_fields] || true
- parameters = [print_fields].compact
- debug = Java::CascadingOperation::Debug.new(*parameters)
- debug.print_tuple_every = options[:tuple_interval] || 1
- debug.print_fields_every = options[:fields_interval] || 10
- each(all_fields, :filter => debug)
- end
-
- # Builds a pipe that assert the size of the tuple is the size specified in parameter.
- #
- # The method accept an unique uname argument : a number indicating the size expected.
- def assert_size_equals(*args)
- options = args.extract_options!
- assertion = Java::CascadingOperationAssertion::AssertSizeEquals.new(args[0])
+ # Builds a pipe that asserts the size of the tuple is the specified size.
+ def assert_size_equals(size, options = {})
+ assertion = Java::CascadingOperationAssertion::AssertSizeEquals.new(size)
assert(assertion, options)
end
- # Builds a pipe that assert the none of the fields in the tuple are null.
- def assert_not_null(*args)
- options = args.extract_options!
+    # Builds a pipe that asserts none of the fields in the tuple are null.
+ def assert_not_null(options = {})
assertion = Java::CascadingOperationAssertion::AssertNotNull.new
assert(assertion, options)
end
- # Builds a _parse_ pipe. This pipe will parse the fields specified in input (first unamed arguments),
- # using a specified regex pattern.
- #
- # If provided, the unamed arguments must be the fields to be parsed. If not provided, then all incoming
- # fields are used.
- #
- # The named options are:
- # * :pattern a string or regex. Specifies the regular expression used for parsing the argument fields.
- # * :output a string or array of strings. Specifies the outgoing fields (all fields will be output by default)
- def parse(*args)
- options = args.extract_options!
- fields = args || all_fields
- pattern = options[:pattern]
- output = options[:output] || all_fields
- each(fields, :function => regex_parser(pattern, options), :output => output)
- end
+ private
- # Builds a pipe that splits a field into other fields, using a specified regular expression.
- #
- # The first unnamed argument is the field to be split.
- # The second unnamed argument is an array of strings indicating the fields receiving the result of the split.
- #
- # The named options are:
- # * :pattern a string or regex. Specifies the regular expression used for splitting the argument fields.
- # * :output a string or array of strings. Specifies the outgoing fields (all fields will be output by default)
- def split(*args)
- options = args.extract_options!
- fields = options[:into] || args[1]
- pattern = options[:pattern] || /[.,]*\s+/
- output = options[:output] || all_fields
- each(args[0], :function => regex_splitter(fields, :pattern => pattern), :output=>output)
- end
-
- # Builds a pipe that splits a field into new rows, using a specified regular expression.
- #
- # The first unnamed argument is the field to be split.
- # The second unnamed argument is the field receiving the result of the split.
- #
- # The named options are:
- # * :pattern a string or regex. Specifies the regular expression used for splitting the argument fields.
- # * :output a string or array of strings. Specifies the outgoing fields (all fields will be output by default)
- def split_rows(*args)
- options = args.extract_options!
- fields = options[:into] || args[1]
- pattern = options[:pattern] || /[.,]*\s+/
- output = options[:output] || all_fields
- each(args[0], :function => regex_split_generator(fields, :pattern => pattern), :output=>output)
- end
-
- # Builds a pipe that emits a new row for each regex group matched in a field, using a specified regular expression.
- #
- # The first unnamed argument is the field to be matched against.
- # The second unnamed argument is the field receiving the result of the match.
- #
- # The named options are:
- # * :pattern a string or regex. Specifies the regular expression used for matching the argument fields.
- # * :output a string or array of strings. Specifies the outgoing fields (all fields will be output by default)
- def match_rows(*args)
- options = args.extract_options!
- fields = options[:into] || args[1]
- pattern = options[:pattern] || /[\w]+/
- output = options[:output] || all_fields
- each(args[0], :function => regex_generator(fields, :pattern => pattern), :output=>output)
- end
-
- # Builds a pipe that parses the specified field as a date using hte provided format string.
- # The unamed argument specifies the field to format.
- #
- # The named options are:
- # * :into a string. It specifies the receiving field. By default, it will be named after
- # the input argument.
- # * :pattern a string. Specifies the date format.
- # * :output a string or array of strings. Specifies the outgoing fields (all fields will be output by default)
- def parse_date(*args)
- options = args.extract_options!
- field = options[:into] || "#{args[0]}_parsed"
- output = options[:output] || all_fields
- pattern = options[:pattern] || "yyyy/MM/dd"
-
- each args[0], :function => date_parser(field, pattern), :output => output
- end
+ def make_pipe(type, parameters)
+ @tail_pipe = type.new(*parameters)
+ @outgoing_scopes[name] = Scope.outgoing_scope(tail_pipe, [scope])
- # Builds a pipe that format a date using a specified format pattern.
- #
- # The unamed argument specifies the field to format.
- #
- # The named options are:
- # * :into a string. It specifies the receiving field. By default, it will be named after
- # the input argument.
- # * :pattern a string. Specifies the date format.
- # * :timezone a string. Specifies the timezone (defaults to UTC).
- # * :output a string or array of strings. Specifies the outgoing fields (all fields will be output by default)
- def format_date(*args)
- options = args.extract_options!
- field = options[:into] || "#{args[0]}_formatted"
- pattern = options[:pattern] || "yyyy/MM/dd"
- output = options[:output] || all_fields
-
- each args[0], :function => date_formatter(field, pattern, options[:timezone]), :output => output
+ tail_pipe
end
- # Builds a pipe that perform a query/replace based on a regular expression.
- #
- # The first unamed argument specifies the input field.
- #
- # The named options are:
- # * :pattern a string or regex. Specifies the pattern to look for in the input field. This non-optional argument
- # can also be specified as a second _unamed_ argument.
- # * :replacement a string. Specifies the replacement.
- # * :output a string or array of strings. Specifies the outgoing fields (all fields will be output by default)
- def replace(*args)
- options = args.extract_options!
-
- pattern = options[:pattern] || args[1]
- replacement = options[:replacement] || args[2]
- into = options[:into] || "#{args[0]}_replaced"
- output = options[:output] || all_fields
-
- each args[0], :function => regex_replace(into, pattern, replacement), :output => output
- end
+ def populate_incoming_scopes(assembly_names, group_fields_args = {})
+ # NOTE: this overrides the existing incoming_scopes, which changes the
+ # way describe will function on this assembly
+ pipes, @incoming_scopes, group_fields = [], [], []
+ assembly_names.each do |assembly_name|
+ assembly = parent_flow.find_child(assembly_name)
+ raise "Could not find assembly '#{assembly_name}' from '#{name}'" unless assembly
- # Builds a pipe that inserts values into the current tuple.
- #
- # The method takes a hash as parameter. This hash contains as keys the names of the fields to insert
- # and as values, the values they must contain. For example:
- #
- # insert {"who" => "Grégoire", "when" => Time.now.strftime("%Y-%m-%d") }
- #
- # will insert two new fields: a field _who_ containing the string "Grégoire", and a field _when_ containing
- # the formatted current date.
- # The methods outputs all fields.
- # The named options are:
- def insert(args)
- args.keys.sort.each do |field_name|
- value = args[field_name]
-
- if value.kind_of?(ExprStub)
- value.validate_scope(scope)
- each all_fields, :function => expression_function(field_name, :expression => value.expression, :parameters => value.types), :output => all_fields
- else
- each all_fields, :function => insert_function([field_name], :values => [value]), :output => all_fields
- end
+ pipes << assembly.tail_pipe
+ @incoming_scopes << assembly.scope
+ group_fields << fields(group_fields_args[assembly_name]) if group_fields_args[assembly_name]
end
+ [pipes, group_fields]
end
- # Builds a pipe that filters the tuples based on an expression or a pattern (but not both !).
- #
- # The first unamed argument, if provided, is a filtering expression (using the Janino syntax).
- #
- # The named options are:
- # * :pattern a string. Specifies a regular expression pattern used to filter the tuples. If this
- # option is provided, then the filter is regular expression-based. This is incompatible with the _expression_ option.
- # * :expression a string. Specifies a Janino expression used to filter the tuples. This option has the
- # same effect than providing it as first unamed argument. If this option is provided, then the filter is Janino
- # expression-based. This is incompatible with the _pattern_ option.
- # * :validate a boolean. Passed into Cascading#expr to enable or disable
- # expression validation. Defaults to true.
- # * :validate_with a hash. Actual arguments used by Cascading#expr for
- # expression validation. Defaults to {}.
- def filter(*args)
- options = args.extract_options!
- from = options.delete(:from) || all_fields
- expression = options.delete(:expression) || args.shift
- regex = options.delete(:pattern)
- validate = options.has_key?(:validate) ? options.delete(:validate) : true
- validate_with = options.has_key?(:validate_with) ? options.delete(:validate_with) : {}
-
- if expression
- stub = expr(expression, { :validate => validate, :validate_with => validate_with })
- types, expression = stub.types, stub.expression
-
- stub.validate_scope(scope)
- each from, :filter => expression_filter(
- :parameters => types,
- :expression => expression
- )
- elsif regex
- each from, :filter => regex_filter(regex, options)
- end
- end
+ def apply_aggregations(group, incoming_scopes, &block)
+ aggregations = Aggregations.new(self, group, incoming_scopes)
+ aggregations.instance_eval(&block) if block_given?
- def filter_null(*args)
- options = args.extract_options!
- each(args, :filter => Java::CascadingOperationFilter::FilterNull.new)
- end
- alias reject_null filter_null
+ # Sorting of any type means that we cannot use the AggregateBy optimization
+ if aggregations.can_aggregate_by? && !group.is_sorted && !group.is_sort_reversed
+ grouping_fields = group.key_selectors.values.first
+ group.key_selectors.values.each do |key_fields|
+ raise "Grouping fields mismatch: #{grouping_fields} expected; #{key_fields} found from #{group.key_selectors}" unless key_fields == grouping_fields
+ end
- def filter_not_null(*args)
- options = args.extract_options!
- each(args, :filter => Java::CascadingOperationFilter::FilterNotNull.new)
- end
- alias where_null filter_not_null
+ aggregate_by = sub_assembly(Java::CascadingPipeAssembly::AggregateBy.new(
+ name,
+ group.previous,
+ grouping_fields,
+ aggregations.aggregate_bys.to_java(Java::CascadingPipeAssembly::AggregateBy)
+ ), group.previous, incoming_scopes)
- # Builds a pipe that rejects the tuples based on an expression.
- #
- # The first unamed argument, if provided, is a filtering expression (using the Janino syntax).
- #
- # The named options are:
- # * :expression a string. Specifies a Janino expression used to filter the tuples. This option has the
- # same effect than providing it as first unamed argument. If this option is provided, then the filter is Janino
- # expression-based.
- # * :validate a boolean. Passed into Cascading#expr to enable or disable
- # expression validation. Defaults to true.
- # * :validate_with a hash. Actual arguments used by Cascading#expr for
- # expression validation. Defaults to {}.
- def reject(*args)
- options = args.extract_options
- raise "Regex not allowed" if options && options[:pattern]
-
- filter(*args)
- end
+ aggregate_by
+ else
+ aggregations.finalize if block_given?
+ @tail_pipe = aggregations.tail_pipe
+ @outgoing_scopes[name] = aggregations.scope
- # Builds a pipe that includes just the tuples matching an expression.
- #
- # The first unamed argument, if provided, is a filtering expression (using the Janino syntax).
- #
- # The named options are:
- # * :expression a string. Specifies a Janino expression used to select the tuples. This option has the
- # same effect than providing it as first unamed argument. If this option is provided, then the filter is Janino
- # expression-based.
- # * :validate a boolean. Passed into Cascading#expr to enable or disable
- # expression validation. Defaults to true.
- # * :validate_with a hash. Actual arguments used by Cascading#expr for
- # expression validation. Defaults to {}.
- def where(*args)
- options = args.extract_options
- raise "Regex not allowed" if options && options[:pattern]
-
- if options[:expression]
- _, imports, expr = options[:expression].match(/^((?:\s*import.*;\s*)*)(.*)$/).to_a
- options[:expression] = "#{imports}!(#{expr})"
- elsif args[0]
- _, imports, expr = args[0].match(/^((?:\s*import.*;\s*)*)(.*)$/).to_a
- args[0] = "#{imports}!(#{expr})"
+ group
end
-
- filter(*args)
end
- # Builds a pipe that evaluates the specified Janino expression and insert it in a new field in the tuple.
- #
- # The named options are:
- # * :from a string or array of strings. Specifies the input fields.
- # * :express a string. The janino expression.
- # * :into a string. Specified the name of the field to insert with the result of the evaluation.
- # * :parameters a hash. Specifies the type mapping for the parameters. See Cascading::Operations.expression_function.
- def eval_expression(*args)
- options = args.extract_options!
-
- into = options.delete(:into)
- from = options.delete(:from) || all_fields
- output = options.delete(:output) || all_fields
- options[:expression] ||= args.shift
- options[:parameters] ||= args.shift
-
- each from, :function => expression_function(into, options), :output=>output
- end
+ def prepare_join(assembly_names, options, &block)
+ pipes, _ = populate_incoming_scopes(assembly_names)
- # Builds a pipe that returns distinct tuples based on the provided fields.
- #
- # The method accepts optional unamed argument specifying the fields to base the distinct on
- # (all fields, by default).
- def distinct(*args)
- raise "Distinct is badly broken"
- fields = args[0] || all_fields
- group_by *fields
- pass
- end
-
- def join_fields(*args)
- options = args.extract_options!
- output = options[:output] || all_fields
+ group_fields_args = options[:on]
+ raise 'join requires :on parameter' unless group_fields_args
- each args, :function => field_joiner(options), :output => output
- end
+ if group_fields_args.kind_of?(String)
+ group_fields_args = [group_fields_args]
+ end
- # Ungroups, or unpivots, a tuple (see Cascading's UnGroup at http://docs.cascading.org/cascading/2.0/javadoc/cascading/operation/function/UnGroup.html).
- #
- # You must provide :key and you must provide only one of :value_selectors
- # and :num_values.
- #
- # The named options are:
- # * :key required array of field names to replicate on every
- # output row in an ungrouped group.
- # * :value_selectors an array of field names to ungroup. Each
- # field will be ungrouped into an output tuple along with the key fields
- # in the order provided.
- # * :num_values an integer specifying the number of fields to
- # ungroup into each output tuple (excluding the key fields). All input
- # fields will be ungrouped.
- # * :input an array of field names that specifies the fields to
- # input to UnGroup. Defaults to all_fields.
- # * :into an array of field names. Default set by UnGroup.
- # * :output an array of field names that specifies the fields to
- # produce as output of UnGroup. Defaults to all_fields.
- def ungroup(*args)
- options = args.extract_options!
- input = options[:input] || all_fields
- into = fields(options[:into])
- output = options[:output] || all_fields
- key = fields(options[:key])
-
- raise 'You must provide exactly one of :value_selectors or :num_values to ungroup' unless options.has_key?(:value_selectors) ^ options.has_key?(:num_values)
- value_selectors = options[:value_selectors].map{ |vs| fields(vs) }.to_java(Java::CascadingTuple::Fields) if options.has_key?(:value_selectors)
- num_values = options[:num_values] if options.has_key?(:num_values)
-
- parameters = [into, key, value_selectors, num_values].compact
- each input, :function => Java::CascadingOperationFunction::UnGroup.new(*parameters), :output => output
- end
+ group_fields = []
+ if group_fields_args.kind_of?(Array)
+ pipes.size.times do
+ group_fields << fields(group_fields_args)
+ end
+ elsif group_fields_args.kind_of?(Hash)
+ pipes, group_fields = populate_incoming_scopes(group_fields_args.keys.sort, group_fields_args)
+ else
+ raise "Unsupported data type for :on in join: '#{group_fields_args.class}'"
+ end
- # Inserts one of two values into the dataflow based upon the result of the
- # supplied filter on the input fields. This is primarily useful for
- # creating indicators from filters.
- #
- # Parameters:
- # * input name of field to apply the filter.
- # * filter Cascading Filter to apply.
- # * keep_value Java value to produce when the filter would keep
- # the given input.
- # * remove_value Java value to produce when the filter would
- # remove the given input.
- #
- # The named options are:
- # * :into an output field name, defaulting to 'filter_value'.
- # * :output an array of field names that specifies the fields to
- # retain in the output tuple. Defaults to all_fields.
- def set_value(input, filter, keep_value, remove_value, params = {})
- into = fields(params[:into] || 'filter_value')
- output = params[:output] || all_fields
- each input, :function => Java::CascadingOperationFunction::SetValue.new(into, filter, keep_value, remove_value), :output => output
- end
+ raise 'join requires non-empty :on parameter' if group_fields_args.empty?
+ group_fields = group_fields.to_java(Java::CascadingTuple::Fields)
+ incoming_fields = @incoming_scopes.map{ |s| s.values_fields }
+ declared_fields = fields(options[:declared_fields] || dedup_fields(*incoming_fields))
+ joiner = options[:joiner]
+ is_hash_join = options[:hash] || false
- # Efficient way of inserting a null indicator for any field, even one that
- # cannot be coerced to a string. This is accomplished using Cascading's
- # FilterNull and SetValue operators rather than Janino. 1 is produced if
- # the field is null and 0 otherwise.
- #
- # Parameters:
- # * input name of field to check for null.
- #
- # The named options are:
- # * :into an output field name, defaulting to 'is_null'.
- # * :output an array of field names that specifies the fields to
- # retain in the output tuple. Defaults to all_fields.
- def null_indicator(input, params = {})
- into = fields(params[:into] || 'is_null')
- output = params[:output] || all_fields
- set_value input, Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, :into => into, :output => output
- end
+ case joiner
+ when :inner, 'inner', nil
+ joiner = Java::CascadingPipeJoiner::InnerJoin.new
+ when :left, 'left'
+ joiner = Java::CascadingPipeJoiner::LeftJoin.new
+ when :right, 'right'
+ joiner = Java::CascadingPipeJoiner::RightJoin.new
+ when :outer, 'outer'
+ joiner = Java::CascadingPipeJoiner::OuterJoin.new
+ when Array
+ joiner = joiner.map do |t|
+ case t
+ when true, 1, :inner then true
+ when false, 0, :outer then false
+ else fail "invalid mixed joiner entry: #{t}"
+ end
+ end
+ joiner = Java::CascadingPipeJoiner::MixedJoin.new(joiner.to_java(:boolean))
+ end
- # Given a field and a regex, returns an indicator that is 1 if the string
- # contains at least 1 match and 0 otherwise.
- #
- # Parameters:
- # * input field name or names that specifies the fields over which
- # to perform the match.
- # * pattern regex to apply to the input.
- #
- # The named options are:
- # * :into an output field name, defaulting to 'regex_contains'.
- # * :output an array of field names that specifies the fields to
- # retain in the output tuple. Defaults to all_fields.
- def regex_contains(input, pattern, params = {})
- input = fields(input)
- pattern = pattern.to_s # Supports JRuby regexes
- into = fields(params[:into] || 'regex_contains')
- output = params[:output] || all_fields
- set_value input, Java::CascadingOperationRegex::RegexFilter.new(pattern), 1.to_java, 0.to_java, :into => into, :output => output
+ if is_hash_join
+ parameters = [
+ pipes.to_java(Java::CascadingPipe::Pipe),
+ group_fields,
+ declared_fields,
+ joiner
+ ]
+ group_assembly = Java::CascadingPipe::HashJoin.new(*parameters)
+ else
+ result_group_fields = dedup_fields(*group_fields)
+ parameters = [
+ pipes.to_java(Java::CascadingPipe::Pipe),
+ group_fields,
+ declared_fields,
+ result_group_fields,
+ joiner
+ ]
+ group_assembly = Java::CascadingPipe::CoGroup.new(*parameters)
+ end
+ apply_aggregations(group_assembly, @incoming_scopes, &block)
end
end
end
diff --git a/lib/cascading/base.rb b/lib/cascading/base.rb
index 80c30aa..bedb5c6 100644
--- a/lib/cascading/base.rb
+++ b/lib/cascading/base.rb
@@ -1,7 +1,22 @@
module Cascading
+ # A Node is a Cascade, Flow, or Assembly, all of which are composite
+ # structures that describe the hierarchical structure of your job. A Cascade
+ # may contain many Flows and a Flow and Assembly may contain many Assemblies
+ # (branches in the case of the Assembly). Nodes are named, contain parent
+ # and child pointers, and keep track of their children both by name and by
+ # insertion order.
+ #
+ # Nodes must be uniquely named within the scope of their parent so that they
+ # can be unambiguously looked up for connecting pipes within a flow. However, we
+ # only ensure that children are uniquely named upon insertion; full
+ # uniqueness isn't required until Node#find_child is called (this allows for
+ # name reuse in a few limited circumstances that was important when migrating
+ # the Etsy workload to enforce these constraints).
class Node
attr_accessor :name, :parent, :children, :child_names, :last_child
+ # A Node requires a name and a parent when it is constructed. Children are
+ # added later with Node#add_child.
def initialize(name, parent)
@name = name
@parent = parent
@@ -23,10 +38,15 @@ def add_child(node)
node
end
+ # The qualified name of a node is formed from the name of all nodes in the
+ # path from the root to that node.
def qualified_name
parent ? "#{parent.qualified_name}.#{name}" : name
end
+ # Produces a textual description of this Node. This method is overridden
+ # by all classes inheriting Node, so it serves mainly as a template for
+ # describing a node with children.
def describe(offset = '')
"#{offset}#{name}:node\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
end
@@ -44,6 +64,8 @@ def find_child(name)
all_children_with_name.first
end
+ # Returns the root Node, the topmost parent of the hierarchy (typically a
+ # Cascade or Flow).
def root
return self unless parent
parent.root
diff --git a/lib/cascading/cascade.rb b/lib/cascading/cascade.rb
index 3a9de80..1fa3138 100644
--- a/lib/cascading/cascade.rb
+++ b/lib/cascading/cascade.rb
@@ -2,6 +2,13 @@
require 'yaml'
module Cascading
+ # A Cascade wraps a c.c.Cascade. A Cascade is composed of Flows, which are
+ # constructed using the Cascade#flow method within the block passed to the
+ # Cascading::cascade constructor. Many flows may be nested within a Cascade.
+ #
+ # Note that you are not required to use a Cascade to wrap your job. Instead,
+ # you could start with a top-level Flow, which you might prefer if you have
+ # no need of a c.c.Cascade's make-like semantics wrt sinks.
class Cascade < Cascading::Node
extend Registerable
@@ -10,46 +17,72 @@ class Cascade < Cascading::Node
# Do not use this constructor directly; instead, use Cascading::cascade to
# build cascades.
#
- # Builds a cascade given the specified name. Optionally accepts
- # :properties which will be used as the default properties for all child
- # flows. Properties must be a Ruby Hash with string keys and values and
- # will be copied before being passed into each flow in the cascade. See
- # Cascading::Flow#initialize for details on how flows handle properties.
- # Optionally accepts a :mode which will be used as the default mode for all
- # child flows. See Cascading::Mode.parse for details.
- def initialize(name, params = {})
- @properties = params[:properties] || {}
- @mode = params[:mode]
+ # Builds a Cascade given a name.
+ #
+ # The named options are:
+ # [properties] Properties hash which will be used as the default properties
+ # for all child flows. Properties must be a Ruby Hash with
+ # string keys and values and will be copied before being
+ # passed into each flow in the cascade. See Flow#initialize
+ # for details on how flows handle properties.
+ # [mode] Mode which will be used as the default mode for all child flows.
+ # See Mode.parse for details.
+ def initialize(name, options = {})
+ @properties = options[:properties] || {}
+ @mode = options[:mode]
super(name, nil) # A Cascade cannot have a parent
self.class.add(name, self)
end
- # Builds a child flow given a name and block. Optionally accepts
- # :properties which will override the default properties stroed in this
- # cascade. Optionally accepts a :mode, which will override the default
- # mode stored in this cascade.
- def flow(name, params = {}, &block)
+ # Builds a child Flow in this Cascade given a name and block.
+ #
+ # The named options are:
+ # [properties] Properties hash which will override the default properties
+ # stored in this cascade.
+ # [mode] Mode which will override the default mode stored in this cascade.
+ #
+ # Example:
+ # cascade 'wordcount', :mode => :local do
+ # flow 'first_step' do
+ # ...
+ # end
+ #
+ # flow 'second_step' do
+ # ...
+ # end
+ # end
+ def flow(name, options = {}, &block)
raise "Could not build flow '#{name}'; block required" unless block_given?
- params[:properties] ||= properties.dup
- params[:mode] ||= mode
+ options[:properties] ||= properties.dup
+ options[:mode] ||= mode
- flow = Flow.new(name, self, params)
+ flow = Flow.new(name, self, options)
add_child(flow)
flow.instance_eval(&block)
flow
end
+ # Produces a textual description of this Cascade. The description details
+ # the structure of the Cascade, the sources and sinks of each Flow, and the
+ # input and output fields of each Assembly. The offset parameter allows
+ # for this describe to be nested within a calling context, which lets us
+ # indent the structural hierarchy of a job.
def describe(offset = '')
"#{offset}#{name}:cascade\n#{child_names.map{ |child| children[child].describe("#{offset} ") }.join("\n")}"
end
+ # Writes out the DOT file describing the structure of this Cascade.
+ #
+ # NOTE: will be at Job in later version and also present on Flow
def draw(dir)
@children.each do |name, flow|
flow.connect.writeDOT("#{dir}/#{name}.dot")
end
end
+ # Builds a map, keyed by flow name, of the sink metadata for each child
+ # flow. Currently, this contains only the field names of each sink.
def sink_metadata
@children.inject({}) do |sink_fields, (name, flow)|
sink_fields[name] = flow.sink_metadata
@@ -57,12 +90,16 @@ def sink_metadata
end
end
+ # Writes the mapping produced by Cascade#sink_metadata to a file at the
+ # given path in YAML.
def write_sink_metadata(file_name)
File.open(file_name, 'w') do |file|
YAML.dump(sink_metadata, file)
end
end
+ # Connects this Cascade, producing a c.c.Cascade, which is then completed,
+ # executing it. Child flows are connected, so no parameters are required.
def complete
begin
Java::CascadingCascade::CascadeConnector.new.connect(name, make_flows(@children)).complete
diff --git a/lib/cascading/cascading.rb b/lib/cascading/cascading.rb
index 763d313..59846c4 100644
--- a/lib/cascading/cascading.rb
+++ b/lib/cascading/cascading.rb
@@ -1,6 +1,33 @@
+require 'cascading/cascade'
+require 'cascading/flow'
require 'cascading/expr_stub'
+# The Cascading module contains all of the cascading.jruby DSL. Inserting the
+# following into your script:
+# require 'rubygems'
+# require 'cascading'
+# includes this module at the top level, making all of its features available.
+#
+# To build a dataflow like the one in the README.md or
+# {samples}[http://github.com/mrwalker/cascading.jruby/tree/master/samples],
+# start by looking at Cascade or Flow. These are the
+# highest level structures you'll use to put together your job.
+#
+# Within a flow, you'll connect sources to sinks by way of Assembly, which
+# refers to "pipe assemblies" from Cascading. Within an Assembly, you'll use
+# functions and filters (see Operations, IdentityOperations, RegexOperations,
+# FilterOperations, and TextOperations) as well as Assembly#group_by,
+# Assembly#union, and Assembly#join. You can provide those last pipes with a
+# block that can select operations from Aggregations.
+#
+# Finally, you'll want to address the execution of your job, whether it be
+# locally testing or running remotely on a Hadoop cluster. See the Mode class
+# for the available modes, and parameterize your script such that it can operate
+# in Cascading local mode locally and in Hadoop mode when run in a jar produced
+# with {Jading}[http://github.com/mrwalker/jading].
module Cascading
+ # Mapping that defines a convenient syntax for specifying Java classes, used
+ # in Janino expressions and elsewhere.
JAVA_TYPE_MAP = {
:int => java.lang.Integer.java_class, :long => java.lang.Long.java_class,
:bool => java.lang.Boolean.java_class, :double => java.lang.Double.java_class,
@@ -24,44 +51,84 @@ module Cascading
# directly building their own cascades and flows so that jading can send them
# default properties.
- # Builds a top-level cascade given a name and a block. Optionally accepts a
- # :mode, as explained in Cascading::Cascade#initialize.
- def cascade(name, params = {}, &block)
+ # Builds a top-level Cascade given a name and a block.
+ #
+ # The named options are:
+ # [properties] See Cascade#initialize
+ # [mode] See Cascade#initialize
+ #
+ # Example:
+ # cascade 'wordcount', :mode => :local do
+ # flow 'first_step' do
+ # ...
+ # end
+ #
+ # flow 'second_step' do
+ # ...
+ # end
+ # end
+ def cascade(name, options = {}, &block)
raise "Could not build cascade '#{name}'; block required" unless block_given?
- raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if params[:properties]
+ raise 'Cascading::cascade does not accept the :properties param only the global $jobconf_properties' if options[:properties]
- params[:properties] = $jobconf_properties.dup if defined?($jobconf_properties) && $jobconf_properties
+ options[:properties] = $jobconf_properties.dup if defined?($jobconf_properties) && $jobconf_properties
- cascade = Cascade.new(name, params)
+ cascade = Cascade.new(name, options)
cascade.instance_eval(&block)
cascade
end
- # Builds a top-level flow given a name and block for applications built of
- # flows with no cascades. Optionally accepts a :mode, as explained in
- # Cascading::Flow#initialize.
- def flow(name, params = {}, &block)
+ # Builds a top-level Flow given a name and block for applications built of
+ # flows with no cascades.
+ #
+ # The named options are:
+ # [properties] See Flow#initialize
+ # [mode] See Flow#initialize
+ #
+ # Example:
+ # flow 'wordcount', :mode => :local do
+ # assembly 'first_step' do
+ # ...
+ # end
+ #
+ # assembly 'second_step' do
+ # ...
+ # end
+ # end
+ def flow(name, options = {}, &block)
raise "Could not build flow '#{name}'; block required" unless block_given?
- raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if params[:properties]
+ raise 'Cascading::flow does not accept the :properties param only the global $jobconf_properties' if options[:properties]
- params[:properties] = $jobconf_properties.dup if defined?($jobconf_properties) && $jobconf_properties
+ options[:properties] = $jobconf_properties.dup if defined?($jobconf_properties) && $jobconf_properties
- flow = Flow.new(name, nil, params)
+ flow = Flow.new(name, nil, options)
flow.instance_eval(&block)
flow
end
+ # Produces a textual description of all Cascades in the global registry. The
+ # description details the structure of the Cascades, the sources and sinks of
+ # each Flow, and the input and output fields of each Assembly.
+ #
+ # NOTE: will be moved to Job in later version
def describe
Cascade.all.map{ |cascade| cascade.describe }.join("\n")
end
alias desc describe
# See ExprStub.expr
- def expr(expression, params = {})
- ExprStub.expr(expression, params)
+ def expr(expression, options = {})
+ ExprStub.expr(expression, options)
end
- # Creates a cascading.tuple.Fields instance from a string or an array of strings.
+ # Utility method for creating Cascading c.t.Fields from a field name (string)
+ # or list of field names (array of strings). If the input is already a
+ # c.t.Fields or nil, it is passed through. This allows for flexible use of
+ # the method at multiple layers in the DSL.
+ #
+ # Example:
+ # cascading_fields = fields(['first', 'second', 'third'])
+ # # cascading_fields.to_a == ['first', 'second', 'third']
def fields(fields)
if fields.nil?
return nil
@@ -76,27 +143,45 @@ def fields(fields)
return Java::CascadingTuple::Fields.new([fields].flatten.map{ |f| f.kind_of?(Fixnum) ? java.lang.Integer.new(f) : f }.to_java(java.lang.Comparable))
end
+ # Convenience method wrapping c.t.Fields::ALL
def all_fields
Java::CascadingTuple::Fields::ALL
end
- def union_fields(*fields)
- fields(fields.inject([]){ |acc, arr| acc | arr.to_a })
- end
-
- def difference_fields(*fields)
- fields(fields[1..-1].inject(fields.first.to_a){ |acc, arr| acc - arr.to_a })
+ # Convenience method wrapping c.t.Fields::VALUES
+ def last_grouping_fields
+ Java::CascadingTuple::Fields::VALUES
end
- def copy_fields(fields)
- fields.select(all_fields)
+ # Computes fields formed by removing remove_fields from base_fields. Operates
+ # only on named fields, not positional fields.
+ #
+ # Example:
+ # base_fields = fields(['a', 'b', 'c'])
+ # remove_fields = fields(['b'])
+ # result_fields = difference_fields(base_fields, remove_fields)
+ # # result_fields.to_a == ['a', 'c']
+ def difference_fields(base_fields, remove_fields)
+ fields(base_fields.to_a - remove_fields.to_a)
end
+ # Combines fields deduplicating them with trailing underscores as necessary.
+ # This is used in joins to avoid requiring the caller to unique fields before
+ # they are joined.
def dedup_fields(*fields)
raise 'Can only be applied to declarators' unless fields.all?{ |f| f.is_declarator? }
fields(dedup_field_names(*fields.map{ |f| f.to_a }))
end
+ # Helper used by dedup_fields that operates on arrays of field names rather
+ # than fields objects.
+ #
+ # Example:
+ # left_names = ['a', 'b']
+ # mid_names = ['a', 'c']
+ # right_names = ['a', 'd']
+ # deduped_names = dedup_field_names(left_names, mid_names, right_names)
+ # # deduped_names == ['a', 'b', 'a_', 'c', 'a__', 'd']
def dedup_field_names(*names)
names.inject([]) do |acc, arr|
acc + arr.map{ |e| search_field_name(acc, e) }
@@ -106,30 +191,22 @@ def dedup_field_names(*names)
def search_field_name(names, candidate)
names.include?(candidate) ? search_field_name(names, "#{candidate}_") : candidate
end
-
- def last_grouping_fields
- Java::CascadingTuple::Fields::VALUES
- end
-
- def results_fields
- Java::CascadingTuple::Fields::RESULTS
- end
+ private :search_field_name
# Creates a TextLine scheme (can be used in both Cascading local and hadoop
- # modes). Positional args are used if :source_fields is not
- # provided.
+ # modes). Positional args are used if :source_fields is not provided.
#
# The named options are:
- # * :source_fields a string or array of strings. Specifies the
- # fields to be read from a source with this scheme. Defaults to ['offset', 'line'].
- # * :sink_fields a string or array of strings. Specifies the fields
- # to be written to a sink with this scheme. Defaults to all_fields.
- # * :compression a symbol, either :enable or
- # :disable, that governs the TextLine scheme's compression. Defaults
- # to the default TextLine compression (only applies to c.s.h.TextLine).
- def text_line_scheme(*args)
- options = args.extract_options!
- source_fields = fields(options[:source_fields] || (args.empty? ? ['offset', 'line'] : args))
+ # [source_fields] Fields to be read from a source with this scheme. Defaults
+ # to ['offset', 'line'].
+ # [sink_fields] Fields to be written to a sink with this scheme. Defaults to
+ # all_fields.
+ # [compression] A symbol, either :enable or :disable, that
+ # governs the TextLine scheme's compression. Defaults to the
+ # default TextLine compression (only applies to c.s.h.TextLine).
+ def text_line_scheme(*args_with_options)
+ options, source_fields = args_with_options.extract_options!, args_with_options
+ source_fields = fields(options[:source_fields] || (source_fields.empty? ? ['offset', 'line'] : source_fields))
sink_fields = fields(options[:sink_fields]) || all_fields
sink_compression = case options[:compression]
when :enable then Java::CascadingSchemeHadoop::TextLine::Compress::ENABLE
@@ -153,17 +230,30 @@ def sequence_file_scheme(*fields)
}
end
+ # Convenience access to MultiTap.multi_source_tap. This constructor is more
+ # "DSL-like" because it allows you to pass taps directly as actual args rather
+ # than in an array:
+ # multi_source_tap tap1, tap2, tap3, ..., tapn
+ #
+ # See MultiTap.multi_source_tap for more details.
def multi_source_tap(*taps)
MultiTap.multi_source_tap(taps)
end
+ # Convenience access to MultiTap.multi_sink_tap. This constructor is more
+ # "DSL-like" because it allows you to pass taps directly as actual args rather
+ # than in an array:
+ # multi_sink_tap tap1, tap2, tap3, ..., tapn
+ #
+ # See MultiTap.multi_sink_tap for more details.
def multi_sink_tap(*taps)
MultiTap.multi_sink_tap(taps)
end
- # Creates a Cascading::Tap given a path and optional :scheme and :sink_mode.
- def tap(path, params = {})
- Tap.new(path, params)
+ # Convenience constructor for a Tap, that accepts the same options as that
+ # class' constructor. See Tap for more details.
+ def tap(path, options = {})
+ Tap.new(path, options)
end
# Constructs properties to be passed to Flow#complete or Cascade#complete
diff --git a/lib/cascading/expr_stub.rb b/lib/cascading/expr_stub.rb
index 014f70f..d1f96ae 100644
--- a/lib/cascading/expr_stub.rb
+++ b/lib/cascading/expr_stub.rb
@@ -3,15 +3,15 @@ class ExprStub
attr_accessor :expression, :types, :input_expression
# ExprStub requires a Janino expression decorated with field types. For
- # example: '"Found: " + (x:int + y:int) + " " + z:string'. Type names are
- # defined in Cascading::JAVA_TYPE_MAP.
+ # example:
+ # expr('"Found: " + (x:int + y:int) + " " + z:string')
+ # Type names are defined in Cascading::JAVA_TYPE_MAP.
def initialize(expression)
@input_expression = expression
@expression = expression.dup
@types = {}
# Simple regexp based parser for types
-
JAVA_TYPE_MAP.each do |sym, klass|
@expression.gsub!(/[A-Za-z0-9_]+:#{sym.to_s}/) do |match|
name = match.split(/:/).first.gsub(/\s+/, "")
@@ -21,21 +21,38 @@ def initialize(expression)
end
end
+ # Extract Java names and types from @types hash. Cascading constructors
+ # often require two separate Java Arrays in this fashion.
+ def names_and_types
+ names, types = split_hash(@types)
+ [names.to_java(java.lang.String), types.to_java(java.lang.Class)]
+ end
+
+ # Prints the original input expression.
def to_s
@input_expression
end
# Convenience constructor for an ExprStub that optionally performs
# validation. Takes a string to use as a Janino expression and an optional
- # params hash. By default, the param :validate is set to true (performs
- # expression validation using default actual argument values) and the param
- # :validate_with is set to {} (which doesn't override any of the default
- # actual argument values used for validation).
- def self.expr(expression, params = {})
- params = { :validate => true, :validate_with => {} }.merge(params)
+ # options hash.
+ #
+ # The named options are:
+ # [validate] A boolean indicating whether expression validation using
+ # default actual argument values should be performed. Defaults
+ # to true.
+ # [validate_with] A hash mapping field names (or symbols) to the value that
+ # should be used for validation. Strings default to nil,
+ # so if you have previously filtered nulls you might use a
+ # marker value like 'nulls_filtered'. Defaults to {}.
+ #
+ # Example:
+ # insert 'x_eq_y' => expr('x:string.equals(y:string)', :validate_with => { :x => 'nulls_filtered' })
+ def self.expr(expression, options = {})
+ options = { :validate => true, :validate_with => {} }.merge(options)
expr_stub = expression.kind_of?(ExprStub) ? expression : ExprStub.new(expression).compile
- expr_stub.validate(params[:validate_with]) if params[:validate]
- puts "Expression validation is disabled for '#{expression}'" unless params[:validate]
+ expr_stub.validate(options[:validate_with]) if options[:validate]
+ puts "Expression validation is disabled for '#{expression}'" unless options[:validate]
expr_stub
end
@@ -68,6 +85,9 @@ def validate(actual_args = {})
self.eval(test_values.merge(actual_args))
end
+ # Given a scope, validates that the fields required by this ExprStub are
+ # available in the values fields of the scope. Returns those values fields
+ # which are unused in the expression.
def validate_scope(scope)
validate_fields(scope.values_fields.to_a)
end
@@ -113,12 +133,6 @@ def evaluator
end
end
- # Extract Java names and types from @types hash
- def names_and_types
- names, types = split_hash(@types)
- [names.to_java(java.lang.String), types.to_java(java.lang.Class)]
- end
-
# Makes best effort to convert Ruby numbers into the Java numeric type
# exepcted by a Janino expression. However, if the conversion fails, it
# returns the original value so that the exception thrown will be from
diff --git a/lib/cascading/ext/array.rb b/lib/cascading/ext/array.rb
index 9f7fd56..c8def8e 100644
--- a/lib/cascading/ext/array.rb
+++ b/lib/cascading/ext/array.rb
@@ -1,8 +1,25 @@
+# Extensions to Arrays in support of variable length lists of field names. This
+# is not pretty, but supports DSL features like:
+# group_by 'field1', 'field2', :sort_by => 'field3' do
+# ...
+# end
+#
+# The most obvious limitation of the approach is that function definitions of
+# the form f(*args_with_options) are not self-documenting. To compensate for
+# this, documentation of all arguments and optional parameters must be provided
+# on the DSL method.
class Array
+ # Use this extension to extract the optional parameters from a
+ # *args_with_options argument.
+ # So if you have a function:
+ # def f(*args_with_options)
+ # You can destructively process the args_with_options as follows:
+ # options, just_args = args_with_options.extract_options!, args_with_options
def extract_options!
last.is_a?(::Hash) ? pop : {}
end
+ # Non-destructive form of Array#extract_options!
def extract_options
last.is_a?(::Hash) ? last : {}
end
diff --git a/lib/cascading/filter_operations.rb b/lib/cascading/filter_operations.rb
new file mode 100644
index 0000000..8ae26d8
--- /dev/null
+++ b/lib/cascading/filter_operations.rb
@@ -0,0 +1,101 @@
+module Cascading
+ # Module of filtering operations. Unlike some of the other functional
+ # operations modules, this one does not just wrap operations defined by
+ # Cascading in cascading.operation.filter. Instead, it provides some useful
+ # high-level DSL pipes which map many Cascading operations into a smaller
+ # number of DSL statements.
+ #
+ # Still, some are direct wrappers:
+ # filter\_null:: {FilterNull}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/filter/FilterNull.html]
+ # filter\_not\_null:: {FilterNotNull}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/filter/FilterNotNull.html]
+ module FilterOperations
+ # Filter the current assembly based on an expression or regex, but not both.
+ #
+ # The named options are:
+ # [expression] A Janino expression used to filter. Has access to all :input
+ # fields.
+ # [validate] Boolean passed to Cascading#expr to enable or disable
+ # expression validation. Defaults to true.
+ # [validate_with] Hash mapping field names to actual arguments used by
+ # Cascading#expr for expression validation. Defaults to {}.
+ # [regex] A regular expression used to filter.
+ # [remove_match] Boolean indicating if regex matches should be removed or
+ # kept. Defaults to false, which is a bit counterintuitive.
+ # [match_each_element] Boolean indicating if regex should match entire
+ # incoming tuple (joined with tabs) or each field
+ # individually. Defaults to false.
+ #
+ # Example:
+ # filter :input => 'field1', :regex => /\t/, :remove_match => true
+ # filter :expression => 'field1:long > 0 && "".equals(field2:string)'
+ def filter(options = {})
+ input_fields = options[:input] || all_fields
+ expression = options[:expression]
+ regex = options[:regex]
+
+ if expression
+ validate = options.has_key?(:validate) ? options[:validate] : true
+ validate_with = options[:validate_with] || {}
+
+ stub = expr(expression, { :validate => validate, :validate_with => validate_with })
+ stub.validate_scope(scope)
+
+ names, types = stub.names_and_types
+ each input_fields, :filter => Java::CascadingOperationExpression::ExpressionFilter.new(
+ stub.expression,
+ names,
+ types
+ )
+ elsif regex
+ parameters = [regex.to_s, options[:remove_match], options[:match_each_element]].compact
+ each input_fields, :filter => Java::CascadingOperationRegex::RegexFilter.new(*parameters)
+ else
+ raise 'filter requires one of :expression or :regex'
+ end
+ end
+
+ # Rejects tuples from the current assembly based on a Janino expression.
+ # This is just a wrapper for FilterOperations.filter.
+ #
+ # Example:
+ # reject 'field1:long > 0 && "".equals(field2:string)'
+ def reject(expression, options = {})
+ options[:expression] = expression
+ filter(options)
+ end
+
+ # Keeps tuples from the current assembly based on a Janino expression. This
+ # is a wrapper for FilterOperations.filter.
+ #
+ # Note that this is accomplished by inverting the given expression, and best
+ # attempt is made to support import statements prior to the expression. If
+ # this support should break, simply negate your expression and use
+ # FilterOperations.reject.
+ #
+ # Example:
+ # where 'field1:long > 0 && "".equals(field2:string)'
+ def where(expression, options = {})
+ _, imports, expr = expression.match(/^((?:\s*import.*;\s*)*)(.*)$/).to_a
+ options[:expression] = "#{imports}!(#{expr})"
+ filter(options)
+ end
+
+ # Rejects tuples from the current assembly if any input field is null.
+ #
+ # Example:
+ # filter_null 'field1', 'field2'
+ def filter_null(*input_fields)
+ each(input_fields, :filter => Java::CascadingOperationFilter::FilterNull.new)
+ end
+ alias reject_null filter_null
+
+ # Rejects tuples from the current assembly if any input field is not null.
+ #
+ # Example:
+ # filter_not_null 'field1', 'field2'
+ def filter_not_null(*input_fields)
+ each(input_fields, :filter => Java::CascadingOperationFilter::FilterNotNull.new)
+ end
+ alias where_null filter_not_null
+ end
+end
diff --git a/lib/cascading/flow.rb b/lib/cascading/flow.rb
index 631cc96..8254264 100644
--- a/lib/cascading/flow.rb
+++ b/lib/cascading/flow.rb
@@ -1,6 +1,10 @@
require 'cascading/assembly'
module Cascading
+ # A Flow wraps a c.f.Flow. A Flow is composed of Assemblies, which are
+ # constructed using the Flow#assembly method within the block passed to the
+ # Cascading::flow or Cascade#flow constructor. Many Assemblies may be nested
+ # within a Flow.
class Flow < Cascading::Node
extend Registerable
@@ -10,23 +14,46 @@ class Flow < Cascading::Node
# Do not use this constructor directly. Instead, use Cascading::flow to
# build top-level flows and Cascade#flow to build flows within a Cascade.
#
- # Builds a flow given a name and a parent node (a cascade or nil).
- # Optionally accepts :properties which allows external configuration of
- # this flow. The flow will side-effect the properties during composition,
- # then pass the modified properties along to the FlowConnector for
- # execution. See Cascading::Cascade#initialize for details on how
- # properties are propagated through cascades. Optionally accepts a :mode
- # which will determine the execution mode of this flow. See
- # Cascading::Mode.parse for details.
- def initialize(name, parent, params = {})
+ # Builds a Flow given a name and a parent node (a Cascade or nil).
+ #
+ # The named options are:
+ # [properties] Properties hash which allows external configuration of this
+ # flow. The flow will side-effect the properties during
+ # composition, then pass the modified properties along to the
+ # FlowConnector for execution. See Cascade#initialize for
+ # details on how properties are propagated through cascades.
+ # [mode] Mode which will determine the execution mode of this flow. See
+ # Mode.parse for details.
+ def initialize(name, parent, options = {})
@sources, @sinks, @incoming_scopes, @outgoing_scopes, @listeners = {}, {}, {}, {}, []
- @properties = params[:properties] || {}
- @mode = Mode.parse(params[:mode])
+ @properties = options[:properties] || {}
+ @mode = Mode.parse(options[:mode])
@flow_scope = Scope.flow_scope(name)
super(name, parent)
self.class.add(name, self)
end
+ # Builds a child Assembly in this Flow given a name and block.
+ #
+ # An assembly's name is quite important as it will determine:
+ # * The sources from which it will read, if any
+ # * The name to be used in joins or unions downstream
+ # * The name to be used to sink the output of the assembly downstream
+ #
+ # Many assemblies may be built within a flow. The Assembly#branch method
+ # is used for creating nested assemblies and produces objects of the same
+ # type as this constructor.
+ #
+ # Example:
+ # flow 'wordcount', :mode => :local do
+ # assembly 'first_step' do
+ # ...
+ # end
+ #
+ # assembly 'second_step' do
+ # ...
+ # end
+ # end
def assembly(name, &block)
raise "Could not build assembly '#{name}'; block required" unless block_given?
assembly = Assembly.new(name, self, @outgoing_scopes)
@@ -49,6 +76,11 @@ def sink(name, tap)
sinks[name] = tap
end
+ # Produces a textual description of this Flow. The description details the
+ # structure of the Flow, its sources and sinks, and the input and output
+ # fields of each Assembly. The offset parameter allows for this describe
+ # to be nested within a calling context, which lets us indent the
+ # structural hierarchy of a job.
def describe(offset = '')
description = "#{offset}#{name}:flow\n"
description += "#{sources.keys.map{ |source| "#{offset} #{source}:source :: #{incoming_scopes[source].values_fields.to_a.inspect}" }.join("\n")}\n"
@@ -57,18 +89,28 @@ def describe(offset = '')
description
end
+ # Accesses the outgoing scope of this Flow at the point at which it is
+ # called by default, or for the child specified by the given name, if
+ # specified. This is useful for grabbing the values_fields at any point in
+ # the construction of the Flow. See Scope for details.
def scope(name = nil)
raise 'Must specify name if no children have been defined yet' unless name || last_child
name ||= last_child.name
@outgoing_scopes[name]
end
+ # Prints information about the scope of this Flow at the point at which it
+ # is called by default, or for the child specified by the given name, if
+ # specified. This allows you to trace the propagation of field names
+ # through your job and is handy for debugging. See Scope for details.
def debug_scope(name = nil)
scope = scope(name)
name ||= last_child.name
puts "Scope for '#{name}':\n #{scope}"
end
+ # Builds a map, keyed by sink name, of the sink metadata for each sink.
+ # Currently, this contains only the field names of each sink.
def sink_metadata
@sinks.keys.inject({}) do |sink_metadata, sink_name|
raise "Cannot sink undefined assembly '#{sink_name}'" unless @outgoing_scopes[sink_name]
@@ -79,7 +121,16 @@ def sink_metadata
end
end
- # TODO: support all codecs, support list of codecs
+ # Property modifier that sets the codec and type of the compression for all
+ # sinks in this flow. Currently only supports o.a.h.i.c.DefaultCodec and
+ # o.a.h.i.c.GzipCodec, and the NONE, RECORD, or BLOCK compression
+ # types defined in o.a.h.i.SequenceFile.
+ #
+ # codec may be symbols like :default or :gzip and type may be symbols like
+ # :none, :record, or :block.
+ #
+ # Example:
+ # compress_output :default, :block
def compress_output(codec, type)
properties['mapred.output.compress'] = 'true'
properties['mapred.output.compression.codec'] = case codec
@@ -95,22 +146,28 @@ def compress_output(codec, type)
end
end
+ # Set the cascading.spill.list.threshold property in this flow's
+ # properties. See c.t.c.SpillableProps for details.
def set_spill_threshold(threshold)
- properties['cascading.cogroup.spill.threshold'] = threshold.to_s
+ properties['cascading.spill.list.threshold'] = threshold.to_s
end
+ # Adds the given path to the mapred.cache.files list property.
def add_file_to_distributed_cache(file)
add_to_distributed_cache(file, "mapred.cache.files")
end
+ # Adds the given path to the mapred.cache.archives list property.
def add_archive_to_distributed_cache(file)
add_to_distributed_cache(file, "mapred.cache.archives")
end
+ # Appends a FlowListener to the list of listeners for this flow.
def add_listener(listener)
@listeners << listener
end
+ # Handles locating a file cached from S3 on local disk. TODO: remove
def emr_local_path_for_distributed_cache_file(file)
# NOTE this needs to be *appended* to the property mapred.local.dir
if file =~ /^s3n?:\/\//
@@ -122,16 +179,9 @@ def emr_local_path_for_distributed_cache_file(file)
end
end
- def add_to_distributed_cache(file, property)
- v = properties[property]
-
- if v
- properties[property] = [v.split(/,/), file].flatten.join(",")
- else
- properties[property] = file
- end
- end
-
+ # Connects this Flow, producing a c.f.Flow without completing it (the Flow
+ # is not executed). This method is used by Cascade to connect its child
+ # Flows. To connect and complete a Flow, see Flow#complete.
def connect
puts "Connecting flow '#{name}' with properties:"
properties.keys.sort.each do |key|
@@ -149,6 +199,9 @@ def connect
mode.connect_flow(properties, name, sources, sinks, pipes)
end
+ # Completes this Flow after connecting it. This results in execution of
+ # the c.f.Flow built from this Flow. Use this method when executing a
+ # top-level Flow.
def complete
begin
flow = connect
@@ -161,6 +214,16 @@ def complete
private
+ def add_to_distributed_cache(file, property)
+ v = properties[property]
+
+ if v
+ properties[property] = [v.split(/,/), file].flatten.join(",")
+ else
+ properties[property] = file
+ end
+ end
+
def make_tap_parameter(taps, pipe_accessor)
taps.inject({}) do |map, (name, tap)|
assembly = find_child(name)
diff --git a/lib/cascading/identity_operations.rb b/lib/cascading/identity_operations.rb
new file mode 100644
index 0000000..8f2e7ea
--- /dev/null
+++ b/lib/cascading/identity_operations.rb
@@ -0,0 +1,82 @@
+module Cascading
+ # Module of pipe assemblies that wrap the Cascading Identity operation. These
+ # are split out only to group similar functionality.
+ module IdentityOperations
+ # Restricts the current assembly to the specified fields in the order in
+ # which they are specified (can be used to reorder fields).
+ #
+ # Example:
+ # project 'field1', 'field2'
+ def project(*input_fields)
+ each fields(input_fields), :function => Java::CascadingOperation::Identity.new
+ end
+
+ # Removes the specified fields from the current assembly.
+ #
+ # Example:
+ # discard 'field1', 'field2'
+ def discard(*input_fields)
+ discard_fields = fields(input_fields)
+ keep_fields = difference_fields(scope.values_fields, discard_fields)
+ project(*keep_fields.to_a)
+ end
+
+ # Renames fields according to the mapping provided, preserving the original
+ # field order. Throws an exception if non-existent fields are specified.
+ #
+ # Example:
+ # rename 'field1' => 'fieldA', 'field2' => 'fieldB'
+ #
+ # Produces: ['fieldA', 'fieldB'], assuming those were the only 2 input
+ # fields.
+ def rename(name_map)
+ original_fields = scope.values_fields.to_a
+ invalid = name_map.keys - original_fields
+ raise "Invalid field names in rename: #{invalid.inspect}" unless invalid.empty?
+
+ renamed_fields = original_fields.map{ |name| name_map[name] || name }
+
+ each original_fields, :function => Java::CascadingOperation::Identity.new(fields(renamed_fields))
+ end
+
+ # Coerces fields to the Java type selected from Cascading::JAVA_TYPE_MAP.
+ #
+ # Example:
+ # cast 'field1' => :int, 'field2' => :double
+ def cast(type_map)
+ input_fields = type_map.keys.sort
+ types = JAVA_TYPE_MAP.values_at(*type_map.values_at(*input_fields))
+ input_fields = fields(input_fields)
+ types = types.to_java(java.lang.Class)
+ each input_fields, :function => Java::CascadingOperation::Identity.new(input_fields, types)
+ end
+
+ # A field copy (not a pipe copy). Renames fields according to name_map,
+ # appending them to the fields in the assembly in the same order as the
+ # original fields from which they are copied. Throws an exception if
+ # non-existent fields are specified.
+ #
+ # Example:
+ # copy 'field1' => 'fieldA', 'field2' => 'fieldB'
+ #
+ # Produces: ['field1', 'field2', 'fieldA', 'fieldB'], assuming those were
+ # the only input fields.
+ def copy(name_map)
+ original_fields = scope.values_fields.to_a
+ invalid = name_map.keys - original_fields
+ raise "Invalid field names in copy: #{invalid.inspect}" unless invalid.empty?
+
+ # Original fields in name_map in their original order
+ input_fields = original_fields - (original_fields - name_map.keys)
+ into_fields = name_map.values_at(*input_fields)
+
+ each input_fields, :function => Java::CascadingOperation::Identity.new(fields(into_fields)), :output => all_fields
+ end
+
+ # A pipe copy (not a field copy). Can be used within a branch to copy a
+ # pipe.
+ def pass
+ each all_fields, :function => Java::CascadingOperation::Identity.new
+ end
+ end
+end
diff --git a/lib/cascading/mode.rb b/lib/cascading/mode.rb
index 91898e0..dbbe8f7 100644
--- a/lib/cascading/mode.rb
+++ b/lib/cascading/mode.rb
@@ -1,21 +1,25 @@
module Cascading
- # A Cascading::Mode encapsulates the idea of the execution mode for your
- # flows. The default is Hadoop mode, but you can request that your code run
- # in Cascading local mode. If you subsequently use a tap or a scheme that
- # has no local implementation, the mode will be converted back to Hadoop
- # mode.
+ # A Mode encapsulates the idea of the execution mode for your flows. The
+ # default is Hadoop mode, but you can request that your code run in Cascading
+ # local mode. If you subsequently use a tap or a scheme that has no local
+ # implementation, the mode will be converted back to Hadoop mode.
class Mode
attr_reader :local
- # Hadoop mode is the default. You must explicitly request Cascading local
- # mode with values 'local' or :local.
+ # Parses a specification of which mode, Cascading local mode or Hadoop mode,
+ # to execute in. Defaults to Hadoop mode. You may explicitly request
+ # Cascading local mode with values 'local' or :local. If you pass a Mode
+ # object to this method, it will be passed through.
def self.parse(mode)
case mode
+ when Mode then mode
when 'local', :local then Mode.new(true)
else Mode.new(false)
end
end
+ # Constructs a Mode given a flag indicating if it should be Cascading local
+ # mode.
def initialize(local)
@local = local
end
@@ -34,9 +38,9 @@ def source_tap(name, tap)
end
# Builds a c.f.Flow given properties, name, sources, sinks, and pipes from
- # a Cascading::Flow. The current mode is adjusted based on the taps and
- # schemes of the sources and sinks, then the correct taps are selected
- # before building the flow.
+ # a Flow. The current mode is adjusted based on the taps and schemes of
+ # the sources and sinks, then the correct taps are selected before building
+ # the flow.
def connect_flow(properties, name, sources, sinks, pipes)
update_local_mode(sources, sinks)
sources = select_taps(sources)
diff --git a/lib/cascading/operations.rb b/lib/cascading/operations.rb
index c36ee9d..c816b11 100644
--- a/lib/cascading/operations.rb
+++ b/lib/cascading/operations.rb
@@ -1,116 +1,118 @@
module Cascading
- # The Cascading::Operations module is deprecated. The original idea from long
- # ago is that it would be useful to mixin operator wrappers to places other
- # than Cascading::Assembly, but this is not true. Instead, put Eaches in
- # Cascading::Assembly, Everies in Cascading::Aggregations, and any more
- # generally useful utility code directly in the Cascading module
- # (cascading/cascading.rb).
- #
- # Further, the entire *args pattern should be deprecated as it leads to
- # functions that can only be understood by reading their code. Instead,
- # idiomatic Ruby (positional required params and a params hash for optional
- # args) should be used. See Cascading::Assembly#set_value for an example.
module Operations
- def identity
- Java::CascadingOperation::Identity.new
- end
-
- def aggregator_function(args, aggregator_klass)
- options = args.extract_options!
- ignore = options[:ignore]
-
- parameters = [Cascading.fields(args), ignore].compact
- aggregator_klass.new(*parameters)
- end
-
- def first_function(*args)
- aggregator_function(args, Java::CascadingOperationAggregator::First)
- end
-
- def min_function(*args)
- aggregator_function(args, Java::CascadingOperationAggregator::Min)
- end
-
- def max_function(*args)
- aggregator_function(args, Java::CascadingOperationAggregator::Max)
- end
-
- def last_function(*args)
- aggregator_function(args, Java::CascadingOperationAggregator::Last)
- end
-
- def regex_parser(*args)
- options = args.extract_options!
-
- pattern = args[0].to_s
- fields = Cascading.fields(options[:fields])
- groups = options[:groups].to_java(:int) if options[:groups]
- parameters = [fields, pattern, groups].compact
-
- Java::CascadingOperationRegex::RegexParser.new(*parameters)
- end
-
- def regex_splitter(*args)
- options = args.extract_options!
-
- fields = Cascading.fields(args)
- pattern = options[:pattern].to_s
- parameters = [fields, pattern].compact
- Java::CascadingOperationRegex::RegexSplitter.new(*parameters)
- end
-
- def regex_split_generator(*args)
- options = args.extract_options!
-
- fields = Cascading.fields(args)
- pattern = options[:pattern].to_s
- parameters = [fields, pattern].compact
- Java::CascadingOperationRegex::RegexSplitGenerator.new(*parameters)
- end
-
- def regex_generator(*args)
- options = args.extract_options!
-
- fields = Cascading.fields(args)
- pattern = options[:pattern].to_s
- parameters = [fields, pattern].compact
- Java::CascadingOperationRegex::RegexGenerator.new(*parameters)
- end
-
- def expression_function(*args)
- options = args.extract_options!
-
- fields = Cascading.fields(args)
- expression = options[:expression].to_s
- parameters = options[:parameters]
- parameter_names = []
- parameter_types = []
- if parameters.is_a? ::Hash
- parameters.each do |name, type|
- parameter_names << name
- parameter_types << type
+ # Debugs the current assembly at runtime, printing every tuple and fields
+ # every 10 tuples by default.
+ #
+ # The named options are:
+ # [prefix] String to prefix prints with.
+ # [print_fields] Boolean controlling field printing, defaults to false.
+ # [tuple_interval] Integer specifying interval between printed tuples
+ # [fields_interval] Integer specifying interval between printing fields
+ #
+ # Example:
+ # debug :prefix => 'DEBUG', :print_fields => true, :fields_interval => 1000
+ def debug(options = {})
+ input_fields = options[:input] || all_fields
+ prefix = options[:prefix]
+ print_fields = options[:print_fields]
+
+ debug = Java::CascadingOperation::Debug.new(*[prefix, print_fields].compact)
+
+ debug.print_tuple_every = options[:tuple_interval] || 1
+ debug.print_fields_every = options[:fields_interval] || 10
+
+ each(input_fields, :filter => debug)
+ end
+
+ # Inserts new fields into the current assembly. Values may be constants or
+ # expressions (see Cascading::expr). Fields will be inserted in
+ # lexicographic order (not necessarily the order provided).
+ #
+ # Example:
+ # insert 'field1' => 'constant_string', 'field2' => 0, 'field3' => expr('fieldA:long + fieldB:long')
+ def insert(insert_map)
+ insert_map.keys.sort.each do |field_name|
+ value = insert_map[field_name]
+
+ if value.kind_of?(ExprStub)
+ value.validate_scope(scope)
+ names, types = value.names_and_types
+ each(
+ all_fields,
+ :function => Java::CascadingOperationExpression::ExpressionFunction.new(fields(field_name), value.expression, names, types),
+ :output => all_fields
+ )
+ else # value is a constant
+ each(
+ all_fields,
+ :function => Java::CascadingOperation::Insert.new(fields(field_name), to_java_comparable_array([value])),
+ :output => all_fields
+ )
end
- parameter_names = parameter_names.to_java(java.lang.String)
- parameter_types = parameter_types.to_java(java.lang.Class)
-
- arguments = [fields, expression, parameter_names, parameter_types].compact
- elsif !parameters.nil?
- arguments = [fields, expression, parameters.java_class].compact
- else
- arguments = [fields, expression, java.lang.String.java_class].compact
end
-
- Java::CascadingOperationExpression::ExpressionFunction.new(*arguments)
end
- def insert_function(*args)
- options=args.extract_options!
- fields = Cascading.fields(args)
- values = options[:values]
-
- parameters = [fields, to_java_comparable_array(values)].compact
- Java::CascadingOperation::Insert.new(*parameters)
- end
+ # Ungroups, or unpivots, a tuple (see Cascading's {UnGroup}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/function/UnGroup.html]).
+ #
+ # You must provide exactly one of :value_selectors and :num_values.
+ #
+ # The named options are:
+ # [value_selectors] Array of field names to ungroup. Each field will be
+ # ungrouped into an output tuple along with the key fields
+ # in the order provided.
+ # [num_values] Integer specifying the number of fields to ungroup into each
+ # output tuple (excluding the key fields). All input fields
+ # will be ungrouped.
+ #
+ # Example:
+ # ungroup 'key', ['new_key', 'val'], :value_selectors => ['val1', 'val2', 'val3'], :output => ['new_key', 'val']
+ def ungroup(key, into_fields, options = {})
+ input_fields = options[:input] || all_fields
+ output = options[:output] || all_fields
+
+ raise 'You must provide exactly one of :value_selectors or :num_values to ungroup' unless options.has_key?(:value_selectors) ^ options.has_key?(:num_values)
+ value_selectors = options[:value_selectors].map{ |vs| fields(vs) }.to_java(Java::CascadingTuple::Fields) if options.has_key?(:value_selectors)
+ num_values = options[:num_values] if options.has_key?(:num_values)
+
+ parameters = [fields(into_fields), fields(key), value_selectors, num_values].compact
+ each input_fields, :function => Java::CascadingOperationFunction::UnGroup.new(*parameters), :output => output
+ end
+
+ # Inserts one of two values into the dataflow based upon the result of the
+ # supplied filter on the input_fields. This is primarily useful for
+ # creating indicators from filters. keep_value specifies the Java value to
+ # produce when the filter would keep the given input and remove_value
+ # specifies the Java value to produce when the filter would remove the given
+ # input.
+ #
+ # Example:
+ # set_value 'field1', Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, 'is_field1_null'
+ def set_value(input_fields, filter, keep_value, remove_value, into_field, options = {})
+ output = options[:output] || all_fields
+ each input_fields, :function => Java::CascadingOperationFunction::SetValue.new(fields(into_field), filter, keep_value, remove_value), :output => output
+ end
+
+ # Efficient way of inserting a null indicator for any field, even one that
+ # cannot be coerced to a string. This is accomplished using Cascading's
+ # FilterNull and SetValue operators rather than Janino. 1 is produced if
+ # the field is null and 0 otherwise.
+ #
+ # Example:
+ # null_indicator 'field1', 'is_field1_null'
+ def null_indicator(input_field, into_field, options = {})
+ set_value input_field, Java::CascadingOperationFilter::FilterNull.new, 1.to_java, 0.to_java, into_field, :output => options[:output]
+ end
+
+ # Given an input_field and a regex, returns an indicator that is 1 if the string
+ # contains at least 1 match and 0 otherwise.
+ #
+ # Example:
+ # regex_contains 'field1', /\w+\s+\w+/, 'does_field1_contain_pair'
+ def regex_contains(input_field, regex, into_field, options = {})
+ set_value input_field, Java::CascadingOperationRegex::RegexFilter.new(regex.to_s), 1.to_java, 0.to_java, into_field, :output => options[:output]
+ end
+
+ private
def to_java_comparable_array(arr)
(arr.map do |v|
@@ -130,72 +132,5 @@ def coerce_to_java(v)
java.lang.String.new(v.to_s)
end
end
-
- def expression_filter(*args)
- options = args.extract_options!
- expression = (args[0] || options[:expression]).to_s
- parameters = options[:parameters]
- parameter_names = []
- parameter_types = []
- if parameters.is_a? ::Hash
- parameters.each do |name, type|
- parameter_names << name
- parameter_types << type
- end
- parameter_names = parameter_names.to_java(java.lang.String)
- parameter_types = parameter_types.to_java(java.lang.Class)
-
- arguments = [expression, parameter_names, parameter_types].compact
- elsif !parameters.nil?
- arguments = [expression, parameters.java_class].compact
- else
- arguments = [expression, java.lang.String.java_class].compact
- end
-
- Java::CascadingOperationExpression::ExpressionFilter.new(*arguments)
- end
-
- def date_parser(field, format)
- fields = fields(field)
- Java::CascadingOperationText::DateParser.new(fields, format)
- end
-
- def date_formatter(fields, format, timezone=nil)
- fields = fields(fields)
- timezone = Java::JavaUtil::TimeZone.get_time_zone(timezone) if timezone
- arguments = [fields, format, timezone].compact
- Java::CascadingOperationText::DateFormatter.new(*arguments)
- end
-
- def regex_filter(*args)
- options = args.extract_options!
-
- pattern = args[0]
- remove_match = options[:remove_match]
- match_each_element = options[:match_each_element]
- parameters = [pattern.to_s, remove_match, match_each_element].compact
- Java::CascadingOperationRegex::RegexFilter.new(*parameters)
- end
-
- def regex_replace(*args)
- options = args.extract_options!
-
- fields = fields(args[0])
- pattern = args[1]
- replacement = args[2]
- replace_all = options[:replace_all]
-
- parameters = [fields, pattern.to_s, replacement.to_s, replace_all].compact
- Java::CascadingOperationRegex::RegexReplace.new(*parameters)
- end
-
- def field_joiner(*args)
- options = args.extract_options!
- delimiter = options[:delimiter] || ','
- fields = fields(options[:into])
-
- parameters = [fields, delimiter].compact
- Java::CascadingOperationText::FieldJoiner.new(*parameters)
- end
end
end
diff --git a/lib/cascading/regex_operations.rb b/lib/cascading/regex_operations.rb
new file mode 100644
index 0000000..daa02fc
--- /dev/null
+++ b/lib/cascading/regex_operations.rb
@@ -0,0 +1,133 @@
+module Cascading
+ # Module of pipe assemblies that wrap operations defined in the Cascading
+ # cascading.operation.regex package. These are split out only to group
+ # similar functionality.
+ #
+ # All DSL regex pipes require an input_field, a regex, and either a single
+ # into_field or one or more into_fields. Requiring a single input field
+ # allows us to raise an exception early if the wrong input is specified and
+ # avoids the non-intuitive situation where the first of many fields is
+ # silently taken as in Cascading. Requiring a regex means you don't have to
+ # go looking for defaults in code. And into_field(s) means we can propagate
+ # field names through the dataflow.
+ #
+ # Mapping of DSL pipes into Cascading regex operations:
+ # parse:: {RegexParser}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexParser.html]
+ # split:: {RegexSplitter}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexSplitter.html]
+ # split\_rows:: {RegexSplitGenerator}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexSplitGenerator.html]
+ # match\_rows:: {RegexGenerator}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexGenerator.html]
+ # replace:: {RegexReplace}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/regex/RegexReplace.html]
+ module RegexOperations
+ # Parses the given input_field using the specified regular expression to
+ # produce one output per group in that expression.
+ #
+ # The named options are:
+ # [groups] Array of integers specifying which groups to capture if you want
+ # a subset of groups.
+ #
+ # Example:
+ # parse 'field1', /(\w+)\s+(\w+)/, ['out1', 'out2'], :groups => [1, 2]
+ def parse(input_field, regex, into_fields, options = {})
+ groups = options[:groups].to_java(:int) if options[:groups]
+ output = options[:output] || all_fields # Overrides Cascading default
+
+ input_field = fields(input_field)
+ raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
+
+ parameters = [fields(into_fields), regex.to_s, groups].compact
+ each(
+ input_field,
+ :function => Java::CascadingOperationRegex::RegexParser.new(*parameters),
+ :output => output
+ )
+ end
+ alias regex_parser parse
+
+ # Splits the given input_field into multiple fields using the specified
+ # regular expression.
+ #
+ # Example:
+ # split 'line', /\s+/, ['out1', 'out2']
+ def split(input_field, regex, into_fields, options = {})
+ output = options[:output] || all_fields # Overrides Cascading default
+
+ input_field = fields(input_field)
+ raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
+
+ each(
+ input_field,
+ :function => Java::CascadingOperationRegex::RegexSplitter.new(fields(into_fields), regex.to_s),
+ :output => output
+ )
+ end
+ alias regex_splitter split
+
+ # Splits the given input_field into new rows using the specified regular
+ # expression.
+ #
+ # Example:
+ # split_rows 'line', /\s+/, 'word'
+ def split_rows(input_field, regex, into_field, options = {})
+ output = options[:output] || all_fields # Overrides Cascading default
+
+ input_field = fields(input_field)
+ raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
+ into_field = fields(into_field)
+ raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
+
+ each(
+ input_field,
+ :function => Java::CascadingOperationRegex::RegexSplitGenerator.new(into_field, regex.to_s),
+ :output => output
+ )
+ end
+ alias regex_split_generator split_rows
+
+ # Emits a new row for each regex group matched in input_field using the
+ # specified regular expression.
+ #
+ # Example:
+ # match_rows 'line', /(\w+)\s+(\w+)/, 'word'
+ def match_rows(input_field, regex, into_field, options = {})
+ output = options[:output] || all_fields # Overrides Cascading default
+
+ input_field = fields(input_field)
+ raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
+ into_field = fields(into_field)
+ raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
+
+ each(
+ input_field,
+ :function => Java::CascadingOperationRegex::RegexGenerator.new(into_field, regex.to_s),
+ :output => output
+ )
+ end
+ alias regex_generator match_rows
+
+ # Performs a query/replace on the given input_field using the specified
+ # regular expression and replacement.
+ #
+ # The named options are:
+ # [replace_all] Boolean indicating if all matches should be replaced;
+ # defaults to true (the Cascading default).
+ #
+ # Example:
+ # replace 'line', /[.,]*\s+/, 'tab_separated_line', "\t"
+ def replace(input_field, regex, into_field, replacement, options = {})
+ output = options[:output] || all_fields # Overrides Cascading default
+
+ input_field = fields(input_field)
+ raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
+ into_field = fields(into_field)
+ raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
+
+ parameters = [into_field, regex.to_s, replacement.to_s, options[:replace_all]].compact
+ each(
+ input_field,
+ :function => Java::CascadingOperationRegex::RegexReplace.new(*parameters),
+ :output => output
+ )
+ end
+ alias regex_replace replace
+ end
+end
diff --git a/lib/cascading/scope.rb b/lib/cascading/scope.rb
index e7e619a..0062449 100644
--- a/lib/cascading/scope.rb
+++ b/lib/cascading/scope.rb
@@ -1,23 +1,35 @@
module Cascading
+ # Scope is a wrapper for the private Cascading c.f.p.Scope object used to
+ # connect the dataflow graph by resolving fields. cascading.jruby wraps this
+ # facility so that it may be used to propagate field names at composition
+ # time (not Cascading plan time) in the same way they will later be
+ # propagated by the planner.
class Scope
attr_accessor :scope
+ # Construct a Scope given the Cascading c.f.p.Scope to wrap.
def initialize(scope)
@scope = scope
end
+ # Copy one Scope into another; relies upon the copy constructor of
+ # c.f.p.Scope.
def copy
Scope.new(Java::CascadingFlowPlanner::Scope.new(@scope))
end
+ # Build a c.f.p.Scope for a Flow, which is empty except for its name.
def self.flow_scope(name)
Java::CascadingFlowPlanner::Scope.new(name)
end
+ # Build an empty Scope, wrapping an empty c.f.p.Scope.
def self.empty_scope(name)
Scope.new(Java::CascadingFlowPlanner::Scope.new(name))
end
+ # Build a Scope for a single source Tap. The flow_scope is propagated
+ # through this call into a new Scope.
def self.source_scope(name, tap, flow_scope)
incoming_scopes = java.util.HashSet.new
incoming_scopes.add(flow_scope)
@@ -27,28 +39,30 @@ def self.source_scope(name, tap, flow_scope)
Scope.new(java_scope)
end
+ # Build a Scope for an arbitrary flow element. This is used to update the
+ # Scope at each stage in a pipe Assembly.
def self.outgoing_scope(flow_element, incoming_scopes)
java_scopes = incoming_scopes.compact.map{ |s| s.scope }
Scope.new(outgoing_scope_for(flow_element, java.util.HashSet.new(java_scopes)))
end
+ # The values fields of the Scope, which indicate the fields in the current
+ # dataflow tuple.
def values_fields
@scope.out_values_fields
end
+ # The grouping fields of the Scope, which indicate the keys of a
+ # group/cogroup.
def grouping_fields
@scope.out_grouping_fields
end
- def scope_fields_to_s(accessor)
- begin
- fields = @scope.send(accessor)
- fields.nil? ? 'null' : fields.to_s
- rescue Exception => e
- 'ERROR'
- end
- end
-
+ # Prints a detailed description of this Scope, including its type and
+ # various selectors, fields, and key fields. Data is bubbled up directly
+ # from the Cascading c.f.p.Scope. This output can be useful for debugging
+ # the propagation of fields through your job (see Flow#debug_scope and
+ # Assembly#debug_scope, which both rely upon this method).
def to_s
kind = 'Unknown'
kind = 'Tap' if @scope.tap?
@@ -77,6 +91,15 @@ def to_s
private
+ def scope_fields_to_s(accessor)
+ begin
+ fields = @scope.send(accessor)
+ fields.nil? ? 'null' : fields.to_s
+ rescue Exception => e
+ 'ERROR'
+ end
+ end
+
def self.outgoing_scope_for(flow_element, incoming_scopes)
begin
flow_element.outgoing_scope_for(incoming_scopes)
diff --git a/lib/cascading/sub_assembly.rb b/lib/cascading/sub_assembly.rb
index 022c120..61d11e3 100644
--- a/lib/cascading/sub_assembly.rb
+++ b/lib/cascading/sub_assembly.rb
@@ -4,17 +4,15 @@ module Cascading
# Allows you to plugin c.p.SubAssemblies to a cascading.jruby Assembly.
#
# Assumptions:
- # * You will either use the tail_pipe of the calling Assembly, or overwrite
- # its incoming_scopes (as do join and union)
- # * Your subassembly will have only 1 tail pipe; branching is not
- # supported. This allows you to continue operating upon the tail of the
- # SubAssembly within the calling Assembly
+ # * You will either use the tail_pipe of the calling Assembly, or overwrite its incoming_scopes (as do join and union)
+ # * Your subassembly will have only 1 tail pipe; branching is not supported. This allows you to continue operating upon the tail of the SubAssembly within the calling Assembly
# * You will not use nested c.p.SubAssemblies
#
# This is a low-level tool, so be careful.
class SubAssembly
attr_reader :assembly, :sub_assembly, :tail_pipe, :scope
+ # Construct a SubAssembly within the given Assembly
def initialize(assembly, sub_assembly)
@assembly = assembly
@sub_assembly = sub_assembly
@@ -26,6 +24,11 @@ def initialize(assembly, sub_assembly)
raise 'SubAssembly must set exactly 1 tail in constructor' unless sub_assembly.tails.size == 1
end
+ # Complete the addition of the SubAssembly to the Assembly. Propagates
+ # Scope through the SubAssembly and updates the tail_pipe of the
+ # SubAssembly for passing back to the enclosing Assembly. May accept many
+ # incoming pipes, but typically only receives the tail_pipe of the
+ # enclosing Assembly.
def finalize(pipes, incoming_scopes)
# Build adjacency list for sub_assembly
graph = {}
diff --git a/lib/cascading/tap.rb b/lib/cascading/tap.rb
index c1fb254..3583a5f 100644
--- a/lib/cascading/tap.rb
+++ b/lib/cascading/tap.rb
@@ -1,48 +1,63 @@
module Cascading
- # A Cascading::BaseTap wraps up a pair of Cascading taps, one for Cascading
- # local mode and the other for Hadoop mode.
+ # A BaseTap wraps up a pair of Cascading taps, one for Cascading local mode
+ # and the other for Hadoop mode. Note that these are optional, but at least
+ # one must be provided for most taps. A SequenceFile is a notable example of
+ # a Scheme for which there is no Cascading local mode version, so a Tap you
+ # build with it will have no local_tap.
class BaseTap
attr_reader :local_tap, :hadoop_tap
+ # Constructor that accepts the local_tap and hadoop_tap, which may be nil
def initialize(local_tap, hadoop_tap)
@local_tap = local_tap
@hadoop_tap = hadoop_tap
end
+ # Passes through printing the local_tap and hadoop_tap
def to_s
"Local: #{local_tap}, Hadoop: #{hadoop_tap}"
end
+ # Returns false if the local_tap is nil, true otherwise
def local?
!local_tap.nil?
end
+ # Returns false if the hadoop_tap is nil, true otherwise
def hadoop?
!hadoop_tap.nil?
end
end
- # A Cascading::Tap represents a non-aggregate tap with a scheme, path, and
- # optional sink_mode. c.t.l.FileTap is used in Cascading local mode and
- # c.t.h.Hfs is used in Hadoop mode. Whether or not these can be created is
- # governed by the :scheme parameter, which must contain at least one of
- # :local_scheme or :hadoop_scheme. Schemes like TextLine are supported in
- # both modes (by Cascading), but SequenceFile is only supported in Hadoop
- # mode.
+ # A Tap represents a non-aggregate tap with a scheme, path, and optional
+ # sink_mode. c.t.l.FileTap is used in Cascading local mode and c.t.h.Hfs is
+ # used in Hadoop mode. Whether or not these can be created is governed by the
+ # :scheme parameter, which must contain at least one of :local_scheme or
+ # :hadoop_scheme. Schemes like TextLine are supported in both modes (by
+ # Cascading), but SequenceFile is only supported in Hadoop mode.
class Tap < BaseTap
attr_reader :scheme, :path, :sink_mode
- def initialize(path, params = {})
+ # Builds a Tap given a required path
+ #
+ # The named options are:
+ # [scheme] A Hash which must contain at least one of :local_scheme or
+ # :hadoop_scheme but may contain both. Default is
+ # text_line_scheme, which works in both modes.
+ # [sink_mode] A symbol or string that may be :keep, :replace, or :append,
+ # and corresponds to the c.t.SinkMode enumeration. The default
+ # is :keep, which matches Cascading's default.
+ def initialize(path, options = {})
@path = path
- @scheme = params[:scheme] || text_line_scheme
+ @scheme = options[:scheme] || text_line_scheme
raise "Scheme must provide one of :local_scheme or :hadoop_scheme; received: '#{scheme.inspect}'" unless scheme[:local_scheme] || scheme[:hadoop_scheme]
- @sink_mode = case params[:sink_mode] || :keep
+ @sink_mode = case options[:sink_mode] || :keep
when :keep, 'keep' then Java::CascadingTap::SinkMode::KEEP
when :replace, 'replace' then Java::CascadingTap::SinkMode::REPLACE
when :append, 'append' then Java::CascadingTap::SinkMode::APPEND
- else raise "Unrecognized sink mode '#{params[:sink_mode]}'"
+ else raise "Unrecognized sink mode '#{options[:sink_mode]}'"
end
local_scheme = scheme[:local_scheme]
@@ -53,19 +68,28 @@ def initialize(path, params = {})
end
end
- # A Cascading::MultiTap represents one of Cascading's aggregate taps and is
- # built via static constructors that accept an array of Cascading::Taps. In
- # order for a mode (Cascading local or Hadoop) to be supported, all provided
- # taps must support it.
+ # A MultiTap represents one of Cascading's aggregate taps and is built via
+ # static constructors that accept an array of Taps. In order for a mode
+ # (Cascading local or Hadoop) to be supported, all provided taps must support
+ # it.
class MultiTap < BaseTap
+ # Do not call this constructor directly; instead, use one of
+ # MultiTap.multi_source_tap or MultiTap.multi_sink_tap.
def initialize(local_tap, hadoop_tap)
super(local_tap, hadoop_tap)
end
+ # Static constructor that builds a MultiTap wrapping a c.t.MultiSourceTap
+ # from the given array of Taps. The resulting MultiTap will only be
+ # available in Cascading local mode or Hadoop mode if all input taps support
+ # them.
def self.multi_source_tap(taps)
multi_tap(taps, Java::CascadingTap::MultiSourceTap)
end
+ # Static constructor that builds a MultiTap wrapping a c.t.MultiSinkTap from
+ # the given array of Taps. The resulting MultiTap will only be available in
+ # Cascading local mode or Hadoop mode if all input taps support them.
def self.multi_sink_tap(taps)
multi_tap(taps, Java::CascadingTap::MultiSinkTap)
end
diff --git a/lib/cascading/text_operations.rb b/lib/cascading/text_operations.rb
new file mode 100644
index 0000000..39e8339
--- /dev/null
+++ b/lib/cascading/text_operations.rb
@@ -0,0 +1,67 @@
+module Cascading
+ # Module of pipe assemblies that wrap operations defined in the Cascading
+ # cascading.operation.text package. These are split out only to group
+ # similar functionality.
+ #
+ # Mapping of DSL pipes into Cascading text operations:
+ # parse\_date:: {DateParser}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/text/DateParser.html]
+ # format\_date:: {DateFormatter}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/text/DateFormatter.html]
+ # join\_fields:: {FieldJoiner}[http://docs.cascading.org/cascading/2.1/javadoc/cascading/operation/text/FieldJoiner.html]
+ module TextOperations
+ # Parses the given input_field as a date using the provided format string.
+ #
+ # Example:
+ # parse_date 'text_date', 'yyyy/MM/dd', 'timestamp'
+ def parse_date(input_field, date_format, into_field, options = {})
+ output = options[:output] || all_fields # Overrides Cascading default
+
+ input_field = fields(input_field)
+ raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
+ into_field = fields(into_field)
+ raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
+
+ each(
+ input_field,
+ :function => Java::CascadingOperationText::DateParser.new(into_field, date_format),
+ :output => output
+ )
+ end
+
+ # Converts a timestamp into a formatted date string using the specified
+ # date_format.
+ #
+ # Example:
+ # format_date 'timestamp', 'yyyy/MM/dd', 'text_date'
+ def format_date(input_field, date_format, into_field, options = {})
+ output = options[:output] || all_fields # Overrides Cascading default
+
+ input_field = fields(input_field)
+ raise "input_field must declare exactly one field, was '#{input_field}'" unless input_field.size == 1
+ into_field = fields(into_field)
+ raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
+
+ each(
+ input_field,
+ :function => Java::CascadingOperationText::DateFormatter.new(into_field, date_format),
+ :output => output
+ )
+ end
+
+ # Joins multiple fields into a single field given a delimiter.
+ #
+ # Example:
+ # join_fields ['field1', 'field2'], ',', 'comma_separated'
+ def join_fields(input_fields, delimiter, into_field, options = {})
+ output = options[:output] || all_fields # Overrides Cascading default
+
+ into_field = fields(into_field)
+ raise "into_field must declare exactly one field, was '#{into_field}'" unless into_field.size == 1
+
+ each(
+ input_fields,
+ :function => Java::CascadingOperationText::FieldJoiner.new(into_field, delimiter.to_s),
+ :output => output
+ )
+ end
+ end
+end
diff --git a/samples/branch.rb b/samples/branch.rb
index 64a6b74..3d98d4b 100755
--- a/samples/branch.rb
+++ b/samples/branch.rb
@@ -9,8 +9,7 @@
source 'input', tap('samples/data/data2.txt')
assembly 'input' do
- split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/
-
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id']
branch 'branch1' do
group_by 'score1' do
count
diff --git a/samples/group_by.rb b/samples/group_by.rb
index ce44436..3083ebc 100755
--- a/samples/group_by.rb
+++ b/samples/group_by.rb
@@ -8,7 +8,7 @@
source 'input', tap('samples/data/data_group_by.tsv')
assembly 'input' do
- split 'line', ['id', 'city'], :output => ['id', 'city']
+ split 'line', /\t/, ['id', 'city'], :output => ['id', 'city']
branch 'group_by' do
group_by 'city', :sort_by => 'city' do
diff --git a/samples/join.rb b/samples/join.rb
index 6313ab9..7d50c62 100755
--- a/samples/join.rb
+++ b/samples/join.rb
@@ -10,15 +10,15 @@
source 'input3', tap('samples/data/data_join3.txt')
assembly 'input1' do
- split 'line', ['id', 'name']
+ split 'line', /\t/, ['id', 'name']
end
assembly 'input2' do
- split 'line', ['id', 'age']
+ split 'line', /\t/, ['id', 'age']
end
assembly 'input3' do
- split 'line', ['id', 'city']
+ split 'line', /\t/, ['id', 'city']
end
assembly 'join' do
diff --git a/samples/logwordcount.rb b/samples/logwordcount.rb
index 9e93bc8..b037bf5 100755
--- a/samples/logwordcount.rb
+++ b/samples/logwordcount.rb
@@ -10,7 +10,7 @@
source 'input', tap('samples/data/gutenberg/the_outline_of_science_vol_1')
assembly 'input' do
- split_rows 'line', 'word', :pattern => /[.,]*\s+/, :output => 'word'
+ split_rows 'line', /[.,]*\s+/, 'word', :output => 'word'
group_by 'word' do
count
end
diff --git a/samples/project.rb b/samples/project.rb
index 908f5e1..50bce24 100755
--- a/samples/project.rb
+++ b/samples/project.rb
@@ -10,7 +10,7 @@
source 'input', tap('samples/data/data2.txt')
assembly 'input' do
- split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
project 'name', 'score1', 'score2'
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(3)
diff --git a/samples/rename.rb b/samples/rename.rb
index bbd47e1..bee222d 100755
--- a/samples/rename.rb
+++ b/samples/rename.rb
@@ -8,7 +8,7 @@
source 'input', tap('samples/data/data2.txt')
assembly 'input' do
- split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
rename 'name' => 'new_name', 'score1' => 'new_score1', 'score2' => 'new_score2'
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
diff --git a/samples/replace.rb b/samples/replace.rb
new file mode 100755
index 0000000..40e600e
--- /dev/null
+++ b/samples/replace.rb
@@ -0,0 +1,16 @@
+#! /usr/bin/env jruby
+$: << File.join(File.dirname(__FILE__), '..', 'lib')
+
+require 'cascading'
+
+cascade 'replace', :mode => :local do
+ flow 'replace' do
+ source 'input', tap('samples/data/data2.txt')
+
+ assembly 'input' do
+ replace 'line', /[.,]*\s+/, 'tab_separated_line', "\t", :output => 'tab_separated_line'
+ end
+
+ sink 'input', tap('output/replace', :sink_mode => :replace)
+ end
+end.complete
diff --git a/samples/scorenames.rb b/samples/scorenames.rb
index aadd23e..3cd3e51 100755
--- a/samples/scorenames.rb
+++ b/samples/scorenames.rb
@@ -10,7 +10,7 @@
source 'input', tap('samples/data/genealogy/names/dist.all.last')
assembly 'input' do
- split 'line', ['name', 'val1', 'val2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'val1', 'val2', 'id']
insert 'val3' => expr('val2:double < 40.0 ? val1:double : val2:double')
project 'name', 'val3', 'id'
end
diff --git a/samples/splitter.rb b/samples/splitter.rb
index 81042c9..021feaf 100755
--- a/samples/splitter.rb
+++ b/samples/splitter.rb
@@ -8,7 +8,7 @@
source 'input', tap('samples/data/data2.txt')
assembly 'input' do
- split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
group_by 'score1' do
count
end
diff --git a/samples/sub_assembly.rb b/samples/sub_assembly.rb
index a6101ec..a089c56 100755
--- a/samples/sub_assembly.rb
+++ b/samples/sub_assembly.rb
@@ -8,7 +8,7 @@
source 'input', tap('samples/data/data2.txt')
assembly 'input' do
- split 'line', ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(4)
sub_assembly Java::CascadingPipeAssembly::Discard.new(tail_pipe, fields('id'))
assert Java::CascadingOperationAssertion::AssertSizeEquals.new(3)
diff --git a/samples/ungroup.rb b/samples/ungroup.rb
index 0a70545..e8e94ab 100755
--- a/samples/ungroup.rb
+++ b/samples/ungroup.rb
@@ -11,22 +11,19 @@
source 'input', tap('samples/data/ungroup.tsv')
a = assembly 'input' do
- split 'line', ['key', 'val1', 'val2', 'val3'], :output => ['key', 'val1', 'val2', 'val3']
+ split 'line', /\t/, ['key', 'val1', 'val2', 'val3'], :output => ['key', 'val1', 'val2', 'val3']
branch 'ungroup_using_value_selectors' do
- #each all_fields, :function => Java::CascadingOperationFunction::UnGroup.new(fields(['new_key', 'val']), fields('key'), [fields('val1'), fields('val2'), fields('val3')].to_java(Java::CascadingTuple::Fields)), :output => ['new_key', 'val']
- ungroup :key => 'key', :value_selectors => ['val1', 'val2', 'val3'], :into => ['new_key', 'val'], :output => ['new_key', 'val']
+ ungroup 'key', ['new_key', 'val'], :value_selectors => ['val1', 'val2', 'val3'], :output => ['new_key', 'val']
end
branch 'ungroup_using_num_values' do
- #each all_fields, :function => Java::CascadingOperationFunction::UnGroup.new(fields(['new_key', 'val']), fields('key'), 1), :output => ['new_key', 'val']
- ungroup :key => 'key', :num_values => 1, :into => ['new_key', 'val'], :output => ['new_key', 'val']
+ ungroup 'key', ['new_key', 'val'], :num_values => 1, :output => ['new_key', 'val']
end
# This pairs up the first and last two fields with no "key"
branch 'ungroup_no_key' do
- #each all_fields, :function => Java::CascadingOperationFunction::UnGroup.new(fields(['left', 'right']), fields([]), 2), :output => ['left', 'right']
- ungroup :key => [], :num_values => 2, :into => ['left', 'right'], :output => ['left', 'right']
+ ungroup [], ['left', 'right'], :num_values => 2, :output => ['left', 'right']
end
end
diff --git a/samples/union.rb b/samples/union.rb
index 1e402ef..1e16772 100755
--- a/samples/union.rb
+++ b/samples/union.rb
@@ -10,7 +10,7 @@
source 'input', tap('samples/data/genealogy/names/dist.all.last')
assembly 'input' do
- split 'line', ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id']
branch 'branch1' do
group_by 'score1', 'name' do
diff --git a/samples/unique.rb b/samples/unique.rb
index 3c75fa9..c2cc0c4 100755
--- a/samples/unique.rb
+++ b/samples/unique.rb
@@ -10,7 +10,7 @@
source 'input', tap('samples/data/data_group_by.tsv')
assembly 'input' do
- split 'line', ['id', 'city'], :output => ['id', 'city']
+ split 'line', /\t/, ['id', 'city'], :output => ['id', 'city']
branch 'unique' do
sub_assembly Java::CascadingPipeAssembly::Unique.new(tail_pipe, fields('city'))
diff --git a/spec/cascading_spec.rb b/spec/cascading_spec.rb
index a075661..faec9e1 100644
--- a/spec/cascading_spec.rb
+++ b/spec/cascading_spec.rb
@@ -85,12 +85,12 @@
source 'right', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
assembly 'left' do
- split 'line', ['x', 'y', 'z'], :pattern => /,/
+ split 'line', /,/, ['x', 'y', 'z']
project 'x', 'y', 'z'
end
assembly 'right' do
- split 'line', ['x', 'y', 'z'], :pattern => /,/
+ split 'line', /,/, ['x', 'y', 'z']
project 'x', 'y', 'z'
branch 'branch_join' do
diff --git a/spec/jruby_version_spec.rb b/spec/jruby_version_spec.rb
index f6bf3a4..e78f9fa 100644
--- a/spec/jruby_version_spec.rb
+++ b/spec/jruby_version_spec.rb
@@ -19,7 +19,7 @@
end
thrown.should == 'InvocationTargetException'
- if JRUBY_VERSION == '1.7.0'
+ if JRUBY_VERSION == '1.7.0' || JRUBY_VERSION == '1.7.3'
exception.java_class.should be Java::JavaLangReflect::InvocationTargetException.java_class
else
# How can this be? A nil exception?
@@ -58,7 +58,7 @@
result = e.validate
result.should == 0
end
- when '1.5.3', '1.6.5', '1.6.7.2', '1.7.0'
+ when '1.5.3', '1.6.5', '1.6.7.2', '1.7.0', '1.7.3'
it 'should handle Fixnum -> Integer for ExprStub#eval' do
e = ExprStub.new('x:int + y:int')
result = e.eval(:x => 2, :y => 3)
diff --git a/spec/scope_spec.rb b/spec/scope_spec.rb
index 3bacd26..c32b277 100644
--- a/spec/scope_spec.rb
+++ b/spec/scope_spec.rb
@@ -22,7 +22,7 @@
check_scope :values_fields => ['offset', 'line']
assert_size_equals 2
- split 'line', ['x', 'y'], :pattern => /,/
+ split 'line', /,/, ['x', 'y']
check_scope :values_fields => ['offset', 'line', 'x', 'y']
assert_size_equals 4
end
@@ -33,7 +33,7 @@
check_scope :values_fields => ['offset', 'line']
assert_size_equals 2
- split 'line', ['x', 'y'], :pattern => /,/, :output => ['x', 'y']
+ split 'line', /,/, ['x', 'y'], :output => ['x', 'y']
check_scope :values_fields => ['x', 'y']
assert_size_equals 2
end
diff --git a/spec/spec_util.rb b/spec/spec_util.rb
index 890b554..127a994 100644
--- a/spec/spec_util.rb
+++ b/spec/spec_util.rb
@@ -2,14 +2,14 @@
BUILD_DIR = 'build/spec'
module ScopeTests
- def check_scope(params = {})
- name_params = [params[:source]].compact
- scope = scope(*name_params)
- values_fields = params[:values_fields]
- grouping_fields = params[:grouping_fields] || values_fields
+ def check_scope(options = {})
+ name_options = [options[:source]].compact
+ scope = scope(*name_options)
+ values_fields = options[:values_fields]
+ grouping_fields = options[:grouping_fields] || values_fields
- debug = params[:debug]
- debug_scope(*name_params) if debug
+ debug = options[:debug]
+ debug_scope(*name_options) if debug
scope.values_fields.to_a.should == values_fields
scope.grouping_fields.to_a.should == grouping_fields
@@ -29,8 +29,8 @@ def test_flow(&block)
cascade.complete
end
-def test_assembly(params = {}, &block)
- branches = params[:branches] || []
+def test_assembly(options = {}, &block)
+ branches = options[:branches] || []
test_flow do
source 'input', tap('spec/resource/test_input.txt', :scheme => text_line_scheme)
@@ -49,9 +49,9 @@ def test_assembly(params = {}, &block)
end
end
-def test_join_assembly(params = {}, &block)
- branches = params[:branches] || []
- post_join_block = params[:post_join_block]
+def test_join_assembly(options = {}, &block)
+ branches = options[:branches] || []
+ post_join_block = options[:post_join_block]
test_flow do
source 'left', tap('spec/resource/join_input.txt', :scheme => text_line_scheme)
@@ -63,13 +63,13 @@ def test_join_assembly(params = {}, &block)
assembly 'left' do
check_scope :values_fields => ['offset', 'line']
- split 'line', ['x', 'y', 'z'], :pattern => /,/
+ split 'line', /,/, ['x', 'y', 'z']
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
end
assembly 'right' do
check_scope :values_fields => ['offset', 'line']
- split 'line', ['x', 'y', 'z'], :pattern => /,/
+ split 'line', /,/, ['x', 'y', 'z']
check_scope :values_fields => ['offset', 'line', 'x', 'y', 'z']
end
diff --git a/test/mock_assemblies.rb b/test/mock_assemblies.rb
index cfa8284..c926b9f 100644
--- a/test/mock_assemblies.rb
+++ b/test/mock_assemblies.rb
@@ -39,11 +39,11 @@ def mock_two_input_assembly(&block)
source 'test2', tap('test/data/data2.txt')
assembly 'test1' do
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
end
assembly 'test2' do
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'id', 'town'], :output => ['name', 'id', 'town']
+ split 'line', /[.,]*\s+/, ['name', 'id', 'town'], :output => ['name', 'id', 'town']
end
assembly = assembly 'test', &block
diff --git a/test/test_assembly.rb b/test/test_assembly.rb
index bda8e28..1b03ffd 100644
--- a/test/test_assembly.rb
+++ b/test/test_assembly.rb
@@ -23,7 +23,7 @@ def test_create_assembly_simple
def test_each_identity
assembly = mock_assembly do
- each 'offset', :function => identity
+ each 'offset', :function => Java::CascadingOperation::Identity.new
end
flow = assembly.parent
@@ -35,7 +35,7 @@ def test_each_identity
def test_create_each
# You can apply an Each to 0 fields
assembly = mock_assembly do
- each(:function => identity)
+ each(:function => Java::CascadingOperation::Identity.new)
end
assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
@@ -547,7 +547,7 @@ def test_hash_join_with_block
end
end
end
- assert_equal "hash joins don't support aggregations", ex.message
+ assert_equal "HashJoin doesn't support aggregations so the block provided to hash_join will be ignored", ex.message
end
def test_branch_unique
@@ -579,7 +579,7 @@ def test_branch_single
assembly = mock_assembly do
branch 'branch1' do
branch 'branch2' do
- each 'line', :function => identity
+ each 'line', :function => Java::CascadingOperation::Identity.new
end
end
end
@@ -659,43 +659,41 @@ def test_sum_by_sub_assembly
assert_equal ['line', 'sum'], assembly.scope.grouping_fields.to_a
end
- def test_empty_where
+ def test_where
assembly = mock_assembly do
- split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
- where
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ where 'score1:double < score2:double'
end
assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
-
- # Empty where compiles away
- assert_equal Java::CascadingOperationRegex::RegexSplitter, assembly.tail_pipe.operation.class
+ assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
end
- def test_where
+ def test_where_with_import
assembly = mock_assembly do
- split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
- where 'score1:double < score2:double'
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ names = ['SMITH', 'JONES', 'BROWN']
+ where "import java.util.Arrays;\nArrays.asList(new String[] { \"#{names.join('", "')}\" }).contains(name:string)"
end
assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
end
- def test_where_with_expression
+ def test_rename
assembly = mock_assembly do
- split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
- where :expression => 'score1:double < score2:double'
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ rename 'score2' => 'new_score2', 'score1' => 'new_score1', 'name' => 'new_name'
end
- assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
- assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
+ # Original order preserved
+ assert_equal ['new_name', 'new_score1', 'new_score2', 'id'], assembly.scope.values_fields.to_a
end
- def test_where_with_import
+ def test_copy
assembly = mock_assembly do
- split 'line', ['name', 'score1', 'score2', 'id'], :pattern => /[.,]*\s+/, :output => ['name', 'score1', 'score2', 'id']
- names = ['SMITH', 'JONES', 'BROWN']
- where "import java.util.Arrays;\nArrays.asList(new String[] { \"#{names.join('", "')}\" }).contains(name:string)"
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ copy 'score2' => 'new_score2', 'id' => 'new_id', 'name' => 'new_name'
end
- assert_equal Java::CascadingPipe::Each, assembly.tail_pipe.class
- assert_equal Java::CascadingOperationExpression::ExpressionFilter, assembly.tail_pipe.operation.class
+ # Original order preserved in copied fields
+ assert_equal ['name', 'score1', 'score2', 'id', 'new_name', 'new_score2', 'new_id'], assembly.scope.values_fields.to_a
end
def test_smoke_test_describe
diff --git a/test/test_local_execution.rb b/test/test_local_execution.rb
index 5756acb..d262be1 100644
--- a/test/test_local_execution.rb
+++ b/test/test_local_execution.rb
@@ -36,7 +36,7 @@ def test_splitter
source 'copy', tap('test/data/data1.txt')
assembly 'copy' do
- split 'line', :pattern => /[.,]*\s+/, :into=>['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
assert_size_equals 4
assert_not_null
debug :print_fields => true
@@ -70,14 +70,14 @@ def test_join1
source 'data2', tap('test/data/data2.txt')
assembly1 = assembly 'data1' do
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
assert_size_equals 4
assert_not_null
debug :print_fields => true
end
assembly2 = assembly 'data2' do
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'id', 'town'], :output => ['name', 'id', 'town']
+ split 'line', /[.,]*\s+/, ['name', 'id', 'town'], :output => ['name', 'id', 'town']
assert_size_equals 3
assert_not_null
debug :print_fields => true
@@ -106,12 +106,12 @@ def test_join2
source 'data2', tap('test/data/data2.txt')
assembly 'data1' do
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
debug :print_fields => true
end
assembly 'data2' do
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'code', 'town'], :output => ['name', 'code', 'town']
+ split 'line', /[.,]*\s+/, ['name', 'code', 'town'], :output => ['name', 'code', 'town']
debug :print_fields => true
end
@@ -135,7 +135,7 @@ def test_union
source 'data2', tap('test/data/data2.txt')
assembly 'data1' do
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
+ split 'line', /[.,]*\s+/, ['name', 'score1', 'score2', 'id'], :output => ['name', 'score1', 'score2', 'id']
assert_size_equals 4
assert_not_null
@@ -144,7 +144,7 @@ def test_union
end
assembly 'data2' do
- split 'line', :pattern => /[.,]*\s+/, :into => ['name', 'code', 'town'], :output => ['name', 'code', 'town']
+ split 'line', /[.,]*\s+/, ['name', 'code', 'town'], :output => ['name', 'code', 'town']
assert_size_equals 3
assert_not_null
diff --git a/test/test_operations.rb b/test/test_operations.rb
index 9f098d8..5d54d27 100644
--- a/test/test_operations.rb
+++ b/test/test_operations.rb
@@ -4,16 +4,6 @@
class TC_Operations < Test::Unit::TestCase
include Operations
- def test_aggregator_function_ignore_values
- min = min_function 'min_field', :ignore => [nil].to_java(:string)
- assert_not_nil min
- end
-
- def test_aggregator_function_ignore_tuples
- first = first_function 'first_field', :ignore => [Java::CascadingTuple::Tuple.new(-1)].to_java(Java::CascadingTuple::Tuple)
- assert_not_nil first
- end
-
def test_coerce_to_java_int
result = coerce_to_java(1)