2 changes: 1 addition & 1 deletion rust/arrow/src/json/writer.rs
@@ -329,7 +329,7 @@ fn set_column_for_json_rows(
}

/// Converts an arrow [`RecordBatch`] into a `Vec` of Serde JSON
-/// [`serde_json::map::JsonMap`]s (objects)
+/// [`JsonMap`]s (objects)
pub fn record_batches_to_json_rows(
batches: &[RecordBatch],
) -> Vec<JsonMap<String, Value>> {
63 changes: 63 additions & 0 deletions rust/datafusion/README.md
@@ -58,6 +58,69 @@ Here are some of the projects known to use DataFusion:

(if you know of another project, please submit a PR to add a link!)

## Example Usage

Run a SQL query against data stored in a CSV:

```rust
use datafusion::prelude::*;
use arrow::util::pretty::print_batches;
use arrow::record_batch::RecordBatch;

#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
    // register the CSV file as a table named "example"
    let mut ctx = ExecutionContext::new();
    ctx.register_csv("example", "tests/example.csv", CsvReadOptions::new())?;

    // create a plan to run a SQL query
    let df = ctx.sql("SELECT a, MIN(b) FROM example GROUP BY a LIMIT 100")?;

    // execute and print results
    let results: Vec<RecordBatch> = df.collect().await?;
    print_batches(&results)?;
    Ok(())
}
```

Use the DataFrame API to process data stored in a CSV:

> **@returnString** (Contributor, Mar 17, 2021): Nitpicking: this might be a little bit fun with API churn, e.g. I believe the input-expr ownership work you've recently opened would change these from slices to `Vec`s, and we don't have a way to catch that automatically like we do for the in-crate docs (am I right in thinking that `cargo test` runs all doctests?). Edit: to be clear, I don't think it's a reason to not do it, just curious if anyone has ideas for how to prevent doc drift :)
>
> **Author:** This is a good point @returnString. The way I justified the danger of drift to myself was: the main use case of this documentation (the overview) is likely to help readers answer "should I even bother to try and use this crate?". Once they decide to actually use the crate they will look at the real docs on docs.rs (from which they can copy/paste). For the purpose of an example of "what does this library do", I felt even a slightly out-of-date example might be valuable. Or maybe I am just trying to pad my github stats ;) But in all seriousness I am not committed to this PR. If it isn't a good idea I can just close it.
>
> **@returnString:** That makes sense to me; agreed that (personally, at least) I'll give less consideration to projects without simple README examples. It balloons the scope of this PR quite a lot so I'm not saying this is a good idea, but I just did a bit of digging and it looks like people have gone through this particular problem before: https://blog.guillaume-gomez.fr/articles/2019-04-13+Keeping+Rust+projects%27+README.md+code+examples+up-to-date
> And the end result of that is https://crates.io/crates/doc-comment, which looks like it'll wire up any rust-tagged code blocks in external files as doctests, optionally only for `#[cfg(test)]`. If it's useful, I could log a followup task to integrate that and take a look at it myself?
>
> **Author:** @returnString https://crates.io/crates/doc-comment looks super awesome -- I think that would be most helpful.

```rust
use datafusion::prelude::*;
use arrow::util::pretty::print_batches;
use arrow::record_batch::RecordBatch;

#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
    // create the dataframe
    let mut ctx = ExecutionContext::new();
    let df = ctx.read_csv("tests/example.csv", CsvReadOptions::new())?;

    let df = df.filter(col("a").lt_eq(col("b")))?
        .aggregate(&[col("a")], &[min(col("b"))])?
        .limit(100)?;

    // execute and print results
    let results: Vec<RecordBatch> = df.collect().await?;
    print_batches(&results)?;
    Ok(())
}
```

> **Contributor** (on the `collect` call): Just noticed when using this as a test for #9749 that we're collecting twice here.
>
> **Author:** lol "testing for the win!"

Both of these examples will produce:

```text
+---+--------+
| a | MIN(b) |
+---+--------+
| 1 | 2 |
+---+--------+
```
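As discussed in the review thread above, one way to keep README examples from drifting out of sync with the API is the `doc-comment` crate, which registers rust-tagged code blocks in external Markdown files as doctests. A minimal sketch, assuming `doc-comment` is added as a dev-dependency of the `datafusion` crate (the module name below is illustrative):

```rust
// Sketch only: wires the ```rust blocks in this README up as doctests,
// so `cargo test` fails when the examples drift from the real API.
// Assumes `doc-comment` is listed under [dev-dependencies] in Cargo.toml.
#[cfg(doctest)]
mod readme_doctests {
    // The path is relative to the crate root (rust/datafusion here).
    doc_comment::doctest!("../README.md");
}
```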



## Using DataFusion as a library

29 changes: 28 additions & 1 deletion rust/datafusion/src/lib.rs
@@ -31,7 +31,8 @@
//! as well as a query optimizer and execution engine capable of parallel execution
//! against partitioned data sources (CSV and Parquet) using threads.
//!
-//! Below is an example of how to execute a query against a CSV using [`DataFrames`](dataframe::DataFrame):
+//! Below is an example of how to execute a query against data stored
+//! in a CSV file using a [`DataFrame`](dataframe::DataFrame):
//!
//! ```rust
//! # use datafusion::prelude::*;
@@ -52,6 +53,19 @@
//!
//! // execute the plan
//! let results: Vec<RecordBatch> = df.collect().await?;
//!
//! // format the results
//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?;
//!
//! let expected = vec![
//! "+---+--------+",
//! "| a | MIN(b) |",
//! "+---+--------+",
//! "| 1 | 2 |",
//! "+---+--------+"
//! ];
//!
//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
//! # Ok(())
//! # }
//! ```
@@ -74,6 +88,19 @@
//!
//! // execute the plan
//! let results: Vec<RecordBatch> = df.collect().await?;
//!
//! // format the results
//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)?;
//!
//! let expected = vec![
//! "+---+--------+",
//! "| a | MIN(b) |",
//! "+---+--------+",
//! "| 1 | 2 |",
//! "+---+--------+"
//! ];
//!
//! assert_eq!(pretty_results.trim().lines().collect::<Vec<_>>(), expected);
//! # Ok(())
//! # }
//! ```
5 changes: 3 additions & 2 deletions rust/datafusion/src/physical_plan/regex_expressions.rs
@@ -54,8 +54,9 @@ fn regex_replace_posix_groups(replacement: &str) -> String {
.into_owned()
}

-/// Replaces substring(s) matching a POSIX regular expression
-/// regexp_replace('Thomas', '.[mN]a.', 'M') = 'ThM'
+/// Replaces substring(s) matching a POSIX regular expression.
+///
+/// example: `regexp_replace('Thomas', '.[mN]a.', 'M') = 'ThM'`
pub fn regexp_replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
// creating Regex is expensive so create hashmap for memoization
let mut patterns: HashMap<String, Regex> = HashMap::new();
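For context on the `regex_replace_posix_groups` helper named in the hunk header: it rewrites POSIX-style backreferences like `\1` in the replacement string into the `${1}` form that the Rust `regex` crate expects. The dependency-free sketch below illustrates that rewrite; the function name and the character-walking approach are illustrative assumptions, not the crate's actual implementation (which performs the substitution with a regex):

```rust
// Hypothetical std-only sketch: rewrite POSIX backreferences such as `\1`
// into the `${1}` capture-group syntax understood by the `regex` crate.
fn posix_groups_to_dollar(replacement: &str) -> String {
    let mut out = String::with_capacity(replacement.len());
    let mut chars = replacement.chars().peekable();
    while let Some(c) = chars.next() {
        // A backslash followed by digits is a POSIX group reference.
        if c == '\\' && chars.peek().map_or(false, |n| n.is_ascii_digit()) {
            out.push_str("${");
            while let Some(&d) = chars.peek() {
                if d.is_ascii_digit() {
                    out.push(d);
                    chars.next();
                } else {
                    break;
                }
            }
            out.push('}');
        } else {
            out.push(c);
        }
    }
    out
}

fn main() {
    assert_eq!(posix_groups_to_dollar(r"\1-\2"), "${1}-${2}");
    assert_eq!(posix_groups_to_dollar("no groups here"), "no groups here");
    println!("{}", posix_groups_to_dollar(r"abc\12x"));
}
```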