Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
db361ae
add skeleton for StructsToJson
andygrove Aug 10, 2024
9f8cab1
first test passes
andygrove Aug 10, 2024
65be1d4
add support for nested structs
andygrove Aug 10, 2024
779344e
add support for strings and improve test
andygrove Aug 10, 2024
e769d2f
clippy
andygrove Aug 11, 2024
5eccf37
format
andygrove Aug 11, 2024
636b95a
prepare for review
andygrove Aug 11, 2024
08ea988
remove perf results
andygrove Aug 11, 2024
05bbc5d
update user guide
andygrove Aug 11, 2024
525d260
add microbenchmark
andygrove Aug 11, 2024
5c2f551
remove comment
andygrove Aug 11, 2024
6976d1f
update docs
andygrove Aug 12, 2024
c8e0f34
Merge remote-tracking branch 'apache/main' into to-json
andygrove Aug 12, 2024
0225642
reduce size of diff
andygrove Aug 12, 2024
4488da6
Merge remote-tracking branch 'apache/main' into to-json
andygrove Aug 25, 2024
d2f55e2
add failing test for quotes in field names and values
andygrove Aug 25, 2024
d327a68
test passes
andygrove Aug 25, 2024
d84f294
clippy
andygrove Aug 25, 2024
d1b6b24
revert a docs change
andygrove Aug 25, 2024
c34b69a
Update native/spark-expr/src/to_json.rs
andygrove Aug 25, 2024
8e6ca9f
address feedback
andygrove Aug 25, 2024
6910b65
Merge branch 'to-json' of github.com:andygrove/datafusion-comet into …
andygrove Aug 25, 2024
e76501d
support tabs
andygrove Aug 26, 2024
7402e56
newlines
andygrove Aug 26, 2024
e17506d
backspace
andygrove Aug 26, 2024
a1a7f21
clippy
andygrove Aug 26, 2024
5b23f1f
fix test regression
andygrove Aug 28, 2024
d3d2201
upmerge
andygrove Aug 28, 2024
525c9eb
cargo fmt
andygrove Aug 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion docs/source/user-guide/expressions.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,14 @@ The following Spark expressions are currently available. Any known compatibility
| VariancePop | |
| VarianceSamp | |

## Complex Types

| Expression | Notes |
| ----------------- | ----- |
| CreateNamedStruct | |
| GetElementAt | |
| StructsToJson | |

## Other

| Expression | Notes |
Expand All @@ -191,4 +199,3 @@ The following Spark expressions are currently available. Any known compatibility
| ScalarSubquery | |
| Coalesce | |
| NormalizeNaNAndZero | |
| CreateNamedStruct | |
6 changes: 5 additions & 1 deletion native/core/src/execution/datafusion/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ use datafusion_comet_proto::{
};
use datafusion_comet_spark_expr::{
Cast, CreateNamedStruct, DateTruncExpr, GetStructField, HourExpr, IfExpr, MinuteExpr, RLike,
SecondExpr, TimestampTruncExpr,
SecondExpr, TimestampTruncExpr, ToJson,
};
use datafusion_common::scalar::ScalarStructBuilder;
use datafusion_common::{
Expand Down Expand Up @@ -655,6 +655,10 @@ impl PhysicalPlanner {
self.create_expr(expr.child.as_ref().unwrap(), Arc::clone(&input_schema))?;
Ok(Arc::new(GetStructField::new(child, expr.ordinal as usize)))
}
ExprStruct::ToJson(expr) => {
let child = self.create_expr(expr.child.as_ref().unwrap(), input_schema)?;
Ok(Arc::new(ToJson::new(child, &expr.timezone)))
}
expr => Err(ExecutionError::GeneralError(format!(
"Not implemented: {:?}",
expr
Expand Down
10 changes: 10 additions & 0 deletions native/proto/src/proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ message Expr {
BloomFilterMightContain bloom_filter_might_contain = 52;
CreateNamedStruct create_named_struct = 53;
GetStructField get_struct_field = 54;
ToJson to_json = 55;
}
}

Expand Down Expand Up @@ -343,6 +344,15 @@ message StringSpace {
Expr child = 1;
}

message ToJson {
Expr child = 1;
string timezone = 2;
string date_format = 3;
string timestamp_format = 4;
string timestamp_ntz_format = 5;
bool ignore_null_fields = 6;
}

message Hour {
Expr child = 1;
string timezone = 2;
Expand Down
4 changes: 2 additions & 2 deletions native/spark-expr/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ pub fn spark_cast(
arg: ColumnarValue,
data_type: &DataType,
eval_mode: EvalMode,
timezone: String,
timezone: &str,
allow_incompat: bool,
) -> DataFusionResult<ColumnarValue> {
match arg {
Expand Down Expand Up @@ -1414,7 +1414,7 @@ impl PhysicalExpr for Cast {
arg,
&self.data_type,
self.eval_mode,
self.timezone.clone(),
&self.timezone,
self.allow_incompat,
)
}
Expand Down
2 changes: 2 additions & 0 deletions native/spark-expr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ pub mod spark_hash;
mod structs;
mod temporal;
pub mod timezone;
mod to_json;
pub mod utils;
mod xxhash64;

Expand All @@ -39,6 +40,7 @@ pub use if_expr::IfExpr;
pub use regexp::RLike;
pub use structs::{CreateNamedStruct, GetStructField};
pub use temporal::{DateTruncExpr, HourExpr, MinuteExpr, SecondExpr, TimestampTruncExpr};
pub use to_json::ToJson;

/// Spark supports three evaluation modes when evaluating expressions, which affect
/// the behavior when processing input values that are invalid or would result in an
Expand Down
Loading