diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index a6c1597162c05..db1ead15e855a 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1931,14 +1931,14 @@ dependencies = [ [[package]] name = "nix" -version = "0.25.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a" dependencies = [ - "autocfg", "bitflags", "cfg-if", "libc", + "static_assertions", ] [[package]] @@ -2548,9 +2548,9 @@ checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" [[package]] name = "rustyline" -version = "10.1.1" +version = "11.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e83c32c3f3c33b08496e0d1df9ea8c64d39adb8eb36a1ebb1440c690697aef" +checksum = "5dfc8644681285d1fb67a467fb3021bfea306b99b4146b166a1fe3ada965eece" dependencies = [ "bitflags", "cfg-if", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 7106307435320..77d84468f9470 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -38,6 +38,6 @@ env_logger = "0.9" mimalloc = { version = "0.1", default-features = false } object_store = { version = "0.5.5", features = ["aws", "gcp", "aws_profile"] } parking_lot = { version = "0.12" } -rustyline = "10.0" +rustyline = "11.0" tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } url = "2.2" diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index 1497154b28d57..1d8950256a6c0 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -19,7 +19,7 @@ use crate::{ command::{Command, OutputFormat}, - helper::CliHelper, + helper::{unescape_input, CliHelper}, object_storage::{ get_gcs_object_store_builder, get_oss_object_store_builder, get_s3_object_store_builder, @@ -58,9 +58,12 @@ pub async fn exec_from_lines( let line = line.trim_end(); query.push_str(line); if line.ends_with(';') { - match exec_and_print(ctx, print_options, query).await { - Ok(_) => {} - Err(err) => println!("{err}"), + match unescape_input(line) { + Ok(sql) => match exec_and_print(ctx, print_options, sql).await { + Ok(_) => {} + Err(err) => eprintln!("{err}"), + }, + Err(err) => eprintln!("{err}"), } query = "".to_owned(); } else { @@ -102,7 +105,7 @@ pub async fn exec_from_repl( ctx: &mut SessionContext, print_options: &mut PrintOptions, ) -> rustyline::Result<()> { - let mut rl = Editor::::new()?; + let mut rl = Editor::new()?; rl.set_helper(Some(CliHelper::default())); rl.load_history(".history").ok(); @@ -111,7 +114,7 @@ pub async fn exec_from_repl( loop { match rl.readline("❯ ") { Ok(line) if line.starts_with('\\') => { - rl.add_history_entry(line.trim_end()); + rl.add_history_entry(line.trim_end())?; let command = line.split_whitespace().collect::>().join(" "); if let Ok(cmd) = &command[1..].parse::() { match cmd { @@ -145,9 +148,12 @@ pub async fn exec_from_repl( } } Ok(line) => { - rl.add_history_entry(line.trim_end()); - match exec_and_print(ctx, &print_options, line).await { - Ok(_) => {} + rl.add_history_entry(line.trim_end())?; + match unescape_input(&line) { + Ok(sql) => match exec_and_print(ctx, &print_options, sql).await { + Ok(_) => {} + Err(err) => eprintln!("{err}"), + }, Err(err) => eprintln!("{err}"), } } @@ -179,7 +185,7 @@ async fn exec_and_print( let plan = ctx.state().create_logical_plan(&sql).await?; let df = match &plan { LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) => { - create_external_table(&ctx, cmd)?; + create_external_table(ctx, cmd)?; ctx.execute_logical_plan(plan).await? } _ => ctx.execute_logical_plan(plan).await?, diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 42eeb83f86e93..15464eec13a05 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -18,7 +18,9 @@ //! Helper that helps with interactive editing, including multi-line parsing and validation, //! and auto-completion for file name during creating external table. +use datafusion::error::DataFusionError; use datafusion::sql::parser::{DFParser, Statement}; +use datafusion::sql::sqlparser::parser::ParserError; use rustyline::completion::Completer; use rustyline::completion::FilenameCompleter; use rustyline::completion::Pair; @@ -37,6 +39,38 @@ pub struct CliHelper { completer: FilenameCompleter, } +impl CliHelper { + fn validate_input(&self, input: &str) -> Result { + if let Some(sql) = input.strip_suffix(';') { + let sql = match unescape_input(sql) { + Ok(sql) => sql, + Err(err) => { + return Ok(ValidationResult::Invalid(Some(format!( + " 🤔 Invalid statement: {err}", + )))) + } + }; + match DFParser::parse_sql(&sql) { + Ok(statements) if statements.is_empty() => Ok(ValidationResult::Invalid( + Some(" 🤔 You entered an empty statement".to_string()), + )), + Ok(statements) if statements.len() > 1 => Ok(ValidationResult::Invalid( + Some(" 🤔 You entered more than one statement".to_string()), + )), + Ok(_statements) => Ok(ValidationResult::Valid(None)), + Err(err) => Ok(ValidationResult::Invalid(Some(format!( + " 🤔 Invalid statement: {err}", + )))), + } + } else if input.starts_with('\\') { + // command + Ok(ValidationResult::Valid(None)) + } else { + Ok(ValidationResult::Incomplete) + } + } +} + impl Highlighter for CliHelper {} impl Hinter for CliHelper { @@ -76,27 +110,117 @@ impl Completer for CliHelper { impl Validator for CliHelper { fn validate(&self, ctx: &mut ValidationContext<'_>) -> Result { let input = ctx.input().trim_end(); - if let Some(sql) = input.strip_suffix(';') { - match DFParser::parse_sql(sql) { - Ok(statements) if statements.is_empty() => Ok(ValidationResult::Invalid( - Some(" 🤔 You entered an empty statement".to_string()), - )), - Ok(statements) if statements.len() > 1 => Ok(ValidationResult::Invalid( - Some(" 🤔 You entered more than one statement".to_string()), - )), - Ok(_statements) => Ok(ValidationResult::Valid(None)), - Err(err) => Ok(ValidationResult::Invalid(Some(format!( - " 🤔 Invalid statement: {}", - err - )))), + self.validate_input(input) + } +} + +impl Helper for CliHelper {} + +/// Unescape input string from readline. +/// +/// The data read from stdio will be escaped, so we need to unescape the input before executing the input +pub fn unescape_input(input: &str) -> datafusion::error::Result { + let mut chars = input.chars(); + + let mut result = String::with_capacity(input.len()); + while let Some(char) = chars.next() { + if char == '\\' { + if let Some(next_char) = chars.next() { + // https://static.rust-lang.org/doc/master/reference.html#literals + result.push(match next_char { + '0' => '\0', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + _ => { + return Err(DataFusionError::SQL(ParserError::TokenizerError( + format!("unsupported escape char: '\\{}'", next_char), + ))) + } + }); } - } else if input.starts_with('\\') { - // command - Ok(ValidationResult::Valid(None)) } else { - Ok(ValidationResult::Incomplete) + result.push(char); } } + + Ok(result) } -impl Helper for CliHelper {} +#[cfg(test)] +mod tests { + use std::io::{BufRead, Cursor}; + + use super::*; + + fn readline_direct( + mut reader: impl BufRead, + validator: &CliHelper, + ) -> Result { + let mut input = String::new(); + + if reader.read_line(&mut input)? == 0 { + return Err(ReadlineError::Eof); + } + + validator.validate_input(&input) + } + + #[test] + fn unescape_readline_input() -> Result<()> { + let validator = CliHelper::default(); + + // shoule be valid + let result = readline_direct( + Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter ',';".as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\0';".as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\n';".as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\r';".as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\t';".as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\\';".as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + // should be invalid + let result = readline_direct( + Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter ',,';".as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Invalid(Some(_)))); + + let result = readline_direct( + Cursor::new(r"create external table test stored as csv location 'data.csv' delimiter '\u{07}';".as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Invalid(Some(_)))); + + Ok(()) + } +}