Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion rust/arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ rand = "0.7"
csv = "1.1"
num = "0.3"
regex = "1.3"
lazy_static = "1.4"
packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" }
chrono = "0.4"
flatbuffers = "0.6"
Expand Down
38 changes: 20 additions & 18 deletions rust/arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@
//! let batch = csv.next().unwrap().unwrap();
//! ```

use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::sync::Arc;
Expand All @@ -60,13 +58,8 @@ use crate::{

use self::csv_crate::{Error, StringRecord, StringRecordsIntoIter};

lazy_static! {
static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap();
static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d+)$").unwrap();
static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$")
.case_insensitive(true)
.build()
.unwrap();
/// Returns `true` when `string` is non-empty and every character is an ASCII digit (`0`-`9`).
///
/// The empty string is rejected explicitly: `Iterator::all` is vacuously true on an empty
/// iterator, which would otherwise let inputs with a missing digit run (for example `""`,
/// or the text left after stripping a lone `-`, or either side of a bare `.`) count as digits.
fn all_digit(string: &str) -> bool {
    // Any non-ASCII character is encoded with bytes >= 0x80, none of which is an ASCII digit,
    // so checking bytes is equivalent to checking chars here.
    !string.is_empty() && string.bytes().all(|b| b.is_ascii_digit())
}

/// Infer the data type of a record
Expand All @@ -77,14 +70,23 @@ fn infer_field_schema(string: &str) -> DataType {
return DataType::Utf8;
}
// match regex in a particular order
if BOOLEAN_RE.is_match(string) {
DataType::Boolean
} else if DECIMAL_RE.is_match(string) {
DataType::Float64
} else if INTEGER_RE.is_match(string) {
DataType::Int64
} else {
DataType::Utf8
if string.eq_ignore_ascii_case("true") || string.eq_ignore_ascii_case("false") {
return DataType::Boolean;
}
let skip_minus = if string.starts_with('-') { 1 } else { 0 };
let mut parts = string[skip_minus..].splitn(3, '.');

match (parts.next(), parts.next()) {
(Some(left), None) if all_digit(left) => DataType::Int64,
(Some(left), Some(right)) if all_digit(left) && all_digit(right) => {
let no_remainder = parts.next().is_none();
if no_remainder {
DataType::Float64
} else {
DataType::Utf8
}
}
_ => DataType::Utf8,
}
}

Expand Down Expand Up @@ -888,7 +890,7 @@ mod tests {
format!("{:?}", e)
),
Ok(_) => panic!("should have failed"),
}
},
None => panic!("should have failed"),
}
}
Expand Down