diff --git a/.travis.yml b/.travis.yml index 2a186885..c85ed936 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,8 +39,9 @@ install: - if [ "$BUILD_ANDROID" == true ]; then ./compile-android.sh; else - cargo build --bin aw-server-rust $($RELEASE && echo '--release'); cargo build --lib $($RELEASE && echo '--release'); + cargo build --bin aw-server-rust $($RELEASE && echo '--release'); + cargo build --bin aw-sync-rust $($RELEASE && echo '--release'); fi script: diff --git a/Cargo.lock b/Cargo.lock index 27462912..fcfed585 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,6 +79,7 @@ dependencies = [ "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "multipart 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)", "plex 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "rocket 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "rocket_contrib 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "rocket_cors 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 3d950feb..8c7b506c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ log = "0.4" fern = { version = "0.5", features = ["colored"] } toml = "0.5" gethostname = "0.2" +regex = "1.0" [target.'cfg(target_os="android")'.dependencies] jni = { version = "0.5", default-features = false } diff --git a/src/models/event.rs b/src/models/event.rs index 7daf46f5..6ccf1a45 100644 --- a/src/models/event.rs +++ b/src/models/event.rs @@ -30,6 +30,17 @@ impl PartialEq for Event { } } +impl Default for Event { + fn default() -> Self { + Event { + id: None, + timestamp: Utc::now(), + duration: Duration::seconds(0), + data: serde_json::Map::new() + } + } +} + fn default_duration() -> Duration { Duration::seconds(0) } diff --git a/src/query/datatype.rs b/src/query/datatype.rs new file mode 100644 index 00000000..dc156d1d --- /dev/null +++ b/src/query/datatype.rs @@ -0,0 +1,247 @@ +use std::collections::HashMap; +use std::fmt; +use std::convert::{TryFrom,TryInto}; + +use crate::models::Event; +use crate::transform::classify::Rule; +use super::QueryError; +use super::functions; + +use serde::Serializer; +use serde_json::value::Value; +use serde_json::Number; + + +// TODO: greater/less comparisons + +#[derive(Clone,Serialize)] +#[serde(untagged)] +pub enum DataType { + None(), + Bool(bool), + Number(f64), + String(String), + Event(Event), + List(Vec), + Dict(HashMap), + #[serde(serialize_with = "serialize_function")] + Function(String, functions::QueryFn), +} + +fn serialize_function(_element: &String, _fun: &functions::QueryFn, _serializer: S) -> Result + where S: Serializer +{ + panic!("Query function was unevaluated and was attempted to be serialized, panic!"); + //element.id.serialize(serializer) +} + + +// Needed because of a limitation in rust where you cannot derive(Debug) on a +// enum which has a fn with reference parameters which our QueryFn has +// https://stackoverflow.com/questions/53380040/function-pointer-with-a-reference-argument-cannot-derive-debug +impl fmt::Debug for DataType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DataType::None() => write!(f, "None()"), + DataType::Bool(b) => write!(f, "Bool({})", b), + DataType::Number(n) => write!(f, "Number({})", n), + DataType::String(s) => write!(f, "String({})", s), + DataType::Event(e) => write!(f, "Event({:?})", e), + DataType::List(l) => write!(f, "List({:?})", l), + DataType::Dict(d) => write!(f, "Dict({:?})", d), + DataType::Function(name, _fun) => write!(f, "Function({})", name), + } + } +} + +/* Like eq, but raises an error when comparing between different types. + * Should be used as often as possible */ +impl DataType { + pub fn query_eq(&self, other: &DataType) -> Result { + match (self, other) { + // TODO: Comparisons of bool == num, bool == str + (DataType::None(), DataType::None()) => Ok(false), + (DataType::Bool(b1), DataType::Bool(b2)) => Ok(b1 == b2), + (DataType::Number(n1), DataType::Number(n2)) => Ok(n1 == n2), + (DataType::String(s1), DataType::String(s2)) => Ok(s1 == s2), + (DataType::Event(e1), DataType::Event(e2)) => Ok(e1 == e2), + (DataType::List(l1), DataType::List(l2)) => Ok(l1 == l2), + (DataType::Dict(d1), DataType::Dict(d2)) => Ok(d1 == d2), + // We do not care about comparing functions + _ => Err(QueryError::InvalidType(format!("Cannot compare values of different types {:?} and {:?}", self, other))), + } + } +} + +/* Required for query_eq when comparing two dicts */ +impl PartialEq for DataType { + fn eq(&self, other: &DataType) -> bool { + match (self, other) { + (DataType::None(), DataType::None()) => true, + // TODO: Comparisons of bool == num, bool == str + (DataType::Bool(b1), DataType::Bool(b2)) => b1 == b2, + (DataType::Number(n1), DataType::Number(n2)) => n1 == n2, + (DataType::String(s1), DataType::String(s2)) => s1 == s2, + (DataType::Event(e1), DataType::Event(e2)) => e1 == e2, + (DataType::List(l1), DataType::List(l2)) => l1 == l2, + (DataType::Dict(d1), DataType::Dict(d2)) => d1 == d2, + // We do not care about comparing functions + _ => false + } + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + match value { + DataType::List(ref s) => Ok(s.clone()), + ref invalid_type => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type List, got {:?}", invalid_type) + )) + } + } +} + +impl TryFrom<&DataType> for String { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + match value { + DataType::String(s) => Ok(s.clone()), + ref invalid_type => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type List of Strings, list contains {:?}", invalid_type) + )) + } + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_strings: Vec = value.try_into()?; + let mut strings = Vec::new(); + for string in tagged_strings.drain(..) { + let s: String = (&string).try_into()?; + strings.push(s); + } + return Ok(strings); + } +} + +impl TryFrom<&DataType> for Rule { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let rulemap: HashMap = (match value { + DataType::Dict(d) => { + let map: HashMap = d.iter().map(|(k, v)| { + let s: String = v.try_into().unwrap(); + (k.clone(), s.clone()) + }).collect(); + Ok(map) + }, + _ => Err(QueryError::InvalidFunctionParameters( + format!("Expected rule, found something else") + )) + })?; + Ok(Rule::from(rulemap)) + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_events: Vec = value.try_into()?; + let mut events = Vec::new(); + for event in tagged_events.drain(..) { + match event { + DataType::Event(e) => events.push(e.clone()), + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type List of Events, list contains {:?}", invalid_type) + )) + } + } + return Ok(events); + } +} + +impl TryFrom<&DataType> for Vec<(String, Rule)> { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_lists: Vec = value.try_into()?; + let mut lists: Vec<(String, Rule)> = Vec::new(); + for list in tagged_lists.drain(..) { + match list { + DataType::List(ref l) => { + let tag: String = l.get(0).unwrap().try_into()?; + let rule: Rule = l.get(1).unwrap().try_into()?; + lists.push((tag, rule)); + }, + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type list of (tag, rule) tuples, list contains {:?}", invalid_type) + )) + } + } + return Ok(lists); + } +} + +impl TryFrom<&DataType> for Vec<(Vec, Rule)> { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_lists: Vec = value.try_into()?; + let mut lists: Vec<(Vec, Rule)> = Vec::new(); + for list in tagged_lists.drain(..) { + match list { + DataType::List(ref l) => { + let category: Vec = l.get(0).unwrap().try_into()?; + let rule: Rule = l.get(1).unwrap().try_into()?; + lists.push((category, rule)); + }, + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type list of (category, rule) tuples, list contains {:?}", invalid_type) + )) + } + } + return Ok(lists); + } +} + +impl TryFrom<&DataType> for f64 { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + match value { + DataType::Number(f) => Ok(*f), + ref invalid_type => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type Number, got {:?}", invalid_type) + )) + } + } +} + +impl TryFrom<&DataType> for usize { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let f: f64 = value.try_into()?; + Ok(f as usize) + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_strings: Vec = value.try_into()?; + let mut strings = Vec::new(); + for string in tagged_strings.drain(..) { + match string { + DataType::String(s) => strings.push(Value::String(s)), + DataType::Number(n) => strings.push(Value::Number(Number::from_f64(n).unwrap())), + //DataType::Bool(b) => strings.push(json!(b)), + DataType::None() => strings.push(Value::Null), + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Query2 support for parsing values is limited and only supports strings, numbers and null, list contains {:?}", invalid_type) + )) + } + } + return Ok(strings); + } +} diff --git a/src/query/functions.rs b/src/query/functions.rs index 1b1a763b..d0e75ad6 100644 --- a/src/query/functions.rs +++ b/src/query/functions.rs @@ -1,7 +1,7 @@ +use std::collections::HashMap; use crate::query::DataType; use crate::query::QueryError; use crate::datastore::Datastore; -use std::collections::HashMap; pub type QueryFn = fn(args: Vec, env: &HashMap<&str, DataType>, ds: &Datastore) -> Result; @@ -21,17 +21,22 @@ pub fn fill_env<'a>(env: &mut HashMap<&'a str, DataType>) { env.insert("filter_period_intersect", DataType::Function("filter_period_intersect".to_string(), qfunctions::filter_period_intersect)); env.insert("split_url_events", DataType::Function("split_url_events".to_string(), qfunctions::split_url_events)); env.insert("concat", DataType::Function("concat".to_string(), qfunctions::concat)); + env.insert("categorize", DataType::Function("categorize".into(), qfunctions::categorize)); + env.insert("tag", DataType::Function("tag".into(), qfunctions::tag)); } mod qfunctions { + use std::convert::TryFrom; + use std::convert::TryInto; + use std::collections::HashMap; + use crate::transform::classify::Rule; use crate::query::DataType; use crate::query::QueryError; use crate::datastore::Datastore; + use crate::models::Event; use crate::transform; use super::validate; - use std::collections::HashMap; - pub fn print(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { for arg in args { info!("{:?}", arg); @@ -42,10 +47,10 @@ mod qfunctions { pub fn query_bucket(args: Vec, env: &HashMap<&str, DataType>, ds: &Datastore) -> Result { // Typecheck validate::args_length(&args, 1)?; - let bucket_id = validate::arg_type_string(&args[0])?; + let bucket_id: String = (&args[0]).try_into()?; let interval = validate::get_timeinterval (env)?; - let events = match ds.get_events(bucket_id, Some(interval.start().clone()), Some(interval.end().clone()), None) { + let events = match ds.get_events(bucket_id.as_str(), Some(interval.start().clone()), Some(interval.end().clone()), None) { Ok(events) => events, Err(e) => return Err(QueryError::BucketQueryError(format!("Failed to query bucket: {:?}", e))) }; @@ -90,7 +95,7 @@ mod qfunctions { pub fn flood(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let events = validate::arg_type_event_list(&args[0])?.clone(); + let events: Vec = (&args[0]).try_into()?; // Run flood let mut flooded_events = transform::flood(events, chrono::Duration::seconds(5)); // Put events back into DataType::Event container @@ -101,10 +106,48 @@ mod qfunctions { return Ok(DataType::List(tagged_flooded_events)); } + pub fn categorize( + args: Vec, + _env: &HashMap<&str, DataType>, + _ds: &Datastore, + ) -> Result { + // typecheck + validate::args_length(&args, 2)?; + let events: Vec = Vec::try_from(&args[0])?; + let rules: Vec<(Vec, Rule)> = Vec::try_from(&args[1])?; + // Run categorize + let mut flooded_events = transform::classify::categorize(events, &rules); + // Put events back into DataType::Event container + let mut tagged_flooded_events = Vec::new(); + for event in flooded_events.drain(..) { + tagged_flooded_events.push(DataType::Event(event)); + } + return Ok(DataType::List(tagged_flooded_events)); + } + + pub fn tag( + args: Vec, + _env: &HashMap<&str, DataType>, + _ds: &Datastore, + ) -> Result { + // typecheck + validate::args_length(&args, 2)?; + let events: Vec = Vec::try_from(&args[0])?; + let rules: Vec<(String, Rule)> = Vec::try_from(&args[1])?; + // Run categorize + let mut flooded_events = transform::classify::tag(events, &rules); + // Put events back into DataType::Event container + let mut tagged_flooded_events = Vec::new(); + for event in flooded_events.drain(..) { + tagged_flooded_events.push(DataType::Event(event)); + } + return Ok(DataType::List(tagged_flooded_events)); + } + pub fn sort_by_duration(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let events = validate::arg_type_event_list(&args[0])?; + let events: Vec = (&args[0]).try_into()?; // Sort by duration let mut sorted_events = transform::sort_by_duration(events); @@ -119,8 +162,8 @@ mod qfunctions { pub fn limit_events(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 2)?; - let mut events = validate::arg_type_event_list(&args[0])?.clone(); - let mut limit = validate::arg_type_number(&args[1])? as usize; + let mut events: Vec = (&args[0]).try_into()?; + let mut limit: usize = (&args[1]).try_into()?; if events.len() < limit { limit = events.len() } let mut limited_tagged_events = Vec::new(); @@ -133,7 +176,7 @@ mod qfunctions { pub fn sort_by_timestamp(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let events = validate::arg_type_event_list(&args[0])?; + let events: Vec = (&args[0]).try_into()?; // Sort by duration let mut sorted_events = transform::sort_by_timestamp(events); @@ -148,7 +191,7 @@ mod qfunctions { pub fn sum_durations(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let mut events = validate::arg_type_event_list(&args[0])?.clone(); + let mut events: Vec = (&args[0]).try_into()?; // Sort by duration let mut sum_durations = chrono::Duration::zero(); @@ -161,8 +204,8 @@ mod qfunctions { pub fn merge_events_by_keys(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 2)?; - let events = validate::arg_type_event_list(&args[0])?; - let keys = validate::arg_type_string_list(&args[1])?; + let events: Vec = (&args[0]).try_into()?; + let keys: Vec = (&args[1]).try_into()?; let mut merged_events = transform::merge_events_by_keys(events, keys); let mut merged_tagged_events = Vec::new(); @@ -175,8 +218,8 @@ mod qfunctions { pub fn chunk_events_by_key(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 2)?; - let events = validate::arg_type_event_list(&args[0])?; - let key = validate::arg_type_string(&args[1])?; + let events: Vec = (&args[0]).try_into()?; + let key: String = (&args[1]).try_into()?; let mut merged_events = transform::chunk_events_by_key(events, &key); let mut merged_tagged_events = Vec::new(); @@ -189,9 +232,9 @@ mod qfunctions { pub fn filter_keyvals(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 3)?; - let events = validate::arg_type_event_list(&args[0])?; - let key = validate::arg_type_string(&args[1])?; - let vals = validate::arg_type_value_list(&args[2])?; + let events = (&args[0]).try_into()?; + let key: String = (&args[1]).try_into()?; + let vals: Vec<_> = (&args[2]).try_into()?; let mut filtered_events = transform::filter_keyvals(events, &key, &vals); let mut filtered_tagged_events = Vec::new(); @@ -204,8 +247,8 @@ mod qfunctions { pub fn filter_period_intersect(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 2)?; - let events = validate::arg_type_event_list(&args[0])?; - let filter_events = validate::arg_type_event_list(&args[1])?; + let events = (&args[0]).try_into()?; + let filter_events = (&args[1]).try_into()?; let mut filtered_events = transform::filter_period_intersect(&events, &filter_events); let mut filtered_tagged_events = Vec::new(); @@ -218,7 +261,7 @@ mod qfunctions { pub fn split_url_events(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let mut events = validate::arg_type_event_list(&args[0])?; + let mut events: Vec = (&args[0]).try_into()?; let mut tagged_split_url_events = Vec::new(); for mut event in events.drain(..) { @@ -231,7 +274,7 @@ mod qfunctions { pub fn concat(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { let mut event_list = Vec::new(); for arg in args { - let mut events = validate::arg_type_event_list(&arg)?; + let mut events: Vec = (&arg).try_into()?; for event in events.drain(..) { event_list.push(DataType::Event(event)); } @@ -242,7 +285,6 @@ mod qfunctions { mod validate { use crate::query::{QueryError, DataType}; - use crate::models::Event; use crate::models::TimeInterval; use std::collections::HashMap; @@ -255,80 +297,6 @@ mod validate { return Ok(()); } - pub fn arg_type_string (arg: &DataType) -> Result<&String, QueryError> { - match arg { - DataType::String(ref s) => Ok(s), - ref invalid_type => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type String, got {:?}", invalid_type) - )) - } - } - - pub fn arg_type_number (arg: &DataType) -> Result { - match arg { - DataType::Number(f) => Ok(*f), - ref invalid_type => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type Number, got {:?}", invalid_type) - )) - } - } - - pub fn arg_type_list (arg: &DataType) -> Result<&Vec, QueryError> { - match arg { - DataType::List(ref s) => Ok(s), - ref invalid_type => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type List, got {:?}", invalid_type) - )) - } - } - - pub fn arg_type_event_list (arg: &DataType) -> Result, QueryError> { - let mut tagged_events = arg_type_list(arg)?.clone(); - let mut events = Vec::new(); - for event in tagged_events.drain(..) { - match event { - DataType::Event(e) => events.push(e.clone()), - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type List of Events, list contains {:?}", invalid_type) - )) - } - } - return Ok(events); - } - - pub fn arg_type_string_list (arg: &DataType) -> Result, QueryError> { - let mut tagged_strings = arg_type_list(arg)?.clone(); - let mut strings = Vec::new(); - for string in tagged_strings.drain(..) { - match string { - DataType::String(s) => strings.push(s.clone()), - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type List of Strings, list contains {:?}", invalid_type) - )) - } - } - return Ok(strings); - } - - use serde_json::value::Value; - use serde_json::Number; - pub fn arg_type_value_list (arg: &DataType) -> Result, QueryError> { - let mut tagged_strings = arg_type_list(arg)?.clone(); - let mut strings = Vec::new(); - for string in tagged_strings.drain(..) { - match string { - DataType::String(s) => strings.push(Value::String(s)), - DataType::Number(n) => strings.push(Value::Number(Number::from_f64(n).unwrap())), - //DataType::Bool(b) => strings.push(json!(b)), - DataType::None() => strings.push(Value::Null), - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Query2 support for parsing values is limited and only supports strings, numbers and null, list contains {:?}", invalid_type) - )) - } - } - return Ok(strings); - } - pub fn get_timeinterval (env: &HashMap<&str, DataType>) -> Result { let interval_str = match env.get("TIMEINTERVAL") { Some(data_ti) => match data_ti { diff --git a/src/query/mod.rs b/src/query/mod.rs index 8bc8424b..8182e02b 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -1,13 +1,14 @@ -use std::collections::HashMap; +use std::fmt; use crate::datastore::Datastore; -use crate::models::Event; use crate::models::TimeInterval; -use serde::Serializer; + +pub mod datatype; + +pub use crate::query::datatype::DataType; // TODO: add line numbers to errors // (works during lexing, but not during parsing I believe) -// TODO: greater/less comparisons #[derive(Debug)] pub enum QueryError { @@ -30,85 +31,6 @@ impl fmt::Display for QueryError { } } -#[derive(Clone,Serialize)] -#[serde(untagged)] -pub enum DataType { - None(), - Bool(bool), - Number(f64), - String(String), - Event(Event), - List(Vec), - Dict(HashMap), - #[serde(serialize_with = "serialize_function")] - Function(String, functions::QueryFn), -} - -fn serialize_function(_element: &String, _fun: &functions::QueryFn, _serializer: S) -> Result - where S: Serializer -{ - panic!("Query function was unevaluated and was attempted to be serialized, panic!"); - //element.id.serialize(serializer) -} - -use std::fmt; - -// Needed because of a limitation in rust where you cannot derive(Debug) on a -// enum which has a fn with reference parameters which our QueryFn has -// https://stackoverflow.com/questions/53380040/function-pointer-with-a-reference-argument-cannot-derive-debug -impl fmt::Debug for DataType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - DataType::None() => write!(f, "None()"), - DataType::Bool(b) => write!(f, "Bool({})", b), - DataType::Number(n) => write!(f, "Number({})", n), - DataType::String(s) => write!(f, "String({})", s), - DataType::Event(e) => write!(f, "Event({:?})", e), - DataType::List(l) => write!(f, "List({:?})", l), - DataType::Dict(d) => write!(f, "Dict({:?})", d), - DataType::Function(name, _fun) => write!(f, "Function({})", name), - } - } -} - -/* Like eq, but raises an error when comparing between different types. - * Should be used as often as possible */ -impl DataType { - fn query_eq(&self, other: &DataType) -> Result { - match (self, other) { - // TODO: Comparisons of bool == num, bool == str - (DataType::None(), DataType::None()) => Ok(false), - (DataType::Bool(b1), DataType::Bool(b2)) => Ok(b1 == b2), - (DataType::Number(n1), DataType::Number(n2)) => Ok(n1 == n2), - (DataType::String(s1), DataType::String(s2)) => Ok(s1 == s2), - (DataType::Event(e1), DataType::Event(e2)) => Ok(e1 == e2), - (DataType::List(l1), DataType::List(l2)) => Ok(l1 == l2), - (DataType::Dict(d1), DataType::Dict(d2)) => Ok(d1 == d2), - // We do not care about comparing functions - _ => Err(QueryError::InvalidType(format!("Cannot compare values of different types {:?} and {:?}", self, other))), - } - } -} - -/* Required for query_eq when comparing two dicts */ -impl PartialEq for DataType { - fn eq(&self, other: &DataType) -> bool { - match (self, other) { - (DataType::None(), DataType::None()) => true, - // TODO: Comparisons of bool == num, bool == str - (DataType::Bool(b1), DataType::Bool(b2)) => b1 == b2, - (DataType::Number(n1), DataType::Number(n2)) => n1 == n2, - (DataType::String(s1), DataType::String(s2)) => s1 == s2, - (DataType::Event(e1), DataType::Event(e2)) => e1 == e2, - (DataType::List(l1), DataType::List(l2)) => l1 == l2, - (DataType::Dict(d1), DataType::Dict(d2)) => d1 == d2, - // We do not care about comparing functions - _ => false - } - } -} - - mod lexer { use plex::lexer; diff --git a/src/transform/classify.rs b/src/transform/classify.rs new file mode 100644 index 00000000..d2b748b6 --- /dev/null +++ b/src/transform/classify.rs @@ -0,0 +1,191 @@ +/// Transforms for classifying (tagging and categorizing) events. +/// +/// Based on code in aw_research: https://github.com/ActivityWatch/aw-research/blob/master/aw_research/classify.py + +use std::collections::HashMap; +use crate::models::Event; +use regex::Regex; +use serde_json; + +/// This struct defines the rules for classification. +/// For now it just needs to contain the regex to match with, but in the future it might contain a +/// glob-pattern, or other options for classifying. +/// It's puropse is to make the API easy to extend in the future without having to break backwards +/// compatibility (or have to maintain "old" query2 functions). +pub struct Rule { + regex: Option, +} + +impl Rule { + fn matches(&self, event: &Event) -> bool { + event + .data + .values() + .filter(|val| val.is_string()) + .any(|val| { + return match &self.regex { + Some(re) => re.is_match(val.as_str().unwrap()), + None => false, + }; + }) + } +} + +impl From for Rule { + fn from(re: Regex) -> Self { + Self { + regex: Some(re.clone()), + } + } +} + +impl From> for Rule { + fn from(obj: HashMap) -> Self { + Self { + regex: Some(Regex::new(obj.get("regex").unwrap()).unwrap()), + } + } +} + +/// Categorizes a list of events +/// +/// An event can only have one category, although the category may have a hierarchy, +/// for instance: "Work -> ActivityWatch -> aw-server-rust" +/// If multiple categories match, the deepest one will be chosen. +pub fn categorize(mut events: Vec, rules: &Vec<(Vec, Rule)>) -> Vec { + let mut classified_events = Vec::new(); + for event in events.drain(..) { + classified_events.push(categorize_one(event, rules)); + } + return classified_events; +} + +fn categorize_one(mut event: Event, rules: &Vec<(Vec, Rule)>) -> Event { + let mut category: Vec = vec!["Uncategorized".into()]; + for (cat, rule) in rules { + if rule.matches(&event) { + category = _pick_highest_ranking_category(category, &cat); + } + } + event + .data + .insert("$category".into(), serde_json::json!(category)); + return event; +} + +/// Tags a list of events +/// +/// An event can have many tags (as opposed to only one category) which will be put into the `$tags` key of +/// the event data object. +pub fn tag(mut events: Vec, rules: &Vec<(String, Rule)>) -> Vec { + let mut events_tagged = Vec::new(); + for event in events.drain(..) { + events_tagged.push(tag_one(event, &rules)); + } + return events_tagged; +} + +fn tag_one(mut event: Event, rules: &Vec<(String, Rule)>) -> Event { + let mut tags: Vec = Vec::new(); + for (cls, rule) in rules { + if rule.matches(&event) { + tags.push(cls.clone()); + } + } + tags.sort_unstable(); + tags.dedup(); + event.data.insert("$tags".into(), serde_json::json!(tags)); + event +} + +fn _pick_highest_ranking_category(acc: Vec, item: &Vec) -> Vec { + if item.len() >= acc.len() { + // If tag is category with greater or equal depth than current, then choose the new one instead. + item.clone() + } else { + acc + } +} + +fn _cat_format_to_vec(cat: String) -> Vec { + cat.split("->").map(|s| s.trim().into()).collect() +} + +fn _cat_vec_to_format(cat: Vec) -> String { + cat.join(" -> ") +} + +#[test] +fn test_categorize() { + let mut e = Event::default(); + e.data + .insert("test".into(), serde_json::json!("just a test")); + + let mut events = vec![e]; + let rules: Vec<(Vec, Rule)> = vec![ + ( + vec!["Test".into()], + Rule::from(Regex::new(r"test").unwrap()), + ), + ( + vec!["Test".into(), "Subtest".into()], + Rule::from(Regex::new(r"test").unwrap()), + ), + ( + vec!["Other".into()], + Rule::from(Regex::new(r"nonmatching").unwrap()), + ), + ]; + events = categorize(events, &rules); + + assert_eq!(events.len(), 1); + assert_eq!( + events.first().unwrap().data.get("$category").unwrap(), + &serde_json::json!(vec!["Test", "Subtest"]) + ); +} + +#[test] +fn test_categorize_uncategorized() { + // Checks that the category correctly becomes uncategorized when no category matches + let mut e = Event::default(); + e.data + .insert("test".into(), serde_json::json!("just a test")); + + let mut events = vec![e]; + let rules: Vec<(Vec, Rule)> = vec![( + vec!["Non-matching".into(), "test".into()], + Rule::from(Regex::new(r"not going to match").unwrap()), + )]; + events = categorize(events, &rules); + + assert_eq!(events.len(), 1); + assert_eq!( + events.first().unwrap().data.get("$category").unwrap(), + &serde_json::json!(vec!["Uncategorized"]) + ); +} + +#[test] +fn test_tag() { + let mut e = Event::default(); + e.data + .insert("test".into(), serde_json::json!("just a test")); + + let mut events = vec![e]; + let rules: Vec<(String, Rule)> = vec![ + ("test".into(), Rule::from(Regex::new(r"test").unwrap())), + ("test-2".into(), Rule::from(Regex::new(r"test").unwrap())), + ( + "nomatch".into(), + Rule::from(Regex::new(r"nomatch").unwrap()), + ), + ]; + events = tag(events, &rules); + + assert_eq!(events.len(), 1); + + let event = events.first().unwrap(); + let tags = event.data.get("$tags").unwrap(); + assert_eq!(tags, &serde_json::json!(vec!["test", "test-2"])); +} diff --git a/src/transform/mod.rs b/src/transform/mod.rs index 32e8cd95..bb6376af 100644 --- a/src/transform/mod.rs +++ b/src/transform/mod.rs @@ -2,6 +2,8 @@ use std::collections::HashMap; use crate::models::Event; use serde_json::value::Value; +pub mod classify; + // TODO: Compare with aw-cores version to make sure it works correctly pub fn heartbeat(last_event: &Event, heartbeat: &Event, pulsetime: f64) -> Option { // Verify that data is the same @@ -272,7 +274,7 @@ pub fn split_url_event(event: &mut Event) { }; // Protocol let protocol = uri.scheme().to_string(); - event.data.insert("protocol".to_string(), Value::String(protocol)); + event.data.insert("$protocol".to_string(), Value::String(protocol)); // Domain let domain = match uri.authority() { Some(authority) => { @@ -280,13 +282,13 @@ pub fn split_url_event(event: &mut Event) { }, None => "".to_string(), }; - event.data.insert("domain".to_string(), Value::String(domain)); + event.data.insert("$domain".to_string(), Value::String(domain)); // Path let path = match uri.origin() { Some(origin) => origin.path().to_string(), None => "".to_string() }; - event.data.insert("path".to_string(), Value::String(path)); + event.data.insert("$path".to_string(), Value::String(path)); // Params // TODO: What's the difference between params and query? let params = match uri.origin() { @@ -296,7 +298,7 @@ pub fn split_url_event(event: &mut Event) { }, None => "".to_string() }; - event.data.insert("params".to_string(), Value::String(params)); + event.data.insert("$params".to_string(), Value::String(params)); // TODO: aw-server-python also has options and identifier } diff --git a/tests/query.rs b/tests/query.rs index 3b468d96..31ac3bcd 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -9,6 +9,7 @@ mod query_tests { use chrono; use chrono::Duration; use serde_json::json; + use std::convert::TryFrom; use aw_server::query; use aw_server::query::QueryError; @@ -293,13 +294,16 @@ mod query_tests { let code = String::from("query_bucket(\"testid\");"); query::query(&code, &interval, &ds).unwrap(); - let code = format!(r#" + let code = format!( + r#" events = query_bucket("{}"); events = flood(events); events = sort_by_duration(events); events = limit_events(events, 10000); events = sort_by_timestamp(events); events = concat(events, query_bucket("{}")); + events = categorize(events, [[["test"], {{ "regex": "value$" }}], [["test", "testing"], {{ "regex": "value$" }}]]); + events = tag(events, [["testtag", {{ "regex": "test$" }}], ["another testtag", {{ "regex": "test-pat$" }}]]); total_duration = sum_durations(events); bucketnames = query_bucket_names(); print("test", "test2"); @@ -317,6 +321,33 @@ mod query_tests { // TODO: assert_eq result } + #[test] + fn test_classify() { + let ds = setup_datastore_populated(); + let interval = TimeInterval::new_from_string(TIME_INTERVAL).unwrap(); + + let code = String::from("query_bucket(\"testid\");"); + query::query(&code, &interval, &ds).unwrap(); + + let code = format!( + r#" + events = query_bucket("{}"); + events = categorize(events, [[["Test", "Subtest"], {{ "regex": "^value$" }}]]); + events = tag(events, [["testtag", {{ "regex": "value$" }}], ["another testtag", {{ "regex": "value$" }}]]); + test = {{}}; + RETURN = events;"#, + "testid" + ); + let result: DataType = query::query(&code, &interval, &ds).unwrap(); + let events: Vec = Vec::try_from(&result).unwrap(); + + let event = events.first().unwrap(); + let tags = event.data.get("$tags").unwrap().as_array().unwrap(); + let cats = event.data.get("$category").unwrap(); + assert_eq!(tags.len(), 2); + assert_eq!(cats, &serde_json::json!(vec!["Test", "Subtest"])); + } + #[test] fn test_string() { let ds = setup_datastore_empty(); diff --git a/tests/sync.rs b/tests/sync.rs index 693a7116..031da2d6 100644 --- a/tests/sync.rs +++ b/tests/sync.rs @@ -3,7 +3,7 @@ #[cfg(test)] mod sync_tests { use std::collections::{HashMap}; - use chrono::{DateTime, Utc, Duration}; + use chrono::{DateTime, Utc}; use aw_server::models::{Bucket, Event}; use aw_server::datastore::{Datastore, DatastoreError}; diff --git a/tests/transform.rs b/tests/transform.rs index c1f87999..1d12a344 100644 --- a/tests/transform.rs +++ b/tests/transform.rs @@ -300,10 +300,10 @@ mod transform_tests { transform::split_url_event(&mut e1); assert_eq!(e1.data, json_map!{ "url": json!("http://www.google.com/path?query=1"), - "protocol": json!("http"), - "domain": json!("google.com"), - "path": json!("/path"), - "params": json!("query=1") + "$protocol": json!("http"), + "$domain": json!("google.com"), + "$path": json!("/path"), + "$params": json!("query=1") }); } }