From 9706d8bb591181669c5e005009d79971341deda3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Fri, 20 Sep 2019 12:32:50 +0200 Subject: [PATCH 01/16] progress on classifier transform --- .travis.yml | 3 +- Cargo.lock | 1 + Cargo.toml | 1 + src/models/event.rs | 11 +++++++ src/transform/classify.rs | 64 +++++++++++++++++++++++++++++++++++++++ src/transform/mod.rs | 2 ++ 6 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 src/transform/classify.rs diff --git a/.travis.yml b/.travis.yml index 2a186885..ba52726e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,8 +39,9 @@ install: - if [ "$BUILD_ANDROID" == true ]; then ./compile-android.sh; else - cargo build --bin aw-server-rust $($RELEASE && echo '--release'); cargo build --lib $($RELEASE && echo '--release'); + cargo build --bin aw-server-rust $($RELEASE && echo '--release'); + cargo build --bin aw-sync $($RELEASE && echo '--release'); fi script: diff --git a/Cargo.lock b/Cargo.lock index 27462912..fcfed585 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,6 +79,7 @@ dependencies = [ "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "multipart 0.16.1 (registry+https://github.com/rust-lang/crates.io-index)", "plex 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "rocket 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "rocket_contrib 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "rocket_cors 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 3d950feb..9b518fec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ log = "0.4" fern = { version = "0.5", features = ["colored"] } toml = "0.5" gethostname = "0.2" +regex = "1.0.5" [target.'cfg(target_os="android")'.dependencies] jni = { version = "0.5", default-features = false } diff --git a/src/models/event.rs b/src/models/event.rs index 
7daf46f5..18a128ee 100644 --- a/src/models/event.rs +++ b/src/models/event.rs @@ -16,6 +16,17 @@ pub struct Event { } impl Event { + pub fn new_test() -> Event { + let mut e = Event { + id: None, + timestamp: Utc::now(), + duration: Duration::seconds(42), + data: serde_json::Map::new() + }; + e.data.insert("test".into(), serde_json::json!("just a test")); + return e; + } + pub fn calculate_endtime(&self) -> DateTime { self.timestamp + chrono::Duration::nanoseconds(self.duration.num_nanoseconds().unwrap() as i64) } diff --git a/src/transform/classify.rs b/src/transform/classify.rs new file mode 100644 index 00000000..9d1ba5a7 --- /dev/null +++ b/src/transform/classify.rs @@ -0,0 +1,64 @@ +/// Transforms for classifying (tagging and categorizing) events. +/// +/// Based on code in aw_research: https://github.com/ActivityWatch/aw-research/blob/master/aw_research/classify.py + +use regex::Regex; +use serde_json; + +use std::collections::HashSet; +use crate::models::Event; + +pub fn classify(events: Vec, classes: Vec<(Regex, String)>) -> Vec { + // TODO: There is probably a better way that avoids the clone? 
+ events.iter().map(|e| classify_one(e.clone(), &classes)).collect() +} + +fn classify_one(mut event: Event, classes: &Vec<(Regex, String)>) -> Event { + let mut tags: HashSet = HashSet::new(); + for (re, cls) in classes { + for val in event.data.values() { + // TODO: Recurse if value is object/array + if val.is_string() && re.is_match(val.as_str().unwrap()) { + tags.insert(cls.clone()); + break; + } + } + } + + // An event can have many tags + event.data.insert("$tags".into(), serde_json::json!(tags)); + + // An event can only have one category, although the category may have a hierarchy, + // for instance: "Work -> ActivityWatch -> aw-server-rust" + // A category is chosed out of the tags used some rule (such as picking the one that's deepest in the hierarchy) + let category = choose_category(tags); + event.data.insert("$category".into(), serde_json::json!(category)); + event +} + +fn choose_category(tags: HashSet) -> String { + tags.iter().fold(&"Uncategorized".to_string(), |acc: &String, item: &String| { + if item.matches("->").count() >= acc.matches("->").count() { + item + } else { + acc + } + }).clone() +} + +#[test] +fn test_classify() { + let e = Event::new_test(); + let events = vec!(e); + let classes: Vec<(Regex, String)> = vec!( + (Regex::new(r"test").unwrap(), "#test-tag".into()), + (Regex::new(r"test").unwrap(), "Test".into()), + (Regex::new(r"test").unwrap(), "Test -> Subtest".into()), + (Regex::new(r"nonmatching").unwrap(), "Other".into()), + ); + let events_classified = classify(events, classes); + + assert_eq!(events_classified.len(), 1); + assert_eq!(events_classified.first().unwrap().data.get("$tags").unwrap().as_array().unwrap().len(), 3); + assert_eq!(events_classified.first().unwrap().data.get("$category").unwrap(), &serde_json::json!("Test -> Subtest")); +} diff --git a/src/transform/mod.rs b/src/transform/mod.rs index 32e8cd95..e68f5f1a 100644 --- a/src/transform/mod.rs +++ b/src/transform/mod.rs @@ -2,6 +2,8 @@ use 
std::collections::HashMap; use crate::models::Event; use serde_json::value::Value; +pub mod classify; + // TODO: Compare with aw-cores version to make sure it works correctly pub fn heartbeat(last_event: &Event, heartbeat: &Event, pulsetime: f64) -> Option { // Verify that data is the same From 4b2981be18fc0f07fba6804c1c2769dece101c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Fri, 20 Sep 2019 13:23:30 +0200 Subject: [PATCH 02/16] added classify as a query function --- src/query/functions.rs | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/src/query/functions.rs b/src/query/functions.rs index 1b1a763b..3d672833 100644 --- a/src/query/functions.rs +++ b/src/query/functions.rs @@ -1,7 +1,7 @@ +use std::collections::HashMap; use crate::query::DataType; use crate::query::QueryError; use crate::datastore::Datastore; -use std::collections::HashMap; pub type QueryFn = fn(args: Vec, env: &HashMap<&str, DataType>, ds: &Datastore) -> Result; @@ -21,16 +21,19 @@ pub fn fill_env<'a>(env: &mut HashMap<&'a str, DataType>) { env.insert("filter_period_intersect", DataType::Function("filter_period_intersect".to_string(), qfunctions::filter_period_intersect)); env.insert("split_url_events", DataType::Function("split_url_events".to_string(), qfunctions::split_url_events)); env.insert("concat", DataType::Function("concat".to_string(), qfunctions::concat)); + env.insert("classify", DataType::Function("classify".into(), qfunctions::classify)); } mod qfunctions { + use std::collections::HashMap; + use regex::Regex; + use crate::query::DataType; use crate::query::QueryError; use crate::datastore::Datastore; use crate::transform; use super::validate; - use std::collections::HashMap; pub fn print(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { for arg in args { @@ -101,6 +104,25 @@ mod qfunctions { return Ok(DataType::List(tagged_flooded_events)); } + pub fn classify(args: Vec, 
_env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { + // typecheck + validate::args_length(&args, 2)?; + let events = validate::arg_type_event_list(&args[0])?.clone(); + let classes = validate::arg_type_list_of_list_of_strings(&args[1])?.clone(); + // Run classify + let classes_tuples: Vec<(Regex, String)> = classes + .iter() + .map(|l| (Regex::new(l.get(0).unwrap()).unwrap(), l.get(1).unwrap().to_string())) + .collect(); + let mut flooded_events = transform::classify::classify(events, classes_tuples); + // Put events back into DataType::Event container + let mut tagged_flooded_events = Vec::new(); + for event in flooded_events.drain(..) { + tagged_flooded_events.push(DataType::Event(event)); + } + return Ok(DataType::List(tagged_flooded_events)); + } + pub fn sort_by_duration(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; @@ -282,6 +304,20 @@ mod validate { } } + pub fn arg_type_list_of_list_of_strings (arg: &DataType) -> Result>, QueryError> { + let mut tagged_lists = arg_type_list(arg)?.clone(); + let mut lists: Vec> = Vec::new(); + for list in tagged_lists.drain(..) 
{ + match list { + DataType::List(_) => lists.push(arg_type_string_list(&list)?.clone()), + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type list of tuples of strings, list contains {:?}", invalid_type) + )) + } + } + return Ok(lists); + } + pub fn arg_type_event_list (arg: &DataType) -> Result, QueryError> { let mut tagged_events = arg_type_list(arg)?.clone(); let mut events = Vec::new(); From 50b3b3ed61ce1be60a2ff7e0b596b88e2d0cd422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Fri, 20 Sep 2019 13:43:53 +0200 Subject: [PATCH 03/16] added query test for classify, changed tuple order --- src/query/functions.rs | 4 ++-- src/transform/classify.rs | 16 ++++++++-------- tests/query.rs | 25 +++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/query/functions.rs b/src/query/functions.rs index 3d672833..6ffb54f4 100644 --- a/src/query/functions.rs +++ b/src/query/functions.rs @@ -110,9 +110,9 @@ mod qfunctions { let events = validate::arg_type_event_list(&args[0])?.clone(); let classes = validate::arg_type_list_of_list_of_strings(&args[1])?.clone(); // Run classify - let classes_tuples: Vec<(Regex, String)> = classes + let classes_tuples: Vec<(String, Regex)> = classes .iter() - .map(|l| (Regex::new(l.get(0).unwrap()).unwrap(), l.get(1).unwrap().to_string())) + .map(|l| (l.get(0).unwrap().to_string(), Regex::new(l.get(1).unwrap()).unwrap())) .collect(); let mut flooded_events = transform::classify::classify(events, classes_tuples); // Put events back into DataType::Event container diff --git a/src/transform/classify.rs b/src/transform/classify.rs index 9d1ba5a7..1a023a5c 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -8,14 +8,14 @@ use serde_json; use std::collections::HashSet; use crate::models::Event; -pub fn classify(events: Vec, classes: Vec<(Regex, String)>) -> Vec { +pub fn classify(events: Vec, classes: 
Vec<(String, Regex)>) -> Vec { // TODO: There is probably a better way that avoids the clone? events.iter().map(|e| classify_one(e.clone(), &classes)).collect() } -fn classify_one(mut event: Event, classes: &Vec<(Regex, String)>) -> Event { +fn classify_one(mut event: Event, classes: &Vec<(String, Regex)>) -> Event { let mut tags: HashSet = HashSet::new(); - for (re, cls) in classes { + for (cls, re) in classes { for val in event.data.values() { // TODO: Recurse if value is object/array if val.is_string() && re.is_match(val.as_str().unwrap()) { @@ -50,11 +50,11 @@ fn choose_category(tags: HashSet) -> String { fn test_classify() { let e = Event::new_test(); let events = vec!(e); - let classes: Vec<(Regex, String)> = vec!( - (Regex::new(r"test").unwrap(), "#test-tag".into()), - (Regex::new(r"test").unwrap(), "Test".into()), - (Regex::new(r"test").unwrap(), "Test -> Subtest".into()), - (Regex::new(r"nonmatching").unwrap(), "Other".into()), + let classes: Vec<(String, Regex)> = vec!( + ("#test-tag".into(), Regex::new(r"test").unwrap()), + ("Test".into(), Regex::new(r"test").unwrap()), + ("Test -> Subtest".into(), Regex::new(r"test").unwrap()), + ("Other".into(), Regex::new(r"nonmatching").unwrap()), ); let events_classified = classify(events, classes); diff --git a/tests/query.rs b/tests/query.rs index 3b468d96..e91681d3 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -300,6 +300,7 @@ mod query_tests { events = limit_events(events, 10000); events = sort_by_timestamp(events); events = concat(events, query_bucket("{}")); + events = classify(events, [["test", "\#test-tag"], ["just", "Test -> Testing"]]); total_duration = sum_durations(events); bucketnames = query_bucket_names(); print("test", "test2"); @@ -317,6 +318,30 @@ mod query_tests { // TODO: assert_eq result } + #[test] + fn test_classify() { + let ds = setup_datastore_populated(); + let interval = TimeInterval::new_from_string(TIME_INTERVAL).unwrap(); + + let code = String::from("query_bucket(\"testid\");"); 
+ query::query(&code, &interval, &ds).unwrap(); + + let code = format!(r#" + events = query_bucket("{}"); + events = classify(events, [["test-tag", "^value$"], ["Test -> Subtest", "^value$"]]); + RETURN = events;"#, + "testid"); + let events = match query::query(&code, &interval, &ds).unwrap() { + query::DataType::List(l) => l, + ref data => panic!("Wrong datatype, {:?}", data) + }; + + println!("{:?}", events.first().unwrap()); + // TODO: assert_eq result + //assert_eq!(events.first().unwrap().data.get("$tags").unwrap().len(), 2); + //assert_eq!(events.first().unwrap().data.get("$category").unwrap(), "Test -> Subtest"); + } + #[test] fn test_string() { let ds = setup_datastore_empty(); From cdd61b1a199e6b71abc2354206fe8f7b1e7c5bbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Fri, 20 Sep 2019 13:45:01 +0200 Subject: [PATCH 04/16] fixed issue in travis config --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ba52726e..c85ed936 100644 --- a/.travis.yml +++ b/.travis.yml @@ -41,7 +41,7 @@ install: else cargo build --lib $($RELEASE && echo '--release'); cargo build --bin aw-server-rust $($RELEASE && echo '--release'); - cargo build --bin aw-sync $($RELEASE && echo '--release'); + cargo build --bin aw-sync-rust $($RELEASE && echo '--release'); fi script: From 2747c2f633d532835a3473317f0de78ee9eb870b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Sat, 21 Sep 2019 13:23:46 +0200 Subject: [PATCH 05/16] renamed Event::new_test -> Event::new (and removed test-specific behavior) --- src/models/event.rs | 8 +++----- src/transform/classify.rs | 4 +++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/models/event.rs b/src/models/event.rs index 18a128ee..056d287c 100644 --- a/src/models/event.rs +++ b/src/models/event.rs @@ -16,15 +16,13 @@ pub struct Event { } impl Event { - pub fn new_test() -> Event { - let mut e = Event { + pub fn new() -> Event { 
+ return Event { id: None, timestamp: Utc::now(), - duration: Duration::seconds(42), + duration: Duration::seconds(0), data: serde_json::Map::new() }; - e.data.insert("test".into(), serde_json::json!("just a test")); - return e; } pub fn calculate_endtime(&self) -> DateTime { diff --git a/src/transform/classify.rs b/src/transform/classify.rs index 1a023a5c..889c7f7b 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -48,7 +48,9 @@ fn choose_category(tags: HashSet) -> String { #[test] fn test_classify() { - let e = Event::new_test(); + let mut e = Event::new(); + e.data.insert("test".into(), serde_json::json!("just a test")); + let events = vec!(e); let classes: Vec<(String, Regex)> = vec!( ("#test-tag".into(), Regex::new(r"test").unwrap()), From b2543c092b8ba26c41c7e86224f976cd90440227 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Sat, 21 Sep 2019 13:32:56 +0200 Subject: [PATCH 06/16] avoided unnecessary clone --- Cargo.toml | 2 +- src/transform/classify.rs | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9b518fec..8c7b506c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ log = "0.4" fern = { version = "0.5", features = ["colored"] } toml = "0.5" gethostname = "0.2" -regex = "1.0.5" +regex = "1.0" [target.'cfg(target_os="android")'.dependencies] jni = { version = "0.5", default-features = false } diff --git a/src/transform/classify.rs b/src/transform/classify.rs index 889c7f7b..f705746e 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -8,9 +8,12 @@ use serde_json; use std::collections::HashSet; use crate::models::Event; -pub fn classify(events: Vec, classes: Vec<(String, Regex)>) -> Vec { - // TODO: There is probably a better way that avoids the clone? 
- events.iter().map(|e| classify_one(e.clone(), &classes)).collect() +pub fn classify(mut events: Vec, classes: Vec<(String, Regex)>) -> Vec { + let mut classified_events = Vec::new(); + for event in events.drain(..) { + classified_events.push(classify_one(event, &classes)); + } + return classified_events; } fn classify_one(mut event: Event, classes: &Vec<(String, Regex)>) -> Event { From 41dd23dd64d1e567b12291b7fb781c4e23828a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Tue, 1 Oct 2019 13:41:26 +0200 Subject: [PATCH 07/16] prefixed computed data properties with '$', implemented Default trait for Event, fixed non-category tags not accidentally being used as categories --- src/models/event.rs | 20 +++++++++++--------- src/query/functions.rs | 2 +- src/transform/classify.rs | 18 +++++++++++------- src/transform/mod.rs | 8 ++++---- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/src/models/event.rs b/src/models/event.rs index 056d287c..6ccf1a45 100644 --- a/src/models/event.rs +++ b/src/models/event.rs @@ -16,15 +16,6 @@ pub struct Event { } impl Event { - pub fn new() -> Event { - return Event { - id: None, - timestamp: Utc::now(), - duration: Duration::seconds(0), - data: serde_json::Map::new() - }; - } - pub fn calculate_endtime(&self) -> DateTime { self.timestamp + chrono::Duration::nanoseconds(self.duration.num_nanoseconds().unwrap() as i64) } @@ -39,6 +30,17 @@ impl PartialEq for Event { } } +impl Default for Event { + fn default() -> Self { + Event { + id: None, + timestamp: Utc::now(), + duration: Duration::seconds(0), + data: serde_json::Map::new() + } + } +} + fn default_duration() -> Duration { Duration::seconds(0) } diff --git a/src/query/functions.rs b/src/query/functions.rs index 6ffb54f4..dd548d89 100644 --- a/src/query/functions.rs +++ b/src/query/functions.rs @@ -114,7 +114,7 @@ mod qfunctions { .iter() .map(|l| (l.get(0).unwrap().to_string(), Regex::new(l.get(1).unwrap()).unwrap())) .collect(); - let mut 
flooded_events = transform::classify::classify(events, classes_tuples); + let mut flooded_events = transform::classify::classify(events, &classes_tuples); // Put events back into DataType::Event container let mut tagged_flooded_events = Vec::new(); for event in flooded_events.drain(..) { diff --git a/src/transform/classify.rs b/src/transform/classify.rs index f705746e..817d8459 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -8,10 +8,10 @@ use serde_json; use std::collections::HashSet; use crate::models::Event; -pub fn classify(mut events: Vec, classes: Vec<(String, Regex)>) -> Vec { +pub fn classify(mut events: Vec, classes: &Vec<(String, Regex)>) -> Vec { let mut classified_events = Vec::new(); for event in events.drain(..) { - classified_events.push(classify_one(event, &classes)); + classified_events.push(classify_one(event, classes)); } return classified_events; } @@ -34,14 +34,18 @@ fn classify_one(mut event: Event, classes: &Vec<(String, Regex)>) -> Event { // An event can only have one category, although the category may have a hierarchy, // for instance: "Work -> ActivityWatch -> aw-server-rust" // A category is chosed out of the tags used some rule (such as picking the one that's deepest in the hierarchy) - let category = choose_category(tags); + let category = choose_category(&tags); event.data.insert("$category".into(), serde_json::json!(category)); event } -fn choose_category(tags: HashSet) -> String { +fn choose_category(tags: &HashSet) -> String { tags.iter().fold(&"Uncategorized".to_string(), |acc: &String, item: &String| { - if item.matches("->").count() >= acc.matches("->").count() { + if item.starts_with('#') { + // If tag is not a category, then skip. + acc + } else if item.matches("->").count() >= acc.matches("->").count() { + // If tag is category with greater or equal depth than current, then choose the new one instead. 
item } else { acc @@ -51,7 +55,7 @@ fn choose_category(tags: HashSet) -> String { #[test] fn test_classify() { - let mut e = Event::new(); + let mut e = Event::default(); e.data.insert("test".into(), serde_json::json!("just a test")); let events = vec!(e); @@ -61,7 +65,7 @@ fn test_classify() { ("Test -> Subtest".into(), Regex::new(r"test").unwrap()), ("Other".into(), Regex::new(r"nonmatching").unwrap()), ); - let events_classified = classify(events, classes); + let events_classified = classify(events, &classes); assert_eq!(events_classified.len(), 1); assert_eq!(events_classified.first().unwrap().data.get("$tags").unwrap().as_array().unwrap().len(), 3); diff --git a/src/transform/mod.rs b/src/transform/mod.rs index e68f5f1a..bb6376af 100644 --- a/src/transform/mod.rs +++ b/src/transform/mod.rs @@ -274,7 +274,7 @@ pub fn split_url_event(event: &mut Event) { }; // Protocol let protocol = uri.scheme().to_string(); - event.data.insert("protocol".to_string(), Value::String(protocol)); + event.data.insert("$protocol".to_string(), Value::String(protocol)); // Domain let domain = match uri.authority() { Some(authority) => { @@ -282,13 +282,13 @@ pub fn split_url_event(event: &mut Event) { }, None => "".to_string(), }; - event.data.insert("domain".to_string(), Value::String(domain)); + event.data.insert("$domain".to_string(), Value::String(domain)); // Path let path = match uri.origin() { Some(origin) => origin.path().to_string(), None => "".to_string() }; - event.data.insert("path".to_string(), Value::String(path)); + event.data.insert("$path".to_string(), Value::String(path)); // Params // TODO: What's the difference between params and query? 
let params = match uri.origin() { @@ -298,7 +298,7 @@ pub fn split_url_event(event: &mut Event) { }, None => "".to_string() }; - event.data.insert("params".to_string(), Value::String(params)); + event.data.insert("$params".to_string(), Value::String(params)); // TODO: aw-server-python also has options and identifier } From 32b0374fea9a1f6d18173bf114543431f8530db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Tue, 1 Oct 2019 15:22:30 +0200 Subject: [PATCH 08/16] fixed test --- tests/transform.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/transform.rs b/tests/transform.rs index c1f87999..1d12a344 100644 --- a/tests/transform.rs +++ b/tests/transform.rs @@ -300,10 +300,10 @@ mod transform_tests { transform::split_url_event(&mut e1); assert_eq!(e1.data, json_map!{ "url": json!("http://www.google.com/path?query=1"), - "protocol": json!("http"), - "domain": json!("google.com"), - "path": json!("/path"), - "params": json!("query=1") + "$protocol": json!("http"), + "$domain": json!("google.com"), + "$path": json!("/path"), + "$params": json!("query=1") }); } } From 391186facb7e94df77daa63af6421dc2b5b4a6dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Tue, 1 Oct 2019 15:53:23 +0200 Subject: [PATCH 09/16] added test for classifying event without matching category --- src/transform/classify.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/transform/classify.rs b/src/transform/classify.rs index 817d8459..ce631819 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -71,3 +71,19 @@ fn test_classify() { assert_eq!(events_classified.first().unwrap().data.get("$tags").unwrap().as_array().unwrap().len(), 3); assert_eq!(events_classified.first().unwrap().data.get("$category").unwrap(), &serde_json::json!("Test -> Subtest")); } + + +#[test] +fn test_classify_uncategorized() { + // Checks that the category correctly becomes uncategorized when no 
category matches + let mut e = Event::default(); + e.data.insert("test".into(), serde_json::json!("just a test")); + + let classes: Vec<(String, Regex)> = vec!( + ("#test-tag".into(), Regex::new(r"test").unwrap()), + ); + let events_classified = classify(vec!(e), &classes); + assert_eq!(events_classified.len(), 1); + assert_eq!(events_classified.first().unwrap().data.get("$tags").unwrap().as_array().unwrap().len(), 1); + assert_eq!(events_classified.first().unwrap().data.get("$category").unwrap(), &serde_json::json!("Uncategorized")); +} From 4f059d5b1bd83a1639085a6af1291da0d30e4a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Fri, 4 Oct 2019 19:48:49 +0200 Subject: [PATCH 10/16] started splitting classify into categorize and autotag --- src/transform/classify.rs | 251 +++++++++++++++++++++++++++++++------- 1 file changed, 210 insertions(+), 41 deletions(-) diff --git a/src/transform/classify.rs b/src/transform/classify.rs index ce631819..c8f1d1ae 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -1,30 +1,61 @@ /// Transforms for classifying (tagging and categorizing) events. /// /// Based on code in aw_research: https://github.com/ActivityWatch/aw-research/blob/master/aw_research/classify.py +use std::collections::HashSet; +use crate::models::Event; use regex::Regex; use serde_json; -use std::collections::HashSet; -use crate::models::Event; +/// This struct defines the rules +/// For now it just needs to contain the regex to match with, but in the future it might contain a +/// glob-pattern, or other options for classifying. +/// It's puropse is to make the API easy to extend in the future without having to break backwards +/// compatibility (or have to maintain "old" query2 functions). 
+pub struct Rule { + regex: Option, +} + +impl Rule { + fn from_regex(re: &Regex) -> Self { + Self { + regex: Some(re.clone()), + } + } -pub fn classify(mut events: Vec, classes: &Vec<(String, Regex)>) -> Vec { + fn matches(&self, event: &Event) -> bool { + event + .data + .values() + .filter(|val| val.is_string()) + .any(|val| { + return match &self.regex { + Some(re) => re.is_match(val.as_str().unwrap()), + None => false, + }; + }) + } +} + +// TODO: Deprecate in favor of `categorize` and `autotag` +pub fn classify(mut events: Vec, rules: &Vec<(String, Regex)>) -> Vec { let mut classified_events = Vec::new(); + let rules_new: Vec<(String, Rule)> = rules + .iter() + .map(|(cls, re)| (cls.clone(), Rule::from_regex(re))) + .collect(); for event in events.drain(..) { - classified_events.push(classify_one(event, classes)); + classified_events.push(classify_one(event, &rules_new)); } return classified_events; } -fn classify_one(mut event: Event, classes: &Vec<(String, Regex)>) -> Event { +/// First tags and then selects the deepest matching tag as category (by counting number of "->" in name) +fn classify_one(mut event: Event, rules: &Vec<(String, Rule)>) -> Event { let mut tags: HashSet = HashSet::new(); - for (cls, re) in classes { - for val in event.data.values() { - // TODO: Recurse if value is object/array - if val.is_string() && re.is_match(val.as_str().unwrap()) { - tags.insert(cls.clone()); - break; - } + for (cls, rule) in rules { + if rule.matches(&event) { + tags.insert(cls.clone()); } } @@ -34,56 +65,194 @@ fn classify_one(mut event: Event, classes: &Vec<(String, Regex)>) -> Event { // An event can only have one category, although the category may have a hierarchy, // for instance: "Work -> ActivityWatch -> aw-server-rust" // A category is chosed out of the tags used some rule (such as picking the one that's deepest in the hierarchy) - let category = choose_category(&tags); - event.data.insert("$category".into(), serde_json::json!(category)); + let category 
= _choose_category(&tags); + event + .data + .insert("$category".into(), serde_json::json!(category)); event } -fn choose_category(tags: &HashSet) -> String { - tags.iter().fold(&"Uncategorized".to_string(), |acc: &String, item: &String| { - if item.starts_with('#') { - // If tag is not a category, then skip. - acc - } else if item.matches("->").count() >= acc.matches("->").count() { - // If tag is category with greater or equal depth than current, then choose the new one instead. - item - } else { - acc +/// Categorizes a list of events +/// +/// An event can only have one category, although the category may have a hierarchy, +/// for instance: "Work -> ActivityWatch -> aw-server-rust" +/// A category is chosed out of the tags used some rule (such as picking the one that's deepest in the hierarchy) +// TODO: Classes should be &Vec<(String, Rule)> +pub fn categorize(mut events: Vec, rules: &Vec<(Vec, Regex)>) -> Vec { + let mut classified_events = Vec::new(); + for event in events.drain(..) { + classified_events.push(categorize_one(event, rules)); + } + return classified_events; +} + +// TODO: Classes should be &Vec<(String, Rule)> +pub fn autotag(mut events: Vec, rules: &Vec<(String, Regex)>) -> Vec { + let mut events_tagged = Vec::new(); + let new_rules: Vec<(String, Rule)> = rules + .iter() + .map(|(cls, re)| (cls.clone(), Rule::from_regex(re))) + .collect(); + for event in events.drain(..) { + events_tagged.push(classify_one(event, &new_rules)); + } + return events_tagged; +} + +fn categorize_one(mut event: Event, categories: &Vec<(Vec, Regex)>) -> Event { + let mut category: String = "Uncategorized".into(); + for (cat, re) in categories { + if _match(&event, &re) { + // TODO: This shouldn't be cat.join("->"), but if we do end up deciding on this API it'll be easy + // to remove. 
+ category = _pick_highest_ranking_category(category, &cat.join("->")); } - }).clone() + } + event.data.insert( + "$category".into(), + serde_json::json!(_cat_format_to_vec(category)), + ); + return event; +} + +fn _match(event: &Event, re: &Regex) -> bool { + for val in event.data.values() { + if val.is_string() && re.is_match(val.as_str().unwrap()) { + return true; + } + } + return false; +} + +fn _pick_highest_ranking_category(acc: String, item: &String) -> String { + if item.matches("->").count() >= acc.matches("->").count() { + // If tag is category with greater or equal depth than current, then choose the new one instead. + item.clone() + } else { + acc + } +} + +fn _choose_category(tags: &HashSet) -> String { + tags.iter() + .fold("Uncategorized".to_string(), |acc, item| { + return _pick_highest_ranking_category(acc, &item); + }) + .clone() +} + +fn _cat_format_to_vec(cat: String) -> Vec { + cat.split("->").map(|s| s.trim().into()).collect() +} + +fn _cat_vec_to_format(cat: Vec) -> String { + cat.join(" -> ") +} + +#[test] +fn test_categorize() { + let mut e = Event::default(); + e.data + .insert("test".into(), serde_json::json!("just a test")); + + let mut events = vec![e]; + let cats: Vec<(Vec, Regex)> = vec![ + (vec!["Test".into()], Regex::new(r"test").unwrap()), + ( + vec!["Test".into(), "Subtest".into()], + Regex::new(r"test").unwrap(), + ), + (vec!["Other".into()], Regex::new(r"nonmatching").unwrap()), + ]; + events = categorize(events, &cats); + + assert_eq!(events.len(), 1); + assert_eq!( + events.first().unwrap().data.get("$category").unwrap(), + &serde_json::json!(vec!["Test", "Subtest"]) + ); +} + +#[test] +fn test_autotag() { + let mut e = Event::default(); + e.data + .insert("test".into(), serde_json::json!("just a test")); + + let mut events = vec![e]; + let rules: Vec<(String, Regex)> = vec![ + ("test".into(), Regex::new(r"test").unwrap()), + ("test-2".into(), Regex::new(r"test").unwrap()), + ("nonmatching".into(), 
Regex::new(r"nonmatching").unwrap()), + ]; + events = classify(events, &rules); + + assert_eq!(events.len(), 1); + assert_eq!( + events + .first() + .unwrap() + .data + .get("$tags") + .unwrap() + .as_array() + .unwrap() + .len(), + 2 + ); } #[test] fn test_classify() { let mut e = Event::default(); - e.data.insert("test".into(), serde_json::json!("just a test")); + e.data + .insert("test".into(), serde_json::json!("just a test")); - let events = vec!(e); - let classes: Vec<(String, Regex)> = vec!( + let mut events = vec![e]; + let classes: Vec<(String, Regex)> = vec![ ("#test-tag".into(), Regex::new(r"test").unwrap()), ("Test".into(), Regex::new(r"test").unwrap()), ("Test -> Subtest".into(), Regex::new(r"test").unwrap()), ("Other".into(), Regex::new(r"nonmatching").unwrap()), - ); - let events_classified = classify(events, &classes); + ]; + events = classify(events, &classes); - assert_eq!(events_classified.len(), 1); - assert_eq!(events_classified.first().unwrap().data.get("$tags").unwrap().as_array().unwrap().len(), 3); - assert_eq!(events_classified.first().unwrap().data.get("$category").unwrap(), &serde_json::json!("Test -> Subtest")); + assert_eq!(events.len(), 1); + assert_eq!( + events + .first() + .unwrap() + .data + .get("$tags") + .unwrap() + .as_array() + .unwrap() + .len(), + 3 + ); + assert_eq!( + events.first().unwrap().data.get("$category").unwrap(), + &serde_json::json!("Test -> Subtest") + ); } - #[test] fn test_classify_uncategorized() { // Checks that the category correctly becomes uncategorized when no category matches let mut e = Event::default(); - e.data.insert("test".into(), serde_json::json!("just a test")); + e.data + .insert("test".into(), serde_json::json!("just a test")); - let classes: Vec<(String, Regex)> = vec!( - ("#test-tag".into(), Regex::new(r"test").unwrap()), + let mut events = vec![e]; + let classes: Vec<(String, Regex)> = vec![( + "Non-matching -> Test".into(), + Regex::new(r"not going to match").unwrap(), + )]; + events = 
classify(events, &classes); + + assert_eq!(events.len(), 1); + assert_eq!( + events.first().unwrap().data.get("$category").unwrap(), + &serde_json::json!("Uncategorized") ); - let events_classified = classify(vec!(e), &classes); - assert_eq!(events_classified.len(), 1); - assert_eq!(events_classified.first().unwrap().data.get("$tags").unwrap().as_array().unwrap().len(), 1); - assert_eq!(events_classified.first().unwrap().data.get("$category").unwrap(), &serde_json::json!("Uncategorized")); } From 825f50b14810aef1347d9dd6ea58c458cb21055b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Fri, 4 Oct 2019 20:43:47 +0200 Subject: [PATCH 11/16] added categorize as a query2 function --- src/query/functions.rs | 34 ++++--- src/transform/classify.rs | 184 +++++++++++++------------------------- tests/query.rs | 15 ++-- 3 files changed, 90 insertions(+), 143 deletions(-) diff --git a/src/query/functions.rs b/src/query/functions.rs index dd548d89..40af5110 100644 --- a/src/query/functions.rs +++ b/src/query/functions.rs @@ -21,7 +21,7 @@ pub fn fill_env<'a>(env: &mut HashMap<&'a str, DataType>) { env.insert("filter_period_intersect", DataType::Function("filter_period_intersect".to_string(), qfunctions::filter_period_intersect)); env.insert("split_url_events", DataType::Function("split_url_events".to_string(), qfunctions::split_url_events)); env.insert("concat", DataType::Function("concat".to_string(), qfunctions::concat)); - env.insert("classify", DataType::Function("classify".into(), qfunctions::classify)); + env.insert("categorize", DataType::Function("categorize".into(), qfunctions::categorize)); } mod qfunctions { @@ -33,6 +33,7 @@ mod qfunctions { use crate::datastore::Datastore; use crate::transform; use super::validate; + use crate::transform::classify::Rule; pub fn print(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { @@ -104,17 +105,17 @@ mod qfunctions { return Ok(DataType::List(tagged_flooded_events)); } - pub fn 
classify(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { + pub fn categorize( + args: Vec, + _env: &HashMap<&str, DataType>, + _ds: &Datastore, + ) -> Result { // typecheck validate::args_length(&args, 2)?; let events = validate::arg_type_event_list(&args[0])?.clone(); - let classes = validate::arg_type_list_of_list_of_strings(&args[1])?.clone(); - // Run classify - let classes_tuples: Vec<(String, Regex)> = classes - .iter() - .map(|l| (l.get(0).unwrap().to_string(), Regex::new(l.get(1).unwrap()).unwrap())) - .collect(); - let mut flooded_events = transform::classify::classify(events, &classes_tuples); + let rules = validate::arg_type_list_of_category_rules(&args[1])?; + // Run categorize + let mut flooded_events = transform::classify::categorize(events, &rules); // Put events back into DataType::Event container let mut tagged_flooded_events = Vec::new(); for event in flooded_events.drain(..) { @@ -266,6 +267,8 @@ mod validate { use crate::query::{QueryError, DataType}; use crate::models::Event; use crate::models::TimeInterval; + use crate::transform::classify::Rule; + use regex::Regex; use std::collections::HashMap; pub fn args_length(args: &Vec, len: usize) -> Result<(), QueryError> { @@ -304,14 +307,19 @@ mod validate { } } - pub fn arg_type_list_of_list_of_strings (arg: &DataType) -> Result>, QueryError> { + pub fn arg_type_list_of_category_rules( + arg: &DataType, + ) -> Result, Rule)>, QueryError> { let mut tagged_lists = arg_type_list(arg)?.clone(); - let mut lists: Vec> = Vec::new(); + let mut lists: Vec<(Vec, Rule)> = Vec::new(); for list in tagged_lists.drain(..) 
{ match list { - DataType::List(_) => lists.push(arg_type_string_list(&list)?.clone()), + DataType::List(ref l) => { + let regex: Regex = Regex::new(arg_type_string(l.get(1).unwrap())?).unwrap(); + lists.push((arg_type_string_list(l.get(0).unwrap())?.clone(), Rule::from(regex))); + }, ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type list of tuples of strings, list contains {:?}", invalid_type) + format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) )) } } diff --git a/src/transform/classify.rs b/src/transform/classify.rs index c8f1d1ae..f545b8ac 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -7,7 +7,7 @@ use crate::models::Event; use regex::Regex; use serde_json; -/// This struct defines the rules +/// This struct defines the rules for classification. /// For now it just needs to contain the regex to match with, but in the future it might contain a /// glob-pattern, or other options for classifying. /// It's puropse is to make the API easy to extend in the future without having to break backwards @@ -17,12 +17,6 @@ pub struct Rule { } impl Rule { - fn from_regex(re: &Regex) -> Self { - Self { - regex: Some(re.clone()), - } - } - fn matches(&self, event: &Event) -> bool { event .data @@ -37,39 +31,12 @@ impl Rule { } } -// TODO: Deprecate in favor of `categorize` and `autotag` -pub fn classify(mut events: Vec, rules: &Vec<(String, Regex)>) -> Vec { - let mut classified_events = Vec::new(); - let rules_new: Vec<(String, Rule)> = rules - .iter() - .map(|(cls, re)| (cls.clone(), Rule::from_regex(re))) - .collect(); - for event in events.drain(..) 
{ - classified_events.push(classify_one(event, &rules_new)); - } - return classified_events; -} - -/// First tags and then selects the deepest matching tag as category (by counting number of "->" in name) -fn classify_one(mut event: Event, rules: &Vec<(String, Rule)>) -> Event { - let mut tags: HashSet = HashSet::new(); - for (cls, rule) in rules { - if rule.matches(&event) { - tags.insert(cls.clone()); +impl From for Rule { + fn from(re: Regex) -> Self { + Self { + regex: Some(re.clone()), } } - - // An event can have many tags - event.data.insert("$tags".into(), serde_json::json!(tags)); - - // An event can only have one category, although the category may have a hierarchy, - // for instance: "Work -> ActivityWatch -> aw-server-rust" - // A category is chosed out of the tags used some rule (such as picking the one that's deepest in the hierarchy) - let category = _choose_category(&tags); - event - .data - .insert("$category".into(), serde_json::json!(category)); - event } /// Categorizes a list of events @@ -77,8 +44,7 @@ fn classify_one(mut event: Event, rules: &Vec<(String, Rule)>) -> Event { /// An event can only have one category, although the category may have a hierarchy, /// for instance: "Work -> ActivityWatch -> aw-server-rust" /// A category is chosed out of the tags used some rule (such as picking the one that's deepest in the hierarchy) -// TODO: Classes should be &Vec<(String, Rule)> -pub fn categorize(mut events: Vec, rules: &Vec<(Vec, Regex)>) -> Vec { +pub fn categorize(mut events: Vec, rules: &Vec<(Vec, Rule)>) -> Vec { let mut classified_events = Vec::new(); for event in events.drain(..) 
{ classified_events.push(categorize_one(event, rules)); @@ -86,33 +52,36 @@ pub fn categorize(mut events: Vec, rules: &Vec<(Vec, Regex)>) -> return classified_events; } -// TODO: Classes should be &Vec<(String, Rule)> -pub fn autotag(mut events: Vec, rules: &Vec<(String, Regex)>) -> Vec { +fn categorize_one(mut event: Event, rules: &Vec<(Vec, Rule)>) -> Event { + let mut category: Vec = vec!["Uncategorized".into()]; + for (cat, rule) in rules { + if rule.matches(&event) { + category = _pick_highest_ranking_category(category, &cat); + } + } + event + .data + .insert("$category".into(), serde_json::json!(category)); + return event; +} + +pub fn autotag(mut events: Vec, rules: &Vec<(String, Rule)>) -> Vec { let mut events_tagged = Vec::new(); - let new_rules: Vec<(String, Rule)> = rules - .iter() - .map(|(cls, re)| (cls.clone(), Rule::from_regex(re))) - .collect(); for event in events.drain(..) { - events_tagged.push(classify_one(event, &new_rules)); + events_tagged.push(autotag_one(event, &rules)); } return events_tagged; } -fn categorize_one(mut event: Event, categories: &Vec<(Vec, Regex)>) -> Event { - let mut category: String = "Uncategorized".into(); - for (cat, re) in categories { - if _match(&event, &re) { - // TODO: This shouldn't be cat.join("->"), but if we do end up deciding on this API it'll be easy - // to remove. 
- category = _pick_highest_ranking_category(category, &cat.join("->")); +fn autotag_one(mut event: Event, rules: &Vec<(String, Rule)>) -> Event { + let mut tags: HashSet = HashSet::new(); + for (cls, rule) in rules { + if rule.matches(&event) { + tags.insert(cls.clone()); } } - event.data.insert( - "$category".into(), - serde_json::json!(_cat_format_to_vec(category)), - ); - return event; + event.data.insert("$tags".into(), serde_json::json!(tags)); + event } fn _match(event: &Event, re: &Regex) -> bool { @@ -124,8 +93,8 @@ fn _match(event: &Event, re: &Regex) -> bool { return false; } -fn _pick_highest_ranking_category(acc: String, item: &String) -> String { - if item.matches("->").count() >= acc.matches("->").count() { +fn _pick_highest_ranking_category(acc: Vec, item: &Vec) -> Vec { + if item.len() >= acc.len() { // If tag is category with greater or equal depth than current, then choose the new one instead. item.clone() } else { @@ -133,14 +102,6 @@ fn _pick_highest_ranking_category(acc: String, item: &String) -> String { } } -fn _choose_category(tags: &HashSet) -> String { - tags.iter() - .fold("Uncategorized".to_string(), |acc, item| { - return _pick_highest_ranking_category(acc, &item); - }) - .clone() -} - fn _cat_format_to_vec(cat: String) -> Vec { cat.split("->").map(|s| s.trim().into()).collect() } @@ -156,15 +117,21 @@ fn test_categorize() { .insert("test".into(), serde_json::json!("just a test")); let mut events = vec![e]; - let cats: Vec<(Vec, Regex)> = vec![ - (vec!["Test".into()], Regex::new(r"test").unwrap()), + let rules: Vec<(Vec, Rule)> = vec![ + ( + vec!["Test".into()], + Rule::from(Regex::new(r"test").unwrap()), + ), ( vec!["Test".into(), "Subtest".into()], - Regex::new(r"test").unwrap(), + Rule::from(Regex::new(r"test").unwrap()), + ), + ( + vec!["Other".into()], + Rule::from(Regex::new(r"nonmatching").unwrap()), ), - (vec!["Other".into()], Regex::new(r"nonmatching").unwrap()), ]; - events = categorize(events, &cats); + events = 
categorize(events, &rules); assert_eq!(events.len(), 1); assert_eq!( @@ -174,48 +141,42 @@ fn test_categorize() { } #[test] -fn test_autotag() { +fn test_categorize_uncategorized() { + // Checks that the category correctly becomes uncategorized when no category matches let mut e = Event::default(); e.data .insert("test".into(), serde_json::json!("just a test")); let mut events = vec![e]; - let rules: Vec<(String, Regex)> = vec![ - ("test".into(), Regex::new(r"test").unwrap()), - ("test-2".into(), Regex::new(r"test").unwrap()), - ("nonmatching".into(), Regex::new(r"nonmatching").unwrap()), - ]; - events = classify(events, &rules); + let rules: Vec<(Vec, Rule)> = vec![( + vec!["Non-matching".into(), "test".into()], + Rule::from(Regex::new(r"not going to match").unwrap()), + )]; + events = categorize(events, &rules); assert_eq!(events.len(), 1); assert_eq!( - events - .first() - .unwrap() - .data - .get("$tags") - .unwrap() - .as_array() - .unwrap() - .len(), - 2 + events.first().unwrap().data.get("$category").unwrap(), + &serde_json::json!(vec!["Uncategorized"]) ); } #[test] -fn test_classify() { +fn test_autotag() { let mut e = Event::default(); e.data .insert("test".into(), serde_json::json!("just a test")); let mut events = vec![e]; - let classes: Vec<(String, Regex)> = vec![ - ("#test-tag".into(), Regex::new(r"test").unwrap()), - ("Test".into(), Regex::new(r"test").unwrap()), - ("Test -> Subtest".into(), Regex::new(r"test").unwrap()), - ("Other".into(), Regex::new(r"nonmatching").unwrap()), + let rules: Vec<(String, Rule)> = vec![ + ("test".into(), Rule::from(Regex::new(r"test").unwrap())), + ("test-2".into(), Rule::from(Regex::new(r"test").unwrap())), + ( + "nomatch".into(), + Rule::from(Regex::new(r"nomatch").unwrap()), + ), ]; - events = classify(events, &classes); + events = autotag(events, &rules); assert_eq!(events.len(), 1); assert_eq!( @@ -228,31 +189,6 @@ fn test_classify() { .as_array() .unwrap() .len(), - 3 - ); - assert_eq!( - 
events.first().unwrap().data.get("$category").unwrap(), - &serde_json::json!("Test -> Subtest") - ); -} - -#[test] -fn test_classify_uncategorized() { - // Checks that the category correctly becomes uncategorized when no category matches - let mut e = Event::default(); - e.data - .insert("test".into(), serde_json::json!("just a test")); - - let mut events = vec![e]; - let classes: Vec<(String, Regex)> = vec![( - "Non-matching -> Test".into(), - Regex::new(r"not going to match").unwrap(), - )]; - events = classify(events, &classes); - - assert_eq!(events.len(), 1); - assert_eq!( - events.first().unwrap().data.get("$category").unwrap(), - &serde_json::json!("Uncategorized") + 2 ); } diff --git a/tests/query.rs b/tests/query.rs index e91681d3..841c10f6 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -293,14 +293,15 @@ mod query_tests { let code = String::from("query_bucket(\"testid\");"); query::query(&code, &interval, &ds).unwrap(); - let code = format!(r#" + let code = format!( + r#" events = query_bucket("{}"); events = flood(events); events = sort_by_duration(events); events = limit_events(events, 10000); events = sort_by_timestamp(events); events = concat(events, query_bucket("{}")); - events = classify(events, [["test", "\#test-tag"], ["just", "Test -> Testing"]]); + events = categorize(events, [[["test"], "test$"], [["test", "testing"], "test-pat$"]]); total_duration = sum_durations(events); bucketnames = query_bucket_names(); print("test", "test2"); @@ -326,14 +327,16 @@ mod query_tests { let code = String::from("query_bucket(\"testid\");"); query::query(&code, &interval, &ds).unwrap(); - let code = format!(r#" + let code = format!( + r#" events = query_bucket("{}"); - events = classify(events, [["test-tag", "^value$"], ["Test -> Subtest", "^value$"]]); + events = categorize(events, [[["Test", "Subtest"], "^value$"]]); RETURN = events;"#, - "testid"); + "testid" + ); let events = match query::query(&code, &interval, &ds).unwrap() { query::DataType::List(l) 
=> l, - ref data => panic!("Wrong datatype, {:?}", data) + ref data => panic!("Wrong datatype, {:?}", data), }; println!("{:?}", events.first().unwrap()); From 85404307cf83d65c07043609718addb617c44db8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Fri, 4 Oct 2019 21:53:05 +0200 Subject: [PATCH 12/16] added proper/extended parsing of category rules --- src/query/functions.rs | 24 ++++++++++++++++++------ src/transform/classify.rs | 9 +++++++++ tests/query.rs | 2 ++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/query/functions.rs b/src/query/functions.rs index 40af5110..84ac0200 100644 --- a/src/query/functions.rs +++ b/src/query/functions.rs @@ -26,15 +26,11 @@ pub fn fill_env<'a>(env: &mut HashMap<&'a str, DataType>) { mod qfunctions { use std::collections::HashMap; - use regex::Regex; - use crate::query::DataType; use crate::query::QueryError; use crate::datastore::Datastore; use crate::transform; use super::validate; - use crate::transform::classify::Rule; - pub fn print(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { for arg in args { @@ -315,8 +311,24 @@ mod validate { for list in tagged_lists.drain(..) 
{ match list { DataType::List(ref l) => { - let regex: Regex = Regex::new(arg_type_string(l.get(1).unwrap())?).unwrap(); - lists.push((arg_type_string_list(l.get(0).unwrap())?.clone(), Rule::from(regex))); + let category = arg_type_string_list(l.get(0).unwrap())?.clone(); + //let regex: Regex = Regex::new(arg_type_string(l.get(1).unwrap())?).unwrap(); + let rulemap: HashMap = (match l.get(1).unwrap() { + DataType::Dict(d) => { + let map: HashMap = d.iter().map(|(k, v)| { (k.clone(), arg_type_string(v).unwrap().clone()) }).collect(); + Ok(map) + }, + DataType::String(s) => { + let regex_str: String = s.clone(); + let tuple: Vec<(String, String)> = vec![("regex".into(), regex_str)]; + let map: HashMap = tuple.iter().cloned().collect(); + Ok(map) + } + _ => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of ruleset, found something else") + )) + })?; + lists.push((category, Rule::from(rulemap))); }, ref invalid_type => return Err(QueryError::InvalidFunctionParameters( format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) diff --git a/src/transform/classify.rs b/src/transform/classify.rs index f545b8ac..14c9a117 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -2,6 +2,7 @@ /// /// Based on code in aw_research: https://github.com/ActivityWatch/aw-research/blob/master/aw_research/classify.py use std::collections::HashSet; +use std::collections::HashMap; use crate::models::Event; use regex::Regex; @@ -39,6 +40,14 @@ impl From for Rule { } } +impl From> for Rule { + fn from(obj: HashMap) -> Self { + Self { + regex: Some(Regex::new(obj.get("regex").unwrap()).unwrap()), + } + } +} + /// Categorizes a list of events /// /// An event can only have one category, although the category may have a hierarchy, diff --git a/tests/query.rs b/tests/query.rs index 841c10f6..82fd2969 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -331,6 +331,8 @@ mod query_tests { r#" events 
= query_bucket("{}"); events = categorize(events, [[["Test", "Subtest"], "^value$"]]); + events = categorize(events, [[["Test", "Subtest"], {{ "regex": "^value$" }}]]); + test = {{}}; RETURN = events;"#, "testid" ); From eefda1510351298b26bce5303298db9c966e8a27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Mon, 7 Oct 2019 13:34:06 +0200 Subject: [PATCH 13/16] made tag function fully functional in query2, changed to using TryFrom trait for conversion into/from query2 datatypes --- src/query/functions.rs | 116 ++++++++++---------------------- src/query/mod.rs | 138 ++++++++++++++++++++++++++++++++++++++ src/transform/classify.rs | 49 ++++++-------- tests/query.rs | 22 +++--- 4 files changed, 203 insertions(+), 122 deletions(-) diff --git a/src/query/functions.rs b/src/query/functions.rs index 84ac0200..f538b68f 100644 --- a/src/query/functions.rs +++ b/src/query/functions.rs @@ -22,13 +22,17 @@ pub fn fill_env<'a>(env: &mut HashMap<&'a str, DataType>) { env.insert("split_url_events", DataType::Function("split_url_events".to_string(), qfunctions::split_url_events)); env.insert("concat", DataType::Function("concat".to_string(), qfunctions::concat)); env.insert("categorize", DataType::Function("categorize".into(), qfunctions::categorize)); + env.insert("tag", DataType::Function("tag".into(), qfunctions::tag)); } mod qfunctions { + use std::convert::TryFrom; use std::collections::HashMap; + use crate::transform::classify::Rule; use crate::query::DataType; use crate::query::QueryError; use crate::datastore::Datastore; + use crate::models::Event; use crate::transform; use super::validate; @@ -45,7 +49,7 @@ mod qfunctions { let bucket_id = validate::arg_type_string(&args[0])?; let interval = validate::get_timeinterval (env)?; - let events = match ds.get_events(bucket_id, Some(interval.start().clone()), Some(interval.end().clone()), None) { + let events = match ds.get_events(bucket_id.as_str(), Some(interval.start().clone()), 
Some(interval.end().clone()), None) { Ok(events) => events, Err(e) => return Err(QueryError::BucketQueryError(format!("Failed to query bucket: {:?}", e))) }; @@ -108,8 +112,8 @@ mod qfunctions { ) -> Result { // typecheck validate::args_length(&args, 2)?; - let events = validate::arg_type_event_list(&args[0])?.clone(); - let rules = validate::arg_type_list_of_category_rules(&args[1])?; + let events: Vec = Vec::try_from(&args[0])?; + let rules: Vec<(Vec, Rule)> = Vec::try_from(&args[1])?; // Run categorize let mut flooded_events = transform::classify::categorize(events, &rules); // Put events back into DataType::Event container @@ -120,6 +124,25 @@ mod qfunctions { return Ok(DataType::List(tagged_flooded_events)); } + pub fn tag( + args: Vec, + _env: &HashMap<&str, DataType>, + _ds: &Datastore, + ) -> Result { + // typecheck + validate::args_length(&args, 2)?; + let events: Vec = Vec::try_from(&args[0])?; + let rules: Vec<(String, Rule)> = Vec::try_from(&args[1])?; + // Run categorize + let mut flooded_events = transform::classify::tag(events, &rules); + // Put events back into DataType::Event container + let mut tagged_flooded_events = Vec::new(); + for event in flooded_events.drain(..) 
{ + tagged_flooded_events.push(DataType::Event(event)); + } + return Ok(DataType::List(tagged_flooded_events)); + } + pub fn sort_by_duration(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; @@ -263,9 +286,8 @@ mod validate { use crate::query::{QueryError, DataType}; use crate::models::Event; use crate::models::TimeInterval; - use crate::transform::classify::Rule; - use regex::Regex; use std::collections::HashMap; + use std::convert::TryFrom; pub fn args_length(args: &Vec, len: usize) -> Result<(), QueryError> { if args.len() != len { @@ -276,94 +298,24 @@ mod validate { return Ok(()); } - pub fn arg_type_string (arg: &DataType) -> Result<&String, QueryError> { - match arg { - DataType::String(ref s) => Ok(s), - ref invalid_type => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type String, got {:?}", invalid_type) - )) - } + pub fn arg_type_string (arg: &DataType) -> Result { + String::try_from(arg) } pub fn arg_type_number (arg: &DataType) -> Result { - match arg { - DataType::Number(f) => Ok(*f), - ref invalid_type => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type Number, got {:?}", invalid_type) - )) - } + f64::try_from(arg) } - pub fn arg_type_list (arg: &DataType) -> Result<&Vec, QueryError> { - match arg { - DataType::List(ref s) => Ok(s), - ref invalid_type => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type List, got {:?}", invalid_type) - )) - } - } - - pub fn arg_type_list_of_category_rules( - arg: &DataType, - ) -> Result, Rule)>, QueryError> { - let mut tagged_lists = arg_type_list(arg)?.clone(); - let mut lists: Vec<(Vec, Rule)> = Vec::new(); - for list in tagged_lists.drain(..) 
{ - match list { - DataType::List(ref l) => { - let category = arg_type_string_list(l.get(0).unwrap())?.clone(); - //let regex: Regex = Regex::new(arg_type_string(l.get(1).unwrap())?).unwrap(); - let rulemap: HashMap = (match l.get(1).unwrap() { - DataType::Dict(d) => { - let map: HashMap = d.iter().map(|(k, v)| { (k.clone(), arg_type_string(v).unwrap().clone()) }).collect(); - Ok(map) - }, - DataType::String(s) => { - let regex_str: String = s.clone(); - let tuple: Vec<(String, String)> = vec![("regex".into(), regex_str)]; - let map: HashMap = tuple.iter().cloned().collect(); - Ok(map) - } - _ => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of ruleset, found something else") - )) - })?; - lists.push((category, Rule::from(rulemap))); - }, - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) - )) - } - } - return Ok(lists); + pub fn arg_type_list (arg: &DataType) -> Result, QueryError> { + Vec::try_from(arg) } pub fn arg_type_event_list (arg: &DataType) -> Result, QueryError> { - let mut tagged_events = arg_type_list(arg)?.clone(); - let mut events = Vec::new(); - for event in tagged_events.drain(..) { - match event { - DataType::Event(e) => events.push(e.clone()), - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type List of Events, list contains {:?}", invalid_type) - )) - } - } - return Ok(events); + Vec::try_from(arg) } pub fn arg_type_string_list (arg: &DataType) -> Result, QueryError> { - let mut tagged_strings = arg_type_list(arg)?.clone(); - let mut strings = Vec::new(); - for string in tagged_strings.drain(..) 
{ - match string { - DataType::String(s) => strings.push(s.clone()), - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type List of Strings, list contains {:?}", invalid_type) - )) - } - } - return Ok(strings); + Vec::try_from(arg) } use serde_json::value::Value; diff --git a/src/query/mod.rs b/src/query/mod.rs index 8bc8424b..f00450cb 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -108,6 +108,144 @@ impl PartialEq for DataType { } } +use std::convert::TryFrom; + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + match value { + DataType::List(ref s) => Ok(s.clone()), + ref invalid_type => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type List, got {:?}", invalid_type) + )) + } + } +} + +impl TryFrom<&DataType> for String { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + match value { + DataType::String(s) => Ok(s.clone()), + ref invalid_type => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type List of Strings, list contains {:?}", invalid_type) + )) + } + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_strings: Vec = Vec::try_from(value)?; + let mut strings = Vec::new(); + for string in tagged_strings.drain(..) 
{ + let s: String = String::try_from(&string)?; + strings.push(s); + } + return Ok(strings); + } +} + +impl TryFrom<&DataType> for Rule { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let rulemap: HashMap = (match value { + DataType::Dict(d) => { + let map: HashMap = d.iter().map(|(k, v)| { + let s: String = String::try_from(v).unwrap(); + (k.clone(), s.clone()) + }).collect(); + Ok(map) + }, + DataType::String(s) => { + let regex_str: String = s.clone(); + let tuple: Vec<(String, String)> = vec![("regex".into(), regex_str)]; + let map: HashMap = tuple.iter().cloned().collect(); + Ok(map) + } + _ => Err(QueryError::InvalidFunctionParameters( + format!("Expected rule, found something else") + )) + })?; + Ok(Rule::from(rulemap)) + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_events = Vec::try_from(value)?; + let mut events = Vec::new(); + for event in tagged_events.drain(..) { + match event { + DataType::Event(e) => events.push(e.clone()), + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type List of Events, list contains {:?}", invalid_type) + )) + } + } + return Ok(events); + } +} + +use crate::transform::classify::Rule; + +impl TryFrom<&DataType> for Vec<(String, Rule)> { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_lists: Vec = Vec::try_from(value)?; + let mut lists: Vec<(String, Rule)> = Vec::new(); + for list in tagged_lists.drain(..) 
{ + match list { + DataType::List(ref l) => { + let category: String = String::try_from(l.get(0).unwrap())?; + let rule = Rule::try_from(l.get(1).unwrap())?; + lists.push((category, rule)); + }, + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) + )) + } + } + return Ok(lists); + } +} + +impl TryFrom<&DataType> for Vec<(Vec, Rule)> { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_lists: Vec = Vec::try_from(value)?; + let mut lists: Vec<(Vec, Rule)> = Vec::new(); + for list in tagged_lists.drain(..) { + match list { + DataType::List(ref l) => { + let category: Vec = Vec::try_from(l.get(0).unwrap())?; + let rule = Rule::try_from(l.get(1).unwrap())?; + lists.push((category, rule)); + }, + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) + )) + } + } + return Ok(lists); + } +} + + +impl TryFrom<&DataType> for f64 { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + match value { + DataType::Number(f) => Ok(*f), + ref invalid_type => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type Number, got {:?}", invalid_type) + )) + } + } +} + mod lexer { use plex::lexer; diff --git a/src/transform/classify.rs b/src/transform/classify.rs index 14c9a117..62b18066 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -1,8 +1,8 @@ +use std::collections::HashMap; /// Transforms for classifying (tagging and categorizing) events. 
/// /// Based on code in aw_research: https://github.com/ActivityWatch/aw-research/blob/master/aw_research/classify.py use std::collections::HashSet; -use std::collections::HashMap; use crate::models::Event; use regex::Regex; @@ -52,7 +52,7 @@ impl From> for Rule { /// /// An event can only have one category, although the category may have a hierarchy, /// for instance: "Work -> ActivityWatch -> aw-server-rust" -/// A category is chosed out of the tags used some rule (such as picking the one that's deepest in the hierarchy) +/// If multiple categories match, the deepest one will be chosen. pub fn categorize(mut events: Vec, rules: &Vec<(Vec, Rule)>) -> Vec { let mut classified_events = Vec::new(); for event in events.drain(..) { @@ -74,34 +74,31 @@ fn categorize_one(mut event: Event, rules: &Vec<(Vec, Rule)>) -> Event { return event; } -pub fn autotag(mut events: Vec, rules: &Vec<(String, Rule)>) -> Vec { +/// Tags a list of events +/// +/// An event can have many tags (as opposed to only one category) which will be put into the `$tags` key of +/// the event data object. +pub fn tag(mut events: Vec, rules: &Vec<(String, Rule)>) -> Vec { let mut events_tagged = Vec::new(); for event in events.drain(..) 
{ - events_tagged.push(autotag_one(event, &rules)); + events_tagged.push(tag_one(event, &rules)); } return events_tagged; } -fn autotag_one(mut event: Event, rules: &Vec<(String, Rule)>) -> Event { - let mut tags: HashSet = HashSet::new(); +fn tag_one(mut event: Event, rules: &Vec<(String, Rule)>) -> Event { + let mut tags: Vec = Vec::new(); for (cls, rule) in rules { if rule.matches(&event) { - tags.insert(cls.clone()); + tags.push(cls.clone()); } } + tags.sort_unstable(); + tags.dedup(); event.data.insert("$tags".into(), serde_json::json!(tags)); event } -fn _match(event: &Event, re: &Regex) -> bool { - for val in event.data.values() { - if val.is_string() && re.is_match(val.as_str().unwrap()) { - return true; - } - } - return false; -} - fn _pick_highest_ranking_category(acc: Vec, item: &Vec) -> Vec { if item.len() >= acc.len() { // If tag is category with greater or equal depth than current, then choose the new one instead. @@ -171,7 +168,7 @@ fn test_categorize_uncategorized() { } #[test] -fn test_autotag() { +fn test_tag() { let mut e = Event::default(); e.data .insert("test".into(), serde_json::json!("just a test")); @@ -185,19 +182,11 @@ fn test_autotag() { Rule::from(Regex::new(r"nomatch").unwrap()), ), ]; - events = autotag(events, &rules); + events = tag(events, &rules); assert_eq!(events.len(), 1); - assert_eq!( - events - .first() - .unwrap() - .data - .get("$tags") - .unwrap() - .as_array() - .unwrap() - .len(), - 2 - ); + + let event = events.first().unwrap(); + let tags = event.data.get("$tags").unwrap(); + assert_eq!(tags, &serde_json::json!(vec!["test", "test-2"])); } diff --git a/tests/query.rs b/tests/query.rs index 82fd2969..2c70acd3 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -9,6 +9,7 @@ mod query_tests { use chrono; use chrono::Duration; use serde_json::json; + use std::convert::TryFrom; use aw_server::query; use aw_server::query::QueryError; @@ -301,7 +302,8 @@ mod query_tests { events = limit_events(events, 10000); events = 
sort_by_timestamp(events); events = concat(events, query_bucket("{}")); - events = categorize(events, [[["test"], "test$"], [["test", "testing"], "test-pat$"]]); + events = categorize(events, [[["test"], "value$"], [["test", "testing"], "value$"]]); + events = tag(events, [["testtag", "test$"], ["another testtag", "test-pat$"]]); total_duration = sum_durations(events); bucketnames = query_bucket_names(); print("test", "test2"); @@ -332,19 +334,19 @@ mod query_tests { events = query_bucket("{}"); events = categorize(events, [[["Test", "Subtest"], "^value$"]]); events = categorize(events, [[["Test", "Subtest"], {{ "regex": "^value$" }}]]); + events = tag(events, [["testtag", "value$"], ["another testtag", "value$"]]); test = {{}}; RETURN = events;"#, "testid" ); - let events = match query::query(&code, &interval, &ds).unwrap() { - query::DataType::List(l) => l, - ref data => panic!("Wrong datatype, {:?}", data), - }; - - println!("{:?}", events.first().unwrap()); - // TODO: assert_eq result - //assert_eq!(events.first().unwrap().data.get("$tags").unwrap().len(), 2); - //assert_eq!(events.first().unwrap().data.get("$category").unwrap(), "Test -> Subtest"); + let result: DataType = query::query(&code, &interval, &ds).unwrap(); + let events: Vec = Vec::try_from(&result).unwrap(); + + let event = events.first().unwrap(); + let tags = event.data.get("$tags").unwrap().as_array().unwrap(); + let cats = event.data.get("$category").unwrap(); + assert_eq!(tags.len(), 2); + assert_eq!(cats, &serde_json::json!(vec!["Test", "Subtest"])); } #[test] From 73b52bf6d5125d61b84333f0a0b62bdfbeb7caa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Mon, 7 Oct 2019 14:26:49 +0200 Subject: [PATCH 14/16] moved all DataType-related things to seperate file, removed unused imports, removed arg_type_x functions --- src/query/datatype.rs | 253 ++++++++++++++++++++++++++++++++++++++ src/query/functions.rs | 78 +++--------- src/query/mod.rs | 226 
+--------------------------------- src/transform/classify.rs | 3 +- tests/sync.rs | 2 +- 5 files changed, 279 insertions(+), 283 deletions(-) create mode 100644 src/query/datatype.rs diff --git a/src/query/datatype.rs b/src/query/datatype.rs new file mode 100644 index 00000000..d6c2dca8 --- /dev/null +++ b/src/query/datatype.rs @@ -0,0 +1,253 @@ +use std::collections::HashMap; +use std::fmt; +use std::convert::{TryFrom,TryInto}; + +use crate::models::Event; +use crate::transform::classify::Rule; +use super::QueryError; +use super::functions; + +use serde::Serializer; +use serde_json::value::Value; +use serde_json::Number; + + +// TODO: greater/less comparisons + +#[derive(Clone,Serialize)] +#[serde(untagged)] +pub enum DataType { + None(), + Bool(bool), + Number(f64), + String(String), + Event(Event), + List(Vec), + Dict(HashMap), + #[serde(serialize_with = "serialize_function")] + Function(String, functions::QueryFn), +} + +fn serialize_function(_element: &String, _fun: &functions::QueryFn, _serializer: S) -> Result + where S: Serializer +{ + panic!("Query function was unevaluated and was attempted to be serialized, panic!"); + //element.id.serialize(serializer) +} + + +// Needed because of a limitation in rust where you cannot derive(Debug) on a +// enum which has a fn with reference parameters which our QueryFn has +// https://stackoverflow.com/questions/53380040/function-pointer-with-a-reference-argument-cannot-derive-debug +impl fmt::Debug for DataType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DataType::None() => write!(f, "None()"), + DataType::Bool(b) => write!(f, "Bool({})", b), + DataType::Number(n) => write!(f, "Number({})", n), + DataType::String(s) => write!(f, "String({})", s), + DataType::Event(e) => write!(f, "Event({:?})", e), + DataType::List(l) => write!(f, "List({:?})", l), + DataType::Dict(d) => write!(f, "Dict({:?})", d), + DataType::Function(name, _fun) => write!(f, "Function({})", name), + } + } +} + +/* Like 
eq, but raises an error when comparing between different types. + * Should be used as often as possible */ +impl DataType { + pub fn query_eq(&self, other: &DataType) -> Result { + match (self, other) { + // TODO: Comparisons of bool == num, bool == str + (DataType::None(), DataType::None()) => Ok(false), + (DataType::Bool(b1), DataType::Bool(b2)) => Ok(b1 == b2), + (DataType::Number(n1), DataType::Number(n2)) => Ok(n1 == n2), + (DataType::String(s1), DataType::String(s2)) => Ok(s1 == s2), + (DataType::Event(e1), DataType::Event(e2)) => Ok(e1 == e2), + (DataType::List(l1), DataType::List(l2)) => Ok(l1 == l2), + (DataType::Dict(d1), DataType::Dict(d2)) => Ok(d1 == d2), + // We do not care about comparing functions + _ => Err(QueryError::InvalidType(format!("Cannot compare values of different types {:?} and {:?}", self, other))), + } + } +} + +/* Required for query_eq when comparing two dicts */ +impl PartialEq for DataType { + fn eq(&self, other: &DataType) -> bool { + match (self, other) { + (DataType::None(), DataType::None()) => true, + // TODO: Comparisons of bool == num, bool == str + (DataType::Bool(b1), DataType::Bool(b2)) => b1 == b2, + (DataType::Number(n1), DataType::Number(n2)) => n1 == n2, + (DataType::String(s1), DataType::String(s2)) => s1 == s2, + (DataType::Event(e1), DataType::Event(e2)) => e1 == e2, + (DataType::List(l1), DataType::List(l2)) => l1 == l2, + (DataType::Dict(d1), DataType::Dict(d2)) => d1 == d2, + // We do not care about comparing functions + _ => false + } + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + match value { + DataType::List(ref s) => Ok(s.clone()), + ref invalid_type => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type List, got {:?}", invalid_type) + )) + } + } +} + +impl TryFrom<&DataType> for String { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + match value { + DataType::String(s) => 
Ok(s.clone()), + ref invalid_type => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type List of Strings, list contains {:?}", invalid_type) + )) + } + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_strings: Vec = Vec::try_from(value)?; + let mut strings = Vec::new(); + for string in tagged_strings.drain(..) { + let s: String = String::try_from(&string)?; + strings.push(s); + } + return Ok(strings); + } +} + +impl TryFrom<&DataType> for Rule { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let rulemap: HashMap = (match value { + DataType::Dict(d) => { + let map: HashMap = d.iter().map(|(k, v)| { + let s: String = String::try_from(v).unwrap(); + (k.clone(), s.clone()) + }).collect(); + Ok(map) + }, + DataType::String(s) => { + let regex_str: String = s.clone(); + let tuple: Vec<(String, String)> = vec![("regex".into(), regex_str)]; + let map: HashMap = tuple.iter().cloned().collect(); + Ok(map) + } + _ => Err(QueryError::InvalidFunctionParameters( + format!("Expected rule, found something else") + )) + })?; + Ok(Rule::from(rulemap)) + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_events = Vec::try_from(value)?; + let mut events = Vec::new(); + for event in tagged_events.drain(..) { + match event { + DataType::Event(e) => events.push(e.clone()), + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type List of Events, list contains {:?}", invalid_type) + )) + } + } + return Ok(events); + } +} + +impl TryFrom<&DataType> for Vec<(String, Rule)> { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_lists: Vec = Vec::try_from(value)?; + let mut lists: Vec<(String, Rule)> = Vec::new(); + for list in tagged_lists.drain(..) 
{ + match list { + DataType::List(ref l) => { + let category: String = String::try_from(l.get(0).unwrap())?; + let rule = Rule::try_from(l.get(1).unwrap())?; + lists.push((category, rule)); + }, + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) + )) + } + } + return Ok(lists); + } +} + +impl TryFrom<&DataType> for Vec<(Vec, Rule)> { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_lists: Vec = Vec::try_from(value)?; + let mut lists: Vec<(Vec, Rule)> = Vec::new(); + for list in tagged_lists.drain(..) { + match list { + DataType::List(ref l) => { + let category: Vec = Vec::try_from(l.get(0).unwrap())?; + let rule = Rule::try_from(l.get(1).unwrap())?; + lists.push((category, rule)); + }, + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) + )) + } + } + return Ok(lists); + } +} + +impl TryFrom<&DataType> for f64 { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + match value { + DataType::Number(f) => Ok(*f), + ref invalid_type => Err(QueryError::InvalidFunctionParameters( + format!("Expected function parameter of type Number, got {:?}", invalid_type) + )) + } + } +} + +impl TryFrom<&DataType> for usize { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let f: f64 = value.try_into()?; + Ok(f as usize) + } +} + +impl TryFrom<&DataType> for Vec { + type Error = QueryError; + fn try_from(value: &DataType) -> Result { + let mut tagged_strings: Vec = value.try_into()?; + let mut strings = Vec::new(); + for string in tagged_strings.drain(..) 
{ + match string { + DataType::String(s) => strings.push(Value::String(s)), + DataType::Number(n) => strings.push(Value::Number(Number::from_f64(n).unwrap())), + //DataType::Bool(b) => strings.push(json!(b)), + DataType::None() => strings.push(Value::Null), + ref invalid_type => return Err(QueryError::InvalidFunctionParameters( + format!("Query2 support for parsing values is limited and only supports strings, numbers and null, list contains {:?}", invalid_type) + )) + } + } + return Ok(strings); + } +} diff --git a/src/query/functions.rs b/src/query/functions.rs index f538b68f..d0e75ad6 100644 --- a/src/query/functions.rs +++ b/src/query/functions.rs @@ -27,6 +27,7 @@ pub fn fill_env<'a>(env: &mut HashMap<&'a str, DataType>) { mod qfunctions { use std::convert::TryFrom; + use std::convert::TryInto; use std::collections::HashMap; use crate::transform::classify::Rule; use crate::query::DataType; @@ -46,7 +47,7 @@ mod qfunctions { pub fn query_bucket(args: Vec, env: &HashMap<&str, DataType>, ds: &Datastore) -> Result { // Typecheck validate::args_length(&args, 1)?; - let bucket_id = validate::arg_type_string(&args[0])?; + let bucket_id: String = (&args[0]).try_into()?; let interval = validate::get_timeinterval (env)?; let events = match ds.get_events(bucket_id.as_str(), Some(interval.start().clone()), Some(interval.end().clone()), None) { @@ -94,7 +95,7 @@ mod qfunctions { pub fn flood(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let events = validate::arg_type_event_list(&args[0])?.clone(); + let events: Vec = (&args[0]).try_into()?; // Run flood let mut flooded_events = transform::flood(events, chrono::Duration::seconds(5)); // Put events back into DataType::Event container @@ -146,7 +147,7 @@ mod qfunctions { pub fn sort_by_duration(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let events = 
validate::arg_type_event_list(&args[0])?; + let events: Vec = (&args[0]).try_into()?; // Sort by duration let mut sorted_events = transform::sort_by_duration(events); @@ -161,8 +162,8 @@ mod qfunctions { pub fn limit_events(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 2)?; - let mut events = validate::arg_type_event_list(&args[0])?.clone(); - let mut limit = validate::arg_type_number(&args[1])? as usize; + let mut events: Vec = (&args[0]).try_into()?; + let mut limit: usize = (&args[1]).try_into()?; if events.len() < limit { limit = events.len() } let mut limited_tagged_events = Vec::new(); @@ -175,7 +176,7 @@ mod qfunctions { pub fn sort_by_timestamp(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let events = validate::arg_type_event_list(&args[0])?; + let events: Vec = (&args[0]).try_into()?; // Sort by duration let mut sorted_events = transform::sort_by_timestamp(events); @@ -190,7 +191,7 @@ mod qfunctions { pub fn sum_durations(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let mut events = validate::arg_type_event_list(&args[0])?.clone(); + let mut events: Vec = (&args[0]).try_into()?; // Sort by duration let mut sum_durations = chrono::Duration::zero(); @@ -203,8 +204,8 @@ mod qfunctions { pub fn merge_events_by_keys(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 2)?; - let events = validate::arg_type_event_list(&args[0])?; - let keys = validate::arg_type_string_list(&args[1])?; + let events: Vec = (&args[0]).try_into()?; + let keys: Vec = (&args[1]).try_into()?; let mut merged_events = transform::merge_events_by_keys(events, keys); let mut merged_tagged_events = Vec::new(); @@ -217,8 +218,8 @@ mod qfunctions { pub fn chunk_events_by_key(args: Vec, _env: &HashMap<&str, DataType>, 
_ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 2)?; - let events = validate::arg_type_event_list(&args[0])?; - let key = validate::arg_type_string(&args[1])?; + let events: Vec = (&args[0]).try_into()?; + let key: String = (&args[1]).try_into()?; let mut merged_events = transform::chunk_events_by_key(events, &key); let mut merged_tagged_events = Vec::new(); @@ -231,9 +232,9 @@ mod qfunctions { pub fn filter_keyvals(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 3)?; - let events = validate::arg_type_event_list(&args[0])?; - let key = validate::arg_type_string(&args[1])?; - let vals = validate::arg_type_value_list(&args[2])?; + let events = (&args[0]).try_into()?; + let key: String = (&args[1]).try_into()?; + let vals: Vec<_> = (&args[2]).try_into()?; let mut filtered_events = transform::filter_keyvals(events, &key, &vals); let mut filtered_tagged_events = Vec::new(); @@ -246,8 +247,8 @@ mod qfunctions { pub fn filter_period_intersect(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 2)?; - let events = validate::arg_type_event_list(&args[0])?; - let filter_events = validate::arg_type_event_list(&args[1])?; + let events = (&args[0]).try_into()?; + let filter_events = (&args[1]).try_into()?; let mut filtered_events = transform::filter_period_intersect(&events, &filter_events); let mut filtered_tagged_events = Vec::new(); @@ -260,7 +261,7 @@ mod qfunctions { pub fn split_url_events(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { // typecheck validate::args_length(&args, 1)?; - let mut events = validate::arg_type_event_list(&args[0])?; + let mut events: Vec = (&args[0]).try_into()?; let mut tagged_split_url_events = Vec::new(); for mut event in events.drain(..) 
{ @@ -273,7 +274,7 @@ mod qfunctions { pub fn concat(args: Vec, _env: &HashMap<&str, DataType>, _ds: &Datastore) -> Result { let mut event_list = Vec::new(); for arg in args { - let mut events = validate::arg_type_event_list(&arg)?; + let mut events: Vec = (&arg).try_into()?; for event in events.drain(..) { event_list.push(DataType::Event(event)); } @@ -284,10 +285,8 @@ mod qfunctions { mod validate { use crate::query::{QueryError, DataType}; - use crate::models::Event; use crate::models::TimeInterval; use std::collections::HashMap; - use std::convert::TryFrom; pub fn args_length(args: &Vec, len: usize) -> Result<(), QueryError> { if args.len() != len { @@ -298,45 +297,6 @@ mod validate { return Ok(()); } - pub fn arg_type_string (arg: &DataType) -> Result { - String::try_from(arg) - } - - pub fn arg_type_number (arg: &DataType) -> Result { - f64::try_from(arg) - } - - pub fn arg_type_list (arg: &DataType) -> Result, QueryError> { - Vec::try_from(arg) - } - - pub fn arg_type_event_list (arg: &DataType) -> Result, QueryError> { - Vec::try_from(arg) - } - - pub fn arg_type_string_list (arg: &DataType) -> Result, QueryError> { - Vec::try_from(arg) - } - - use serde_json::value::Value; - use serde_json::Number; - pub fn arg_type_value_list (arg: &DataType) -> Result, QueryError> { - let mut tagged_strings = arg_type_list(arg)?.clone(); - let mut strings = Vec::new(); - for string in tagged_strings.drain(..) 
{ - match string { - DataType::String(s) => strings.push(Value::String(s)), - DataType::Number(n) => strings.push(Value::Number(Number::from_f64(n).unwrap())), - //DataType::Bool(b) => strings.push(json!(b)), - DataType::None() => strings.push(Value::Null), - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Query2 support for parsing values is limited and only supports strings, numbers and null, list contains {:?}", invalid_type) - )) - } - } - return Ok(strings); - } - pub fn get_timeinterval (env: &HashMap<&str, DataType>) -> Result { let interval_str = match env.get("TIMEINTERVAL") { Some(data_ti) => match data_ti { diff --git a/src/query/mod.rs b/src/query/mod.rs index f00450cb..8182e02b 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -1,13 +1,14 @@ -use std::collections::HashMap; +use std::fmt; use crate::datastore::Datastore; -use crate::models::Event; use crate::models::TimeInterval; -use serde::Serializer; + +pub mod datatype; + +pub use crate::query::datatype::DataType; // TODO: add line numbers to errors // (works during lexing, but not during parsing I believe) -// TODO: greater/less comparisons #[derive(Debug)] pub enum QueryError { @@ -30,223 +31,6 @@ impl fmt::Display for QueryError { } } -#[derive(Clone,Serialize)] -#[serde(untagged)] -pub enum DataType { - None(), - Bool(bool), - Number(f64), - String(String), - Event(Event), - List(Vec), - Dict(HashMap), - #[serde(serialize_with = "serialize_function")] - Function(String, functions::QueryFn), -} - -fn serialize_function(_element: &String, _fun: &functions::QueryFn, _serializer: S) -> Result - where S: Serializer -{ - panic!("Query function was unevaluated and was attempted to be serialized, panic!"); - //element.id.serialize(serializer) -} - -use std::fmt; - -// Needed because of a limitation in rust where you cannot derive(Debug) on a -// enum which has a fn with reference parameters which our QueryFn has -// 
https://stackoverflow.com/questions/53380040/function-pointer-with-a-reference-argument-cannot-derive-debug -impl fmt::Debug for DataType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - DataType::None() => write!(f, "None()"), - DataType::Bool(b) => write!(f, "Bool({})", b), - DataType::Number(n) => write!(f, "Number({})", n), - DataType::String(s) => write!(f, "String({})", s), - DataType::Event(e) => write!(f, "Event({:?})", e), - DataType::List(l) => write!(f, "List({:?})", l), - DataType::Dict(d) => write!(f, "Dict({:?})", d), - DataType::Function(name, _fun) => write!(f, "Function({})", name), - } - } -} - -/* Like eq, but raises an error when comparing between different types. - * Should be used as often as possible */ -impl DataType { - fn query_eq(&self, other: &DataType) -> Result { - match (self, other) { - // TODO: Comparisons of bool == num, bool == str - (DataType::None(), DataType::None()) => Ok(false), - (DataType::Bool(b1), DataType::Bool(b2)) => Ok(b1 == b2), - (DataType::Number(n1), DataType::Number(n2)) => Ok(n1 == n2), - (DataType::String(s1), DataType::String(s2)) => Ok(s1 == s2), - (DataType::Event(e1), DataType::Event(e2)) => Ok(e1 == e2), - (DataType::List(l1), DataType::List(l2)) => Ok(l1 == l2), - (DataType::Dict(d1), DataType::Dict(d2)) => Ok(d1 == d2), - // We do not care about comparing functions - _ => Err(QueryError::InvalidType(format!("Cannot compare values of different types {:?} and {:?}", self, other))), - } - } -} - -/* Required for query_eq when comparing two dicts */ -impl PartialEq for DataType { - fn eq(&self, other: &DataType) -> bool { - match (self, other) { - (DataType::None(), DataType::None()) => true, - // TODO: Comparisons of bool == num, bool == str - (DataType::Bool(b1), DataType::Bool(b2)) => b1 == b2, - (DataType::Number(n1), DataType::Number(n2)) => n1 == n2, - (DataType::String(s1), DataType::String(s2)) => s1 == s2, - (DataType::Event(e1), DataType::Event(e2)) => e1 == e2, - 
(DataType::List(l1), DataType::List(l2)) => l1 == l2, - (DataType::Dict(d1), DataType::Dict(d2)) => d1 == d2, - // We do not care about comparing functions - _ => false - } - } -} - -use std::convert::TryFrom; - -impl TryFrom<&DataType> for Vec { - type Error = QueryError; - fn try_from(value: &DataType) -> Result { - match value { - DataType::List(ref s) => Ok(s.clone()), - ref invalid_type => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type List, got {:?}", invalid_type) - )) - } - } -} - -impl TryFrom<&DataType> for String { - type Error = QueryError; - fn try_from(value: &DataType) -> Result { - match value { - DataType::String(s) => Ok(s.clone()), - ref invalid_type => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type List of Strings, list contains {:?}", invalid_type) - )) - } - } -} - -impl TryFrom<&DataType> for Vec { - type Error = QueryError; - fn try_from(value: &DataType) -> Result { - let mut tagged_strings: Vec = Vec::try_from(value)?; - let mut strings = Vec::new(); - for string in tagged_strings.drain(..) 
{ - let s: String = String::try_from(&string)?; - strings.push(s); - } - return Ok(strings); - } -} - -impl TryFrom<&DataType> for Rule { - type Error = QueryError; - fn try_from(value: &DataType) -> Result { - let rulemap: HashMap = (match value { - DataType::Dict(d) => { - let map: HashMap = d.iter().map(|(k, v)| { - let s: String = String::try_from(v).unwrap(); - (k.clone(), s.clone()) - }).collect(); - Ok(map) - }, - DataType::String(s) => { - let regex_str: String = s.clone(); - let tuple: Vec<(String, String)> = vec![("regex".into(), regex_str)]; - let map: HashMap = tuple.iter().cloned().collect(); - Ok(map) - } - _ => Err(QueryError::InvalidFunctionParameters( - format!("Expected rule, found something else") - )) - })?; - Ok(Rule::from(rulemap)) - } -} - -impl TryFrom<&DataType> for Vec { - type Error = QueryError; - fn try_from(value: &DataType) -> Result { - let mut tagged_events = Vec::try_from(value)?; - let mut events = Vec::new(); - for event in tagged_events.drain(..) { - match event { - DataType::Event(e) => events.push(e.clone()), - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type List of Events, list contains {:?}", invalid_type) - )) - } - } - return Ok(events); - } -} - -use crate::transform::classify::Rule; - -impl TryFrom<&DataType> for Vec<(String, Rule)> { - type Error = QueryError; - fn try_from(value: &DataType) -> Result { - let mut tagged_lists: Vec = Vec::try_from(value)?; - let mut lists: Vec<(String, Rule)> = Vec::new(); - for list in tagged_lists.drain(..) 
{ - match list { - DataType::List(ref l) => { - let category: String = String::try_from(l.get(0).unwrap())?; - let rule = Rule::try_from(l.get(1).unwrap())?; - lists.push((category, rule)); - }, - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) - )) - } - } - return Ok(lists); - } -} - -impl TryFrom<&DataType> for Vec<(Vec, Rule)> { - type Error = QueryError; - fn try_from(value: &DataType) -> Result { - let mut tagged_lists: Vec = Vec::try_from(value)?; - let mut lists: Vec<(Vec, Rule)> = Vec::new(); - for list in tagged_lists.drain(..) { - match list { - DataType::List(ref l) => { - let category: Vec = Vec::try_from(l.get(0).unwrap())?; - let rule = Rule::try_from(l.get(1).unwrap())?; - lists.push((category, rule)); - }, - ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) - )) - } - } - return Ok(lists); - } -} - - -impl TryFrom<&DataType> for f64 { - type Error = QueryError; - fn try_from(value: &DataType) -> Result { - match value { - DataType::Number(f) => Ok(*f), - ref invalid_type => Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type Number, got {:?}", invalid_type) - )) - } - } -} - - mod lexer { use plex::lexer; diff --git a/src/transform/classify.rs b/src/transform/classify.rs index 62b18066..d2b748b6 100644 --- a/src/transform/classify.rs +++ b/src/transform/classify.rs @@ -1,9 +1,8 @@ -use std::collections::HashMap; /// Transforms for classifying (tagging and categorizing) events. 
/// /// Based on code in aw_research: https://github.com/ActivityWatch/aw-research/blob/master/aw_research/classify.py -use std::collections::HashSet; +use std::collections::HashMap; use crate::models::Event; use regex::Regex; use serde_json; diff --git a/tests/sync.rs b/tests/sync.rs index 693a7116..031da2d6 100644 --- a/tests/sync.rs +++ b/tests/sync.rs @@ -3,7 +3,7 @@ #[cfg(test)] mod sync_tests { use std::collections::{HashMap}; - use chrono::{DateTime, Utc, Duration}; + use chrono::{DateTime, Utc}; use aw_server::models::{Bucket, Event}; use aw_server::datastore::{Datastore, DatastoreError}; From f65d6ec3b81126fc63751fb66fe4fee62f0e0cee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Mon, 7 Oct 2019 14:36:14 +0200 Subject: [PATCH 15/16] cleaned up TryFrom traits --- src/query/datatype.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/query/datatype.rs b/src/query/datatype.rs index d6c2dca8..df208d52 100644 --- a/src/query/datatype.rs +++ b/src/query/datatype.rs @@ -118,10 +118,10 @@ impl TryFrom<&DataType> for String { impl TryFrom<&DataType> for Vec { type Error = QueryError; fn try_from(value: &DataType) -> Result { - let mut tagged_strings: Vec = Vec::try_from(value)?; + let mut tagged_strings: Vec = value.try_into()?; let mut strings = Vec::new(); for string in tagged_strings.drain(..) 
{ - let s: String = String::try_from(&string)?; + let s: String = (&string).try_into()?; strings.push(s); } return Ok(strings); @@ -134,7 +134,7 @@ impl TryFrom<&DataType> for Rule { let rulemap: HashMap = (match value { DataType::Dict(d) => { let map: HashMap = d.iter().map(|(k, v)| { - let s: String = String::try_from(v).unwrap(); + let s: String = v.try_into().unwrap(); (k.clone(), s.clone()) }).collect(); Ok(map) @@ -156,7 +156,7 @@ impl TryFrom<&DataType> for Rule { impl TryFrom<&DataType> for Vec { type Error = QueryError; fn try_from(value: &DataType) -> Result { - let mut tagged_events = Vec::try_from(value)?; + let mut tagged_events: Vec = value.try_into()?; let mut events = Vec::new(); for event in tagged_events.drain(..) { match event { @@ -173,17 +173,17 @@ impl TryFrom<&DataType> for Vec { impl TryFrom<&DataType> for Vec<(String, Rule)> { type Error = QueryError; fn try_from(value: &DataType) -> Result { - let mut tagged_lists: Vec = Vec::try_from(value)?; + let mut tagged_lists: Vec = value.try_into()?; let mut lists: Vec<(String, Rule)> = Vec::new(); for list in tagged_lists.drain(..) 
{ match list { DataType::List(ref l) => { - let category: String = String::try_from(l.get(0).unwrap())?; - let rule = Rule::try_from(l.get(1).unwrap())?; - lists.push((category, rule)); + let tag: String = l.get(0).unwrap().try_into()?; + let rule: Rule = l.get(1).unwrap().try_into()?; + lists.push((tag, rule)); }, ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) + format!("Expected function parameter of type list of (tag, rule) tuples, list contains {:?}", invalid_type) )) } } @@ -194,17 +194,17 @@ impl TryFrom<&DataType> for Vec<(String, Rule)> { impl TryFrom<&DataType> for Vec<(Vec, Rule)> { type Error = QueryError; fn try_from(value: &DataType) -> Result { - let mut tagged_lists: Vec = Vec::try_from(value)?; + let mut tagged_lists: Vec = value.try_into()?; let mut lists: Vec<(Vec, Rule)> = Vec::new(); for list in tagged_lists.drain(..) { match list { DataType::List(ref l) => { - let category: Vec = Vec::try_from(l.get(0).unwrap())?; - let rule = Rule::try_from(l.get(1).unwrap())?; + let category: Vec = l.get(0).unwrap().try_into()?; + let rule: Rule = l.get(1).unwrap().try_into()?; lists.push((category, rule)); }, ref invalid_type => return Err(QueryError::InvalidFunctionParameters( - format!("Expected function parameter of type list of category rules, list contains {:?}", invalid_type) + format!("Expected function parameter of type list of (category, rule) tuples, list contains {:?}", invalid_type) )) } } From 7bf15f51c461177ddb7fe80f62a84374b867016a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Mon, 7 Oct 2019 17:08:34 +0200 Subject: [PATCH 16/16] removed old way of specifying rules for categorize and tag --- src/query/datatype.rs | 6 ------ tests/query.rs | 7 +++---- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/query/datatype.rs b/src/query/datatype.rs index df208d52..dc156d1d 100644 --- 
a/src/query/datatype.rs +++ b/src/query/datatype.rs @@ -139,12 +139,6 @@ impl TryFrom<&DataType> for Rule { }).collect(); Ok(map) }, - DataType::String(s) => { - let regex_str: String = s.clone(); - let tuple: Vec<(String, String)> = vec![("regex".into(), regex_str)]; - let map: HashMap = tuple.iter().cloned().collect(); - Ok(map) - } _ => Err(QueryError::InvalidFunctionParameters( format!("Expected rule, found something else") )) diff --git a/tests/query.rs b/tests/query.rs index 2c70acd3..31ac3bcd 100644 --- a/tests/query.rs +++ b/tests/query.rs @@ -302,8 +302,8 @@ mod query_tests { events = limit_events(events, 10000); events = sort_by_timestamp(events); events = concat(events, query_bucket("{}")); - events = categorize(events, [[["test"], "value$"], [["test", "testing"], "value$"]]); - events = tag(events, [["testtag", "test$"], ["another testtag", "test-pat$"]]); + events = categorize(events, [[["test"], {{ "regex": "value$" }}], [["test", "testing"], {{ "regex": "value$" }}]]); + events = tag(events, [["testtag", {{ "regex": "test$" }}], ["another testtag", {{ "regex": "test-pat$" }}]]); total_duration = sum_durations(events); bucketnames = query_bucket_names(); print("test", "test2"); @@ -332,9 +332,8 @@ mod query_tests { let code = format!( r#" events = query_bucket("{}"); - events = categorize(events, [[["Test", "Subtest"], "^value$"]]); events = categorize(events, [[["Test", "Subtest"], {{ "regex": "^value$" }}]]); - events = tag(events, [["testtag", "value$"], ["another testtag", "value$"]]); + events = tag(events, [["testtag", {{ "regex": "value$" }}], ["another testtag", {{ "regex": "value$" }}]]); test = {{}}; RETURN = events;"#, "testid"