From 90b5147fc069d05a1997c1ab8df6b116577aea14 Mon Sep 17 00:00:00 2001 From: Nicolae Date: Thu, 30 Aug 2018 20:58:05 +0300 Subject: [PATCH 1/2] Necessary commit to create summary for all browser data --- aw_analysis/query2_functions.py | 7 +++++++ aw_transform/__init__.py | 2 +- aw_transform/sort_by.py | 6 +++++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/aw_analysis/query2_functions.py b/aw_analysis/query2_functions.py index eec2120b..f010f51c 100644 --- a/aw_analysis/query2_functions.py +++ b/aw_analysis/query2_functions.py @@ -17,6 +17,7 @@ sort_by_timestamp, sort_by_duration, sum_durations, + sum_event_lists, split_url_events, simplify_string, flood @@ -214,6 +215,12 @@ def q2_sum_durations(events: list) -> timedelta: _verify_variable_is_type(events, list) return sum_durations(events) +@q2_function +def q2_sum_event_lists(events1: list, events2: list) -> List[Event]: + _verify_variable_is_type(events1, list) + _verify_variable_is_type(events2, list) + return sum_event_lists(events1, events2) + """ Flood functions diff --git a/aw_transform/__init__.py b/aw_transform/__init__.py index d7e96d90..ab2d062d 100644 --- a/aw_transform/__init__.py +++ b/aw_transform/__init__.py @@ -3,7 +3,7 @@ from .heartbeats import heartbeat_merge, heartbeat_reduce from .merge_events_by_keys import merge_events_by_keys from .chunk_events_by_key import chunk_events_by_key -from .sort_by import sort_by_timestamp, sort_by_duration, sum_durations +from .sort_by import sort_by_timestamp, sort_by_duration, sum_durations, sum_event_lists from .split_url_events import split_url_events from .simplify import simplify_string from .flood import flood diff --git a/aw_transform/sort_by.py b/aw_transform/sort_by.py index ebe832ea..e169dcff 100644 --- a/aw_transform/sort_by.py +++ b/aw_transform/sort_by.py @@ -1,7 +1,6 @@ import logging from datetime import datetime, timedelta from typing import List - from aw_core.models import Event logger = logging.getLogger(__name__) @@ -18,3 +17,8 @@ def limit_events(events, count) -> List[Event]: def sum_durations(events) -> timedelta: return timedelta(seconds=(sum(event.duration.total_seconds() for event in events))) + +def sum_event_lists(events1, events2) -> List[Event]: + events = events1 + events2 + events = sorted(events, key=lambda e: e.timestamp) + return events \ No newline at end of file From 3c4187e50bffb60a16593d671dbe4b301453e6e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Bj=C3=A4reholt?= Date: Sat, 3 Nov 2018 12:04:59 +0100 Subject: [PATCH 2/2] renamed sum_event_lists to concat, added boolean support by adding it to the namespace --- aw_analysis/query2.py | 4 ++-- aw_analysis/query2_functions.py | 7 ++++--- aw_transform/__init__.py | 2 +- aw_transform/sort_by.py | 9 ++++++--- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/aw_analysis/query2.py b/aw_analysis/query2.py index 01542fd4..0c1bb186 100644 --- a/aw_analysis/query2.py +++ b/aw_analysis/query2.py @@ -325,8 +325,8 @@ def _parse_token(string: str, namespace: dict): # TODO: Add return type def create_namespace() -> dict: namespace = { - "TRUE": 1, - "FALSE": 0, + "True": True, + "False": False, } return namespace diff --git a/aw_analysis/query2_functions.py b/aw_analysis/query2_functions.py index f010f51c..d8f258d1 100644 --- a/aw_analysis/query2_functions.py +++ b/aw_analysis/query2_functions.py @@ -17,7 +17,7 @@ sort_by_timestamp, sort_by_duration, sum_durations, - sum_event_lists, + concat, split_url_events, simplify_string, flood @@ -215,11 +215,12 @@ def q2_sum_durations(events: list) -> timedelta: _verify_variable_is_type(events, list) return sum_durations(events) + @q2_function -def q2_sum_event_lists(events1: list, events2: list) -> List[Event]: +def q2_concat(events1: list, events2: list) -> List[Event]: _verify_variable_is_type(events1, list) _verify_variable_is_type(events2, list) - return sum_event_lists(events1, events2) + return concat(events1, events2) """ diff --git a/aw_transform/__init__.py b/aw_transform/__init__.py index ab2d062d..40d1e52b 100644 --- a/aw_transform/__init__.py +++ b/aw_transform/__init__.py @@ -3,7 +3,7 @@ from .heartbeats import heartbeat_merge, heartbeat_reduce from .merge_events_by_keys import merge_events_by_keys from .chunk_events_by_key import chunk_events_by_key -from .sort_by import sort_by_timestamp, sort_by_duration, sum_durations, sum_event_lists +from .sort_by import sort_by_timestamp, sort_by_duration, sum_durations, concat from .split_url_events import split_url_events from .simplify import simplify_string from .flood import flood diff --git a/aw_transform/sort_by.py b/aw_transform/sort_by.py index e169dcff..7860c07d 100644 --- a/aw_transform/sort_by.py +++ b/aw_transform/sort_by.py @@ -9,16 +9,19 @@ def sort_by_timestamp(events) -> List[Event]: return sorted(events, key=lambda e: e.timestamp) + def sort_by_duration(events) -> List[Event]: return sorted(events, key=lambda e: e.duration, reverse=True) + def limit_events(events, count) -> List[Event]: return events[:count] + def sum_durations(events) -> timedelta: return timedelta(seconds=(sum(event.duration.total_seconds() for event in events))) -def sum_event_lists(events1, events2) -> List[Event]: + +def concat(events1, events2) -> List[Event]: events = events1 + events2 - events = sorted(events, key=lambda e: e.timestamp) - return events \ No newline at end of file + return events