diff --git a/trappy/bare_trace.py b/trappy/bare_trace.py index f3fbd58c..9787f04d 100644 --- a/trappy/bare_trace.py +++ b/trappy/bare_trace.py @@ -27,12 +27,13 @@ class BareTrace(object): """ - def __init__(self, name=""): + def __init__(self, name="", build_df=True): self.name = name self.normalized_time = False self.class_definitions = {} self.trace_classes = [] self.basetime = 0 + self.build_df = build_df def get_duration(self): """Returns the largest time value of all classes, @@ -133,6 +134,8 @@ def add_parsed_event(self, name, dfr, pivot=None): setattr(self, name, event) def finalize_objects(self): + if not self.build_df: + return for trace_class in self.trace_classes: trace_class.create_dataframe() trace_class.finalize_object() diff --git a/trappy/base.py b/trappy/base.py index 8a1b976d..aebda802 100644 --- a/trappy/base.py +++ b/trappy/base.py @@ -105,6 +105,7 @@ def __init__(self, parse_raw=False): self.comm_array = [] self.pid_array = [] self.cpu_array = [] + self.callback = None self.parse_raw = parse_raw def finalize_object(self): @@ -171,42 +172,60 @@ def append_data(self, time, comm, pid, cpu, data): self.cpu_array.append(cpu) self.data_array.append(data) + if not self.callback: + return + data_dict = self.generate_data_dict(comm, pid, cpu, data) + self.callback(time, data_dict) + + def generate_data_dict(self, comm, pid, cpu, data_str): + data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu} + prev_key = None + for field in data_str.split(): + if "=" not in field: + # Concatenation is supported only for "string" values + if type(data_dict[prev_key]) is not str: + continue + data_dict[prev_key] += ' ' + field + continue + (key, value) = field.split('=', 1) + try: + value = int(value) + except ValueError: + pass + data_dict[key] = value + prev_key = key + return data_dict + def generate_parsed_data(self): # Get a rough idea of how much memory we have to play with + CHECK_MEM_COUNT = 10000 kb_free = _get_free_memory_kb() starting_maxrss = getrusage(RUSAGE_SELF).ru_maxrss check_memory_usage = True + check_memory_count = 1 for (comm, pid, cpu, data_str) in zip(self.comm_array, self.pid_array, self.cpu_array, self.data_array): - data_dict = {"__comm": comm, "__pid": pid, "__cpu": cpu} - prev_key = None - for field in data_str.split(): - if "=" not in field: - # Concatenation is supported only for "string" values - if type(data_dict[prev_key]) is not str: - continue - data_dict[prev_key] += ' ' + field - continue - (key, value) = field.split('=', 1) - try: - value = int(value) - except ValueError: - pass - data_dict[key] = value - prev_key = key + data_dict = self.generate_data_dict(comm, pid, cpu, data_str) # When running out of memory, Pandas has been observed to segfault # rather than throwing a proper Python error. # Look at how much memory our process is using and warn if we seem - # to be getting close to the system's limit. + # to be getting close to the system's limit, check it only once + # in the beginning and then every CHECK_MEM_COUNT events + check_memory_count -= 1 + if check_memory_count != 0: + yield data_dict + continue + kb_used = (getrusage(RUSAGE_SELF).ru_maxrss - starting_maxrss) if check_memory_usage and kb_free and kb_used > kb_free * 0.9: warnings.warn("TRAPpy: Appear to be low on memory. " "If errors arise, try providing more RAM") check_memory_usage = False + check_memory_count = CHECK_MEM_COUNT yield data_dict def create_dataframe(self): @@ -237,15 +256,3 @@ def write_csv(self, fname): :type fname: str """ self.data_frame.to_csv(fname) - - def normalize_time(self, basetime): - """Substract basetime from the Time of the data frame - - :param basetime: The offset which needs to be subtracted from - the time index - :type basetime: float - """ - if basetime and not self.data_frame.empty: - self.data_frame.reset_index(inplace=True) - self.data_frame["Time"] = self.data_frame["Time"] - basetime - self.data_frame.set_index("Time", inplace=True) diff --git a/trappy/ftrace.py b/trappy/ftrace.py index 6a5fce0a..4726a3f5 100644 --- a/trappy/ftrace.py +++ b/trappy/ftrace.py @@ -54,8 +54,9 @@ class GenericFTrace(BareTrace): dynamic_classes = {} def __init__(self, name="", normalize_time=True, scope="all", - events=[], window=(0, None), abs_window=(0, None)): - super(GenericFTrace, self).__init__(name) + events=[], event_callbacks={}, window=(0, None), + abs_window=(0, None), build_df=True): + super(GenericFTrace, self).__init__(name, build_df) if not hasattr(self, "needs_raw_parsing"): self.needs_raw_parsing = False @@ -73,6 +74,8 @@ def __init__(self, name="", normalize_time=True, scope="all", for attr, class_def in self.class_definitions.iteritems(): trace_class = class_def() + if event_callbacks.has_key(attr): + trace_class.callback = event_callbacks[attr] setattr(self, attr, trace_class) self.trace_classes.append(trace_class) @@ -82,9 +85,6 @@ def __init__(self, name="", normalize_time=True, scope="all", raw=True) self.finalize_objects() - if normalize_time: - self.normalize_time() - @classmethod def register_parser(cls, cobject, scope): """Register the class as an Event. This function @@ -207,6 +207,9 @@ def contains_unique_word(line, unique_words=cls_for_unique_word.keys()): except AttributeError: continue + if self.normalize_time: + timestamp = timestamp - self.basetime + data_str = line[data_start_idx:] # Remove empty arrays from the trace @@ -480,14 +483,16 @@ class FTrace(GenericFTrace): """ def __init__(self, path=".", name="", normalize_time=True, scope="all", - events=[], window=(0, None), abs_window=(0, None)): + events=[], event_callbacks={}, window=(0, None), + abs_window=(0, None), build_df=True): self.trace_path, self.trace_path_raw = self.__process_path(path) self.needs_raw_parsing = True self.__populate_metadata() super(FTrace, self).__init__(name, normalize_time, scope, events, - window, abs_window) + event_callbacks, window, abs_window, + build_df) def __process_path(self, basepath): """Process the path and return the path to the trace text file""" diff --git a/trappy/sched.py b/trappy/sched.py index 4a68f6a5..b8d9cc58 100644 --- a/trappy/sched.py +++ b/trappy/sched.py @@ -108,11 +108,10 @@ class SchedSwitch(Base): def __init__(self): super(SchedSwitch, self).__init__(parse_raw=True) - def create_dataframe(self): - self.data_array = [line.replace(" ==> ", " ", 1) - for line in self.data_array] - - super(SchedSwitch, self).create_dataframe() + def append_data(self, time, comm, pid, cpu, data): + data_rep = data.replace(" ==> ", " ") + super(SchedSwitch, self).append_data(time, comm, pid, cpu, + data_rep) register_ftrace_parser(SchedSwitch, "sched") diff --git a/trappy/systrace.py b/trappy/systrace.py index 6e917a65..c11601da 100644 --- a/trappy/systrace.py +++ b/trappy/systrace.py @@ -50,13 +50,16 @@ class SysTrace(GenericFTrace): """ def __init__(self, path=".", name="", normalize_time=True, scope="all", - events=[], window=(0, None), abs_window=(0, None)): + events=[], event_callbacks={}, window=(0, None), + abs_window=(0, None), build_df=True): self.trace_path = path super(SysTrace, self).__init__(name, normalize_time, scope, events, - window, abs_window) - + event_callbacks, window, abs_window, + build_df) + if not build_df: + return try: self._cpus = 1 + self.sched_switch.data_frame["__cpu"].max() except AttributeError: