diff --git a/.gitignore b/.gitignore index 3917e5d..0107189 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ .venv37/ .venv38/ .venv39/ +graph.png diff --git a/grapher.py b/grapher.py new file mode 100644 index 0000000..a357470 --- /dev/null +++ b/grapher.py @@ -0,0 +1,329 @@ +# #WHAT I NEED TO DO +# MODEL THE BEHAVIOUR THAT INTERMEDIATE LIST ARE ALSO IN SOME ORDER CURRENTLY THAT BEHAVIOR IS LOST LEADING TO LOSS OF INTERMEDIATE INFORMATION +# LOGICAL ADDRESSES OF INTERMEDIATES NEED TO BE KINDA SEMI CONCRETE + +import networkx as nx +import matplotlib.pyplot as plt +import inspect +from collections import deque + +oneNodePerMemoryLocation = False + + +G = nx.MultiDiGraph() + +numberOfContextAndVariable = 0 +contextAndVariableToMemAddressMap = {} +baseVarToIndices = {} + +variableMap = {} + +functionCalls = 0 +contextToIdMap = {} + +halfDependencyQueue = deque() +joinedDependencies = [] +callStackOffset = 0 + +def reduced_call_stack(): + # currentStack = inspect.stack() + # #Remove first two frames which is functions inside grapher.py, and one more for current location of control in main program + # reducedStack = tuple([(i.filename, i.lineno, i.function, i.index) for i in currentStack[callStackOffset+3:]]) + # # print(reducedStack) + # return reducedStack + return 123 + +def function_call(): + global functionCalls + functionCalls += 1 + reducedStack = reduced_call_stack() + if reducedStack in contextToIdMap: + print("INFO: New context initialized") + contextToIdMap[reducedStack] = functionCalls + + +def produce_half_dependency(*halfDependencies): + for halfDependency in halfDependencies: + assert len(halfDependency) == 1, "Half Dependencies must be a singleton while producing" + parent = halfDependency[0] + + try: + localVariableContext = contextToIdMap[reduced_call_stack()] + except: + print(contextToIdMap) + assert False, "Context not initialized" + + halfDependencyQueue.append((localVariableContext, parent)) + +def consume_half_dependency(*halfDependencies): + global joinedDependencies, callStackOffset, baseVarToIndices, contextAndVariableToMemAddressMap + for halfDependency in halfDependencies: + assert len(halfDependency) == 1, "Half Dependencies must be a singleton while consuming" + child = halfDependency[0] + try: + (parentContext, parent) = halfDependencyQueue.popleft() + except: + assert False, "No half dependency left to consume" + + try: + childContext = contextToIdMap[reduced_call_stack()] + except: + print(contextToIdMap) + assert False, "Context not initialized" + + # add_in_logical_memory(parentContext, parent, childContext, child) + + dependency = (child, parent) + joinedDependencies.append(dependency) + parentAddress = fetchOrCreateAddress(parentContext, parent, must_exist=True) + # if (childContext, child[0]) not in baseVarToIndices: + baseVarToIndices[(childContext, child[0])] = baseVarToIndices[(parentContext, parent[0])] + for indices in baseVarToIndices[(childContext, child[0])]: + flag = True + # This thing is for multi dimensional arrays partially indexed + for i, index in enumerate(child[1]): + if indices[i] != index: + flag=False + if flag: + contextAndVariableToMemAddressMap[(childContext, child[0], indices)] = contextAndVariableToMemAddressMap[(parentContext, parent[0], indices)] + childAddress = fetchOrCreateAddress(childContext, child, must_exist=True) + + if len(halfDependencyQueue) == 0: + callStackOffset += 1 + add_dependency_internal(parentContext, *joinedDependencies) + callStackOffset -=1 + joinedDependencies = [] + +def fetchOrCreateAddress(contextID, variable, must_exist=False): + global contextAndVariableToMemAddressMap, numberOfContextAndVariable, baseVarToIndices + variableName = variable[0] + variableIndices = [None] + variableIndices.extend(list(variable[1])) + iterIndices = [] + # print(variableIndices) + for index in variableIndices: + if index is not None: + iterIndices.append(index) + if (contextID, variableName, tuple(iterIndices)) in contextAndVariableToMemAddressMap: + continue + else: + if (contextID, variableName) not in baseVarToIndices: + baseVarToIndices[(contextID, variableName)] = [] + baseVarToIndices[(contextID, variableName)].append(tuple(iterIndices)) + if must_exist == True: + assert False, "address should have already been assigned by now for " + str((contextID, variableName)) + numberOfContextAndVariable += 1 + contextAndVariableToMemAddressMap[(contextID, variableName, tuple(iterIndices))] = numberOfContextAndVariable + print("New variable created: ", str((contextID, variableName, iterIndices)), " at memory address ", numberOfContextAndVariable) + return contextAndVariableToMemAddressMap[(contextID, variableName, variable[1])] + +def instantiate_variable(variable): + global callStackOffset + callStackOffset += 1 + instantiate_variable_internal(None, variable) + callStackOffset -= 1 + +def instantiate_variable_internal(context, variable): + global variableMap, contextToIdMap + try: + context = contextToIdMap[reduced_call_stack()] + except: + print(contextToIdMap) + assert False, "Context not initialized. Is add_function() called within this function?" + address = fetchOrCreateAddress(context, variable, must_exist=False) + version = variableMap.get(address, (None, 0))[1] + string = "l%d_v%d" % (address, version) + G.add_node(string, pos=(address, version), rawLoc=str(variable)) + variableMap[address] = (string, version) + +def add_dependency(*dependencies): + print(dependencies) + global callStackOffset + callStackOffset += 1 + add_dependency_internal(None, *dependencies) + callStackOffset -= 1 + +def add_dependency_internal(parentContext, *dependencies): + dependencyToChildStrAndVersion = {} + for dependency in dependencies: + # print(dependency) + assert len(dependency) == 2, "Dependencies must be in pairs only" + child = dependency[0] + parent = dependency[1] + + # print(child, parent) + try: + childContext = contextToIdMap[reduced_call_stack()] + if parentContext is None: + parentContext = childContext + except: + print(contextToIdMap) + assert False, "Context not initialized. Is add_function() called within this function?" + + parentAddress = fetchOrCreateAddress(parentContext, parent, must_exist=True) + childAddress = fetchOrCreateAddress(childContext, child, must_exist=False) + + # print(childAddress, childContext, child, parentAddress, parentContext, parent) + + + childVersion = variableMap.get(childAddress, (None, 0))[1] + parentVersion = variableMap.get(parentAddress, (None, 0))[1] + + oldChildVersion = childVersion + childVersion = 0 if oneNodePerMemoryLocation and (not child.startswith('t')) else max(parentVersion, childVersion) + 1 + + childStr = "l%d_v%d" % (childAddress, childVersion) + oldChildStr = variableMap.get(childAddress, (None, None))[0] + parentStr = "l%d_v%d" % (parentAddress, parentVersion) + + dependencyToChildStrAndVersion[dependency] = (childStr, childVersion) + G.add_node(parentStr, pos=(parentAddress, parentVersion), rawLoc=str(parent)) + G.add_node(childStr, pos=(childAddress, childVersion), rawLoc=str(child)) + + G.add_edge(parentStr, childStr, isShadow=False) + #Shadow edges ensure that Value (t+1) is plotted AFTER Value (t) always + if oldChildStr is not None: + G.add_edge(oldChildStr, childStr, isShadow=True) + # input() + + for dependency in dependencies: + child = dependency[0] + + try: + childContext = contextToIdMap[reduced_call_stack()] + except: + print(contextToIdMap) + assert False, "Context not initialized" + + childAddress = fetchOrCreateAddress(childContext, child) + variableMap[childAddress] = dependencyToChildStrAndVersion[dependency] + + +def showGraph(filterVar=None): + global contextToIdMap + # print(variableMap) + # print(contextAndVariableToMemAddressMap) + # print(baseVarToIndices) + + traversalQ = deque() + + # Only show nodes of those variables which are present in the output + if filterVar is not None: + context = contextToIdMap[reduced_call_stack()] + currentVar = (context, filterVar) + indices = baseVarToIndices[currentVar] + + # We know the graph is a DAG so no loops which simplifies the code + listOfVisitedNodes = {} + for index in indices: + address = contextAndVariableToMemAddressMap[(context, filterVar, index)] + node = variableMap[address][0] + traversalQ.append(node) + listOfVisitedNodes[node] = True + + while len(traversalQ) != 0: + node = traversalQ.popleft() + parentsOfNode = G.predecessors(node) + for neighbor in parentsOfNode: + if neighbor in listOfVisitedNodes: + continue + else: + listOfVisitedNodes[neighbor] = True + traversalQ.append(neighbor) + + nodesForRemoval = [] + for node in G.nodes: + if node not in listOfVisitedNodes: + nodesForRemoval.append(node) + + for node in nodesForRemoval: + G.remove_node(node) + + + + print(len(nx.get_node_attributes(G, 'pos').keys())) + print(len(nx.get_node_attributes(G, 'rawLoc').keys())) + print(len(G.nodes)) + + plt.subplot(121) + + foundIntermediate = True + # Removing intermediate nodes + # while foundIntermediate: + # foundIntermediate = False + # toRemove = [] + # for node in G.nodes: + # # print(node, type(node)) + # if node.startswith('t'): + # foundIntermediate = True + # childs = G.successors(node) + # parents = G.predecessors(node) + # toRemove.append(node) + # rawLocMap = nx.get_node_attributes(G, 'rawLoc') + # for child in childs: + # for parent in parents: + # if rawLocMap[child] == rawLocMap[parent]: + # G.add_edge(parent, child, isShadow=True) + # G.add_edge(parent, child, isShadow=False) + # for node in toRemove: + # G.remove_node(node) + + # Adjusting height of all nodes + orderOfNodes = {} + + print(len(nx.get_node_attributes(G, 'pos').keys())) + print(len(nx.get_node_attributes(G, 'rawLoc').keys())) + print(len(G.nodes)) + + for node in G.nodes: + if nx.get_node_attributes(G, 'pos')[node] not in orderOfNodes: + orderOfNodes[nx.get_node_attributes(G, 'pos')[node]] = [] + orderOfNodes[nx.get_node_attributes(G, 'pos')[node]].append(node) + for key in sorted(orderOfNodes, key=lambda x:(x[1], x[0])): + # print(key) + for node in orderOfNodes[key]: + xcoord = nx.get_node_attributes(G, 'pos')[node][0] + if len(list(G.predecessors(node))) > 0: + # print(nx.get_node_attributes(G, 'pos')[node]) + # print([nx.get_node_attributes(G, 'pos')[i] for i in G.predecessors(node)]) + # print(node, [i for i in G.predecessors(node)]) + minHeight = max([nx.get_node_attributes(G, 'pos')[i][1] for i in G.predecessors(node)]) + 1 + nx.set_node_attributes(G, {node:{'pos':(xcoord,minHeight)}}) + else: + nx.set_node_attributes(G, {node:{'pos':(xcoord,0)}}) + + + #Compress the graph to make it more clear. Just a heuristic, works well for sorting + currentHeight = -1 #No node is given negative height + counterForCurrentHeight = 0 + for key in sorted(orderOfNodes, key=lambda x:(x[1], x[0])): + if key[1] > currentHeight: + currentHeight = key[1] + counterForCurrentHeight = 0 + for node in orderOfNodes[key]: + # nx.set_node_attributes(G, {node:{'pos':(counterForCurrentHeight, currentHeight)}}) + counterForCurrentHeight += 1 + # print(counterForCurrentHeight, currentHeight) + + pos = nx.circular_layout(G) if oneNodePerMemoryLocation else nx.get_node_attributes(G,'pos') + + nx.draw_networkx_nodes(G, pos) + nx.draw_networkx_labels(G, pos, font_size=5) + ax = plt.gca() + # print(nx.get_edge_attributes(G,'isShadow')) + for e in G.edges: + # print(e) + if nx.get_edge_attributes(G,'isShadow')[e] == False: + ax.annotate("", + xy=pos[e[1]], xycoords='data', + xytext=pos[e[0]], textcoords='data', + arrowprops=dict(arrowstyle="->", color="0.1", + shrinkA=10, shrinkB=10, + patchA=None, patchB=None, + connectionstyle="arc3,rad=rrr".replace('rrr',str(0.3*e[2]) + ), + ), + ) + plt.axis('off') + plt.show() + plt.savefig('graph.png', dpi=300) \ No newline at end of file diff --git a/instrumentation/data_tracing_receiver.py b/instrumentation/data_tracing_receiver.py index 92bedf8..3a7dfca 100644 --- a/instrumentation/data_tracing_receiver.py +++ b/instrumentation/data_tracing_receiver.py @@ -33,12 +33,12 @@ def __init__( def collection_updated(self, i: Any, value: "StackElement") -> "StackElement": if isinstance(self.collection_elems, list): - # elems_copy = [StackElement( - # e.concrete, - # opmap["BINARY_SUBSCR"], - # [self, i], - # ) for i, e in enumerate(self.collection_elems)] - elems_copy_list = [e for i, e in enumerate(self.collection_elems)] + elems_copy_list = [StackElement( + e.concrete, + opmap["BINARY_SUBSCR"], + [self, i], + ) for i, e in enumerate(self.collection_elems)] + # elems_copy_list = [e for i, e in enumerate(self.collection_elems)] elems_copy_list[i] = value return StackElement(self.concrete, self.opcode, self.deps, self.is_cow_pointer, self.cow_latest_value, elems_copy_list) elif isinstance(self.collection_elems, dict): diff --git a/instrumentation/instrument.py b/instrumentation/instrument.py index 14ffa5a..8206970 100644 --- a/instrumentation/instrument.py +++ b/instrumentation/instrument.py @@ -252,8 +252,8 @@ def instrument_bytecode(code: Bytecode, code_id: int = 0) -> Bytecode: label_to_op_index, code_id, False ) - if isinstance(op, Instr) and op.name not in pre_opcode_instrument and op.name not in post_opcode_instrument: - print(f"IGNORING OPERATION {op.name}") + # if isinstance(op, Instr) and op.name not in pre_opcode_instrument and op.name not in post_opcode_instrument: + # print(f"IGNORING OPERATION {op.name}") instrumented.append(op) diff --git a/run_instrumented.py b/run_instrumented.py index 4be9b54..92cb221 100644 --- a/run_instrumented.py +++ b/run_instrumented.py @@ -2,16 +2,18 @@ from dis import opname from instrumentation.stack_tracking_receiver import StackTrackingReceiver -from instrumentation.data_tracing_receiver import DataTracingReceiver +from instrumentation.data_tracing_receiver import DataTracingReceiver, StackElement from instrumentation.module_loader import PatchingPathFinder from instrumentation.exec import exec_instrumented +from instrumentation.heap_object_tracking import HeapObjectTracker +import grapher patcher = PatchingPathFinder() patcher.install() from demos.quicksort import quicksort_return import random -arr = [random.randint(0, 10) for i in range(10)] +arr = [random.randint(0, 10) for i in range(5)] orig_arr = list(arr) receiver = DataTracingReceiver() with StackTrackingReceiver(): @@ -26,26 +28,56 @@ def pretty_symbolic(symbolic): else: return receiver.stringify_maybe_object_id(symbolic.concrete) -def print_deps(symbolic, indent_level=0): +id_for_stack_element = HeapObjectTracker() +seen_collections = set() + +def print_deps(symbolic: StackElement, indent_level=0): indent = ' ' * indent_level if symbolic.is_cow_pointer: print_deps(symbolic.cow_latest_value, indent_level) elif symbolic.collection_elems: - print(f"{indent}collection with elements:") - for elem in symbolic.collection_elems: - print_deps(elem, indent_level + 1) + # print(f"{indent}collection with elements:") + my_id = id_for_stack_element.get_object_id(symbolic) + if my_id not in seen_collections: + seen_collections.add(my_id) + for i, elem in enumerate(symbolic.collection_elems): + if elem.opcode == -1: + grapher.instantiate_variable( + ("sym_" + str(my_id), (i,)) + ) + else: + res = print_deps(elem, indent_level + 1) + if res: + grapher.add_dependency(( + (("sym_" + str(my_id), (i,)), res) + )) + else: + other_id = id_for_stack_element.get_object_id(elem) + grapher.add_dependency(( + (("sym_" + str(my_id), (i,)), ("sym_" + str(other_id), (0,))) + )) elif opname[symbolic.opcode] == "BINARY_SUBSCR": - print(f"{indent}{pretty_symbolic(symbolic)} depends on index {symbolic.deps[1]} of collection {pretty_symbolic(symbolic.deps[0])}") - print_deps(symbolic.deps[0].collection_elems[symbolic.deps[1]], indent_level + 1) + other_id = id_for_stack_element.get_object_id(symbolic.deps[0]) + # print(f"{indent}{pretty_symbolic(symbolic)} depends on index {symbolic.deps[1]} of collection {pretty_symbolic(symbolic.deps[0])}") + print_deps(symbolic.deps[0], indent_level + 1) + return ("sym_" + str(other_id), (symbolic.deps[1],)) else: - print(f"{indent}{pretty_symbolic(symbolic)} depends via {opname[symbolic.opcode]}") + # print(f"{indent}{pretty_symbolic(symbolic)} depends via {opname[symbolic.opcode]}") + if symbolic.opcode == -1: + my_id = id_for_stack_element.get_object_id(symbolic) + grapher.instantiate_variable( + ("sym_" + str(my_id), (0,)) + ) for dep in symbolic.deps: print_deps(dep, indent_level + 1) print("orig: " + str(orig_arr)) print("out: " + str(arr)) +grapher.function_call() print_deps(receiver.symbolic_stack.pop()) +grapher.showGraph() + # import numpy as np # arr = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])