From 713d4213ef9ccc514d32357daf8c76397f4ba947 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 29 Sep 2023 13:20:58 -0400 Subject: [PATCH 01/39] First stab at html output for codegen diffs --- tools/diff_codegen_nvfuser_tests.py | 415 ++++++++++++++++++++-------- tools/templates/codediff.html | 129 +++++++++ 2 files changed, 434 insertions(+), 110 deletions(-) create mode 100644 tools/templates/codediff.html diff --git a/tools/diff_codegen_nvfuser_tests.py b/tools/diff_codegen_nvfuser_tests.py index 57a82611868..dcaa7b00fe1 100644 --- a/tools/diff_codegen_nvfuser_tests.py +++ b/tools/diff_codegen_nvfuser_tests.py @@ -11,145 +11,340 @@ codegen_comparison/{$commit1,$commit2}/binary_tests """ +from collections import OrderedDict +from dataclasses import dataclass, field +import difflib import os import re import subprocess import sys +from datetime import datetime +from typing import Optional, Set -# precompile an RE we'll apply over and over - - -def get_test_map(directory: str) -> dict[str, list[str]]: - """ - Get a map from test name to list of cuda filenames - """ - # first find the stdout log file - logfile = None - for fname in os.listdir(directory): - if fname.find("stdout") != -1: - if logfile is not None: - raise RuntimeError( - f"Input directory {directory} contains multiple " - 'possible logs (filenames containing "stdout")' - ) - logfile = os.path.join(directory, fname) - if logfile is None: - raise RuntimeError( - f"Input directory {directory} contains no log (filenames " - 'containing "stdout")' - ) - # regex for stripping ANSI color codes - ansi_re = re.compile(r"(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]") - kernel_map = {} - current_test = None - current_files = [] - for line in open(logfile, "r").readlines(): - line = ansi_re.sub("", line.strip()) - if line[:13] == "[ RUN ] ": - current_test = line[13:] - elif line[:13] == "[ OK ] ": - # Finalize test - assert current_test is not None - kernel_map[current_test] = current_files - current_test = None - current_files = [] - elif line[:10] == "PRINTING: ": - if line[-3:] == ".cu": - # This avoids comparing the .ptx files that are created then - # removed by the MemoryTest.LoadCache tests - current_files.append(line[10:]) - - return kernel_map - - -def diff_nvfuser_tests_dirs(dir1: str, dir2: str): - """ - Given directories for two - """ - # check that commands are equal - command1 = open(os.path.join(dir1, "command"), "r").read() - command2 = open(os.path.join(dir2, "command"), "r").read() - - if command1 != command2: - print("WARNING: commands differ between runs", file=sys.stderr) - print(f" {dir1}: {command1}", file=sys.stderr) - print(f" {dir2}: {command2}", file=sys.stderr) - - # check that command includes "nvfuser_tests" - if command1.find("nvfuser_tests") == -1: - print( - "ERROR: Command does not appear to be nvfuser_tests. Aborting.", - file=sys.stderr, +@dataclass +class GitBranch: + name: str + # TODO: get the name of tracking branch + # tracking_branch + + def __post_init__(self): + # TODO: find tracking branch for this branch + pass + + +@dataclass +class GitRev: + abbrev: str + full_hash: str = None + in_branches: list[GitBranch] = field(default_factory=list) + author_name: str = None + author_email: str = None + author_time: datetime.time = None + commit_time: datetime.time = None + + def __post_init__(self): + self.full_hash = ( + subprocess.run(["git", "rev-parse", self.abbrev], capture_output=True) + .stdout.strip() + .decode("utf-8") ) - sys.exit(1) - - # check that exit codes are equal - exitcode1 = open(os.path.join(dir1, "exitcode"), "r").read() - exitcode2 = open(os.path.join(dir2, "exitcode"), "r").read() - if exitcode1 != exitcode2: - print( - f"WARNING: Exit codes {exitcode1} and {exitcode2} do not match.", - file=sys.stderr, + for line in ( + subprocess.run( + ["git", "branch", "--quiet", "--color=never", self.full_hash], + capture_output=True, + ) + .stdout.strip() + .splitlines() + ): + # Possible output: + # + # main + # * scalar_seg_edges + # + # In this case, we have checked out the HEAD of the + # scalar_seg_edges branch. Here we just strip the *. + if line[0] == "*": + line = line[2:] + in_branches.append(line) + + date_fmt = "%Y/%m/%d %H:%M:%S %z" + git_show = ( + lambda fmt: subprocess.run( + [ + "git", + "show", + "--no-patch", + f"--format={fmt}", + f"--date=format:{date_fmt}", + self.full_hash, + ], + capture_output=True, + ) + .stdout.strip() + .decode("utf-8") ) + self.author_name = git_show("%an") + self.author_email = git_show("%ae") + + # Get date and time for this commit in datetime format + get_datetime = lambda time_str: datetime.strptime(time_str, date_fmt) + self.author_time = get_datetime(git_show("%ad")) + self.commit_time = get_datetime(git_show("%cd")) + + +@dataclass +class TestRun: + directory: str + git_rev: GitRev = None + run_name: str = None + command: str = None + exit_code: int = None + # map from name of test to list of kernel base filenames + kernel_map: dict[str, list[str]] = field(default_factory=dict) + # collecting the preamble lets us skip it when diffing, and lets us compare + # only the preamble between runs + preamble: str = None + # lets us seek past preamble + preamble_size_bytes: int = None - # get a map from test name to list of .cu files for each directory - map1 = get_test_map(dir1) - map2 = get_test_map(dir2) + def __post_init__(self): + # get description of this git rev + abbrev = os.path.basename(os.path.dirname(os.path.abspath(self.directory))) + self.git_rev = GitRev(abbrev) - differing_tests = set() - for testname, kernels1 in map1.items(): - if testname not in map2: + self.command = open(os.path.join(self.directory, "command"), "r").read() + + # check that command includes "nvfuser_tests" + if self.command.find("nvfuser_tests") == -1: print( - f"WARNING: Test {testname} present in {dir1} but not in {dir2}", + "ERROR: Command does not appear to be nvfuser_tests. Aborting.", file=sys.stderr, ) - continue + sys.exit(1) - kernels2 = map2[testname] + self.exit_code = int(open(os.path.join(self.directory, "exitcode"), "r").read()) - if len(kernels1) != len(kernels2): - print( - f"WARNING: Test {testname} has different number of kernels " - f"in {dir1} than in {dir2}. Not showing diffs.", - file=sys.stderr, + self.compute_kernel_map() + + self.find_preamble() + + print("End of TestRun post_init") + + def compute_kernel_map(self): + """ + Compute a map from test name to list of cuda filenames + """ + # first find the stdout log file + logfile = None + for fname in os.listdir(self.directory): + if fname.find("stdout") != -1: + if logfile is not None: + raise RuntimeError( + f"Input directory {self.directory} contains multiple " + 'possible logs (filenames containing "stdout")' + ) + logfile = os.path.join(self.directory, fname) + if logfile is None: + raise RuntimeError( + f"Input directory {self.directory} contains no log (filenames " + 'containing "stdout")' ) - differing_tests.add(testname) - - for k1, k2 in zip(kernels1, kernels2): - f1 = os.path.join(dir1, "cuda", k1) - f2 = os.path.join(dir2, "cuda", k2) - # -U50 gives us plenty of context - # -I "void kernel" ignores mismatches in kernel signature line - # The intention is to avoid false positives from differently - # numbered kernels, but this can also hide true differences if - # the kernel signature changes. - args = ["diff", "-U50", "-I", "void kernel", f1, f2] - ret = subprocess.run(args, capture_output=True) - if ret.returncode != 0: - print(testname, ret.args) - print(ret.stdout.decode("utf-8")) - differing_tests.add(testname) - - for testname, kernels2 in map2.items(): - if testname not in map1: + + # regex for stripping ANSI color codes + ansi_re = re.compile(r"(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]") + current_test = None + current_files = [] + for line in open(logfile, "r").readlines(): + line = ansi_re.sub("", line.strip()) + if line[:13] == "[ RUN ] ": + current_test = line[13:] + elif line[:13] == "[ OK ] ": + # Finalize test + assert current_test is not None + self.kernel_map[current_test] = current_files + current_test = None + current_files = [] + elif line[:10] == "PRINTING: ": + if line[-3:] == ".cu": + # This avoids comparing the .ptx files that are created then + # removed by the MemoryTest.LoadCache tests + current_files.append(line[10:]) + + def find_preamble(self): + """Look for common preamble in collected kernels""" + preamble_lines = [] + first = True + for cufile in os.listdir(os.path.join(self.directory, "cuda")): + cufile_full = os.path.join(self.directory, "cuda", cufile) + with open(cufile_full, "r") as f: + for i, line in enumerate(f.readlines()): + line = line.rstrip() + # we set nvfuser_index_t in the preamble. We ignore that change for the purposes of this diff + if line[:8] == "typedef " and line[-17:] == " nvfuser_index_t;": + line = "typedef int nvfuser_index_t; // NOTE: hardcoded to int for easier diffing" + if first: + preamble_lines.append(line) + elif i >= len(preamble_lines) or preamble_lines[i] != line: + break + self.preamble_size_bytes = f.tell() + preamble_lines = preamble_lines[:i] + if self.preamble_size_bytes == 0: + # early return if preamble is determined to be empty + break + first = False + self.preamble = "\n".join(preamble_lines) + + def get_kernel(self, test_name, kernel_number, strip_preamble=True) -> str: + """Get a string of the kernel, optionally stripping the preamble""" + basename = self.kernel_map[test_name][kernel_number] + fullname = os.path.join(self.directory, "cuda", basename) + with open(fullname, "r") as f: + if strip_preamble: + f.seek(self.preamble_size_bytes) + return f.read().strip() + + +@dataclass +class KernelDiff: + testname: str + kernel_num: int + code1: str + code2: str + diff: str + + +# Lets us maintain test order +class LastUpdatedOrderedDict(OrderedDict): + """Just an ordered dict with insertion at the end""" + + def __setitem__(self, key, value): + super().__setitem__(key, value) + self.move_to_end(key) + + +@dataclass +class TestDifferences: + run1: TestRun + run2: TestRun + differing_tests: LastUpdatedOrderedDict[str, list[KernelDiff]] = field( + default_factory=list + ) + new_tests: list[str] = field(default_factory=list) + removed_tests: list[str] = field(default_factory=list) + + def __post_init__(self): + if self.run1.command != self.run2.command: + print("WARNING: commands differ between runs", file=sys.stderr) + print(f" {self.run1.directory}: {self.run1.command}", file=sys.stderr) + print(f" {self.run2.directory}: {self.run2.command}", file=sys.stderr) + + if self.run1.exit_code != self.run1.exit_code: print( - f"WARNING: Test {testname} present in {dir2} but not in {dir1}", + f"WARNING: Exit codes {self.run1.exit_code} and {self.run2.exit_code} do not match.", file=sys.stderr, ) - return differing_tests + if self.run1.preamble != self.run2.preamble: + print("Preambles differ between runs indicating changes to runtime files") + + differing_tests_set = set() + for testname, kernels1 in self.run1.kernel_map.items(): + if testname not in self.run2.kernel_map: + removed_tests.append(testname) + continue + + kernels2 = self.run2.kernel_map[testname] + + if len(kernels1) != len(kernels2): + print( + f"WARNING: Test {testname} has different number of kernels " + f"in {dir1} than in {dir2}. Not showing diffs.", + file=sys.stderr, + ) + if testname not in differing_tests_set: + differing_tests.append(testname) + differing_tests_set.add(testname) + + for kernel_num in range(len(kernels1)): + code1 = self.run1.get_kernel(testname, kernel_num, strip_preamble=True) + code2 = self.run2.get_kernel(testname, kernel_num, strip_preamble=True) + + lines1 = code1.splitlines() + lines2 = code2.splitlines() + + diff_str = "\n".join(difflib.unified_diff( + lines1, + lines2, + fromfile=self.run1.git_rev.abbrev, + tofile=self.run2.git_rev.abbrev, + n=5, + )) + if len(diff_str) > 0: + print(testname, kernel_num, diff_str) + diff_obj = KernelDiff(testname, kernel_num, code1, code2, diff_str) + if testname in self.differing_tests: + self.differing_tests[testname].append(diff_obj) + else: + self.differing_tests[testname] = [diff_obj] + + for testname, kernels2 in self.run2.kernel_map.items(): + if testname not in self.run1.kernel_map: + new_tests.append(testname) + + def __len__(self): + return len(self.differing_tests) + + def to_dict(self): + """Convert to hierarchical dict format for use with jinja""" + d = {} + d["git1"] = self.run1.git_rev.to_dict() + d["git2"] = self.run2.git_rev.to_dict() + + def generate_html(self, output_file: str) -> str: + """Return a self-contained HTML string summarizing the codegen comparison""" + import jinja2 + import pygments + from pygments.lexers import CppLexer, DiffLexer + from pygments.formatters import HtmlFormatter + + env = jinja2.Environment(loader=jinja2.FileSystemLoader(searchpath=".")) + template = env.get_template( + os.path.join(os.path.dirname(__file__), "templates", "codediff.html") + ) + context = self.to_dict() + context["pygments_style_defs"] = HtmlFormatter().get_style_defs(".highlight") + + return template.render(template_vars) if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + epilog="This command must be run from within a git checkout of the NVFuser repo." + ) parser.add_argument("dir1", help="Directory containing stdout-*.log and cuda/") parser.add_argument("dir2", help="Directory containing stdout-*.log and cuda/") + parser.add_argument("--html", action="store_true", help="Write HTML file?") + parser.add_argument( + "-o", "--output-file", help="Location of HTML file output if -h is given." + ) args = parser.parse_args() - differing_tests = diff_nvfuser_tests_dirs(args.dir1, args.dir2) + test_diffs = TestDifferences(TestRun(args.dir1), TestRun(args.dir2)) + + if args.html: + output_file = args.output_file + if output_file is None: + # determine default output file + get_abbrev = lambda d: os.path.basename(os.path.dirname(os.path.abspath(d))) + abbrev1 = get_abbrev(args.dir1) + abbrev2 = get_abbrev(args.dir2) + run_name = os.path.basename(os.path.abspath(args.dir1)) + output_file = f"codediff_{abbrev1}_{abbrev2}_{run_name}.html" + with open(output_file, "w") as f: + f.write(differing_tests.generate_html()) if len(differing_tests) == 0: print("No differences found in overlapping tests!") diff --git a/tools/templates/codediff.html b/tools/templates/codediff.html new file mode 100644 index 00000000000..437ba48757b --- /dev/null +++ b/tools/templates/codediff.html @@ -0,0 +1,129 @@ + + + {{ git1.abbrev }} vs {{ git2.abbrev }} - NVFuser codegen diff + + + + +

{{ git1.abbrev }} vs {{ git2.abbrev }} - NVFuser codegen diff

+ +

Git information

+ +

Old commit: {{ git1.abbrev }}

+ {{ git1.title }}
+ {{ git1.author_name }} <{{ git1.author_email }}>
+ {{ git1.author_datetime }}
+ {% if git1.pull_request is defined %} + Pull request: {{ git1.pull_request.title }}
+ View PR on github.com
+ Show commit on github.com
+ {% else %} + Show commit on github.com
+ {% endif %} + Browse code at this commit on github.com
+ +

New commit: {{ git2.abbrev }}

+ {{ git2.title }}
+ {{ git2.author_name }} <{{ git2.author_email }}>
+ {{ git2.author_datetime }}
+ {% if git2.pull_request is defined %} + Pull request: {{ git2.pull_request.title }}
+ View PR on github.com
+ Show commit on github.com
+ {% else %} + Show commit on github.com
+ {% endif %} + Browse code at this commit on github.com
+ + +

Code comparison

+ + {% if new_tests|length > 0 %} +

New Tests

+ {% for test in new_tests %} + {{loop.index}}: {{test.name}}
+ + {% endfor %} + {% endif %} + + {% if removed_tests|length > 0 %} +

Removed Tests

+ {% for test in removed_tests %} + {{loop.index}}: {{test.name}} + {% endfor %} + {% endif %} + +

Test Diffs

+ {% for test in test_diffs %} + {{loop.index}}: {{test.name}} + {% if test.kernels|length > 1 %} +
+ {% endif %} + {% set outer_index = loop.index %} + {% for kernel in test.kernels %} +     Kernel {{ loop.index }} + + +
+ + + + {% endfor %} + {% endfor %} + + From fc7746d4547db9894984af07e21af12b24fad4ed Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 29 Sep 2023 13:23:26 -0400 Subject: [PATCH 02/39] Add small test script REMOVE LATER --- tools/templates/test_jinja_pygmentize.py | 234 +++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 tools/templates/test_jinja_pygmentize.py diff --git a/tools/templates/test_jinja_pygmentize.py b/tools/templates/test_jinja_pygmentize.py new file mode 100644 index 00000000000..ea41086b50c --- /dev/null +++ b/tools/templates/test_jinja_pygmentize.py @@ -0,0 +1,234 @@ +import jinja2 +import pygments +from pygments.lexers import CppLexer, DiffLexer +from pygments.formatters import HtmlFormatter + +env = jinja2.Environment(loader=jinja2.FileSystemLoader(searchpath=".")) +template = env.get_template("codediff.html") + +# some lorem ipsum code and diff +some_code = """ +__global__ void kernel1(Tensor T0, Tensor T1) { + alignas(16) extern __shared__ char array[]; + const unsigned smem_offset = 0; + NVFUSER_DEFINE_MAGIC_ZERO; + nvfuser_index_t i0; + i0 = ceilDiv(T0.logical_size[3], 32); + nvfuser_index_t i1; + i1 = T0.logical_size[2] * i0; + nvfuser_index_t i2; + i2 = ((nvfuser_index_t)blockIdx.x) % i1; + nvfuser_index_t i3; + i3 = i2 / i0; + nvfuser_index_t i4; + i4 = i2 % i0; + nvfuser_index_t i5; + i5 = ((nvfuser_index_t)blockIdx.x) / i1; + nvfuser_index_t i6; + i6 = ((nvfuser_index_t)threadIdx.x) / 8; + nvfuser_index_t i7; + i7 = ((nvfuser_index_t)threadIdx.x) % 8; + nvfuser_index_t i8; + i8 = 32 * i4; + nvfuser_index_t i9; + i9 = 4 * i7; + nvfuser_index_t i10; + i10 = (((i9 + ((T0.logical_size[3] * T0.logical_size[2]) * i6)) + (((32 * T0.logical_size[3]) * T0.logical_size[2]) * i5)) + (T0.logical_size[3] * i3)) + i8; + nvfuser_index_t i11; + i11 = (16 * T0.logical_size[3]) * T0.logical_size[2]; + nvfuser_index_t i12; + i12 = 4 * ((nvfuser_index_t)threadIdx.x); + nvfuser_index_t i13; + i13 = ((nvfuser_index_t)threadIdx.x) / 32; + nvfuser_index_t i14; + i14 = ((nvfuser_index_t)threadIdx.x) % 32; + nvfuser_index_t i15; + i15 = (32 * i14) + i13; + nvfuser_index_t i16; + i16 = T0.logical_size[1] * T0.logical_size[3]; + nvfuser_index_t i17; + i17 = 32 * i5; + nvfuser_index_t i18; + i18 = i14 + i17; + nvfuser_index_t i19; + i19 = ((((T0.logical_size[1] * i13) + (i16 * i3)) + ((32 * T0.logical_size[1]) * i4)) + ((i16 * T0.logical_size[2]) * (i18 / T0.logical_size[1]))) + (i18 % T0.logical_size[1]); + nvfuser_index_t i20; + i20 = 4 * T0.logical_size[1]; + nvfuser_index_t i21; + i21 = T0.logical_size[0] * T0.logical_size[1]; + bool b22; + b22 = i18 < i21; + bool b23; + b23 = ((3 + i9) + i8) < T0.logical_size[3]; + nvfuser_index_t i24; + i24 = ((-i21) + i6) + i17; + nvfuser_index_t i25; + i25 = ((-T0.logical_size[3]) + i13) + i8; + float* T2 = reinterpret_cast(array + smem_offset + 0); + if (((((((16 + i6) + i17) < i21) && ((((i7 * 4) + 3) + i8) < T0.logical_size[3])) && b22) && (((28 + i13) + i8) < T0.logical_size[3]))) { + #pragma unroll + for(nvfuser_index_t i26 = 0; i26 < 2; ++i26) { + loadGeneric( &T2[(i12 + (512 * i26))], &T0[(i10 + (i11 * (i26 + nvfuser_zero)))]); + } + NVFUSER_UPDATE_MAGIC_ZERO; + float T3[8]; + __barrier_sync(0); + #pragma unroll + for(nvfuser_index_t i27 = 0; i27 < 8; ++i27) { + T3[i27] + = T2[(i15 + (4 * i27))]; + } + NVFUSER_UPDATE_MAGIC_ZERO; + #pragma unroll + for(nvfuser_index_t i28 = 0; i28 < 8; ++i28) { + T1[(i19 + (i20 * (i28 + nvfuser_zero)))] + = T3[i28]; + } + NVFUSER_UPDATE_MAGIC_ZERO; + } else { + #pragma unroll + for(nvfuser_index_t i26 = 0; i26 < 2; ++i26) { + nvfuser_index_t i29; + i29 = i26 + nvfuser_zero; + if ((b23 && (i24 < (-(16 * i29))))) { + loadGeneric( &T2[(i12 + (512 * i26))], &T0[(i10 + (i11 * i29))]); + } + } + NVFUSER_UPDATE_MAGIC_ZERO; + float T3[8]; + __barrier_sync(0); + #pragma unroll + for(nvfuser_index_t i27 = 0; i27 < 8; ++i27) { + if ((b22 && (i25 < (-(4 * (i27 + nvfuser_zero)))))) { + T3[i27] + = T2[(i15 + (4 * i27))]; + } + } + NVFUSER_UPDATE_MAGIC_ZERO; + #pragma unroll + for(nvfuser_index_t i28 = 0; i28 < 8; ++i28) { + nvfuser_index_t i30; + i30 = i28 + nvfuser_zero; + if ((b22 && (i25 < (-(4 * i30))))) { + T1[(i19 + (i20 * i30))] + = T3[i28]; + } + } + NVFUSER_UPDATE_MAGIC_ZERO; + } +} + """ +some_diff = """ +__global__ void kernel15(Tensor T8, nvfuser_index_t i0, nvfuser_index_t i1, nvfuser_index_t i2, Tensor T13, Tensor T6) { + alignas(16) extern __shared__ char array[]; + void* shared_mem = array; + NVFUSER_DEFINE_MAGIC_ZERO; + nvfuser_index_t i3; + i3 = 4 * ((nvfuser_index_t)threadIdx.x); + Tensor s4; + s4.data = T8.data; + s4.logical_size = T8.logical_size; + s4.alloc_stride = T8.alloc_stride; + double d5; +- d5 = (double)(i1); ++ d5 = (double)(i0); + double d6; +- d6 = (double)(i2); ++ d6 = (double)(i1); + double d7; + d7 = (double)(0); + double d8; + d8 = (double)(0); + double d9; + d9 = (double)(0); + double d10; +- d10 = (double)(i1); ++ d10 = (double)(i0); + double d11; +- d11 = (double)(i2); ++ d11 = (double)(i1); + Array a12; + a12 = s4.logical_size; + nvfuser_index_t i13; + i13 = a12[2]; + nvfuser_index_t i14; +- i14 = i3 + (((i1 * i2) * i13) * ((nvfuser_index_t)blockIdx.x)); ++ i14 = i3 + (((i0 * i1) * i13) * ((nvfuser_index_t)blockIdx.x)); + nvfuser_index_t i15; +- i15 = (i2 * i1) * i13; ++ i15 = (i1 * i0) * i13; + nvfuser_index_t i16; + i16 = 4 * (ceilDiv((ceilDiv(i15, 4)), 7)); + nvfuser_index_t i17; + i17 = (3 - i15) + i3; + bool b18; + b18 = ((nvfuser_index_t)threadIdx.x) == 0; + double d19; + d19 = (double)(i13); + double d20; + d20 = (double)(i13); + double d21; + d21 = 1.00000000000000000e+00 * d20; + double d22; + d22 = d21 * d11; + double d23; + d23 = d22 * d10; + double d24; + d24 = d23 - d9; + bool b25; + b25 = d24 >= d8; + """ + +template_vars = { + "pygments_style_defs": HtmlFormatter().get_style_defs(".highlight"), + "git1": { + "abbrev": "8fd1ff44", + "full_hash": "8fd144083db93d5f954b62b25f1c159947652691", + "pull_request": { + "title": "Wrap CompiledKernel in unique_ptr and add a proper destructor.", + "number": 968, + }, + "author_name": "Jacob Hinkle", + "author_email": "jhinkle@nvidia.com", + "author_datetime": "Wed Sep 27 09:52:34 2023 -0400", + "title": "Merge remote-tracking branch 'origin/main' into scalar_seg_edges", + }, + "git2": { + "abbrev": "877dc636", + "full_hash": "877dc63606d35d44a0320f927fdb83fd8168eaf9", + "pull_request": { + "title": "Visit extent scalars in SegmentCandidateFinder::resolveScalarsInGroup", + "number": 840, + }, + "author_name": "Jacob Hinkle", + "author_email": "jhinkle@nvidia.com", + "author_datetime": "Wed Sep 27 07:26:54 2023 -0400", + "title": "Merge remote-tracking branch 'origin/main' into scalar_seg_edges", + }, + "test_diffs": [ + { + "name": "NVFuserTestFoo", + "kernels": [ + { + "highlighted_code1": pygments.highlight(some_code, CppLexer(), HtmlFormatter()), + "highlighted_code2": pygments.highlight(some_code, CppLexer(), HtmlFormatter()), + "highlighted_diff": pygments.highlight(some_diff, DiffLexer(), HtmlFormatter()), + }, + ], + }, + ], + "new_tests": [ + { + "name": "bat", + "highlighted_code": pygments.highlight(some_code, CppLexer(), HtmlFormatter()), + }, + ], + "removed_tests": [ + { + "name": "baz", + "highlighted_code": pygments.highlight(some_code, CppLexer(), HtmlFormatter()), + }, + ] +} + +print(template.render(template_vars)) From 2994d77325738de114d45403ec0aa7ee5ba43311 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 29 Sep 2023 14:30:47 -0400 Subject: [PATCH 03/39] Still WIP --- tools/diff_codegen_nvfuser_tests.py | 167 +++++++++++++++++------ tools/templates/codediff.html | 2 +- tools/templates/test_jinja_pygmentize.py | 1 + 3 files changed, 131 insertions(+), 39 deletions(-) diff --git a/tools/diff_codegen_nvfuser_tests.py b/tools/diff_codegen_nvfuser_tests.py index dcaa7b00fe1..b344b455580 100644 --- a/tools/diff_codegen_nvfuser_tests.py +++ b/tools/diff_codegen_nvfuser_tests.py @@ -19,7 +19,7 @@ import subprocess import sys from datetime import datetime -from typing import Optional, Set +from typing import Optional, Set, Union @dataclass @@ -36,6 +36,7 @@ def __post_init__(self): @dataclass class GitRev: abbrev: str + title: str = None full_hash: str = None in_branches: list[GitBranch] = field(default_factory=list) author_name: str = None @@ -84,6 +85,7 @@ def __post_init__(self): .stdout.strip() .decode("utf-8") ) + self.title = git_show("%s") self.author_name = git_show("%an") self.author_email = git_show("%ae") @@ -92,6 +94,22 @@ def __post_init__(self): self.author_time = get_datetime(git_show("%ad")) self.commit_time = get_datetime(git_show("%cd")) + def to_dict(self): + return { + "abbrev": self.abbrev, + "full_hash": self.full_hash, + # TODO: detect PRs and add in this format + # "pull_request": { + # "title": "Wrap CompiledKernel in unique_ptr and add a proper destructor.", + # "number": 968, + # }, + "author_name": self.author_name, + "author_email": self.author_email, + "author_datetime": str(self.author_time), + "title": self.title, + } + + @dataclass class TestRun: @@ -175,6 +193,7 @@ def find_preamble(self): """Look for common preamble in collected kernels""" preamble_lines = [] first = True + files_processed = 0 # limit how many files to check for cufile in os.listdir(os.path.join(self.directory, "cuda")): cufile_full = os.path.join(self.directory, "cuda", cufile) with open(cufile_full, "r") as f: @@ -193,6 +212,9 @@ def find_preamble(self): # early return if preamble is determined to be empty break first = False + files_processed += 1 + if files_processed >= 50: + break self.preamble = "\n".join(preamble_lines) def get_kernel(self, test_name, kernel_number, strip_preamble=True) -> str: @@ -202,8 +224,20 @@ def get_kernel(self, test_name, kernel_number, strip_preamble=True) -> str: with open(fullname, "r") as f: if strip_preamble: f.seek(self.preamble_size_bytes) - return f.read().strip() + code = f.read().strip() + return code +def highlight_code(code) -> str: + import pygments + from pygments.formatters import HtmlFormatter + from pygments.lexers import CppLexer + return pygments.highlight(code, CppLexer(), HtmlFormatter()) + +def highlight_diff(diff) -> str: + import pygments + from pygments.formatters import HtmlFormatter + from pygments.lexers import DiffLexer + return pygments.highlight(diff, DiffLexer(), HtmlFormatter()) @dataclass class KernelDiff: @@ -213,6 +247,15 @@ class KernelDiff: code2: str diff: str + def to_dict(self): + print("Highlighting diff ", self.kernel_num, 'for test', self.testname) + return { + "number": self.kernel_num, + "highlighted_code1": highlight_code(self.code1), + "highlighted_code2": highlight_code(self.code2), + "highlighted_diff": highlight_diff(self.diff), + } + # Lets us maintain test order class LastUpdatedOrderedDict(OrderedDict): @@ -227,9 +270,10 @@ def __setitem__(self, key, value): class TestDifferences: run1: TestRun run2: TestRun - differing_tests: LastUpdatedOrderedDict[str, list[KernelDiff]] = field( - default_factory=list - ) + # eitehr a list of diffs, or different numbers of kernels present + differing_tests: LastUpdatedOrderedDict[ + str, Union[tuple[int, int], list[KernelDiff]] + ] = field(default_factory=LastUpdatedOrderedDict) new_tests: list[str] = field(default_factory=list) removed_tests: list[str] = field(default_factory=list) @@ -248,10 +292,9 @@ def __post_init__(self): if self.run1.preamble != self.run2.preamble: print("Preambles differ between runs indicating changes to runtime files") - differing_tests_set = set() for testname, kernels1 in self.run1.kernel_map.items(): if testname not in self.run2.kernel_map: - removed_tests.append(testname) + self.removed_tests.append(testname) continue kernels2 = self.run2.kernel_map[testname] @@ -259,27 +302,27 @@ def __post_init__(self): if len(kernels1) != len(kernels2): print( f"WARNING: Test {testname} has different number of kernels " - f"in {dir1} than in {dir2}. Not showing diffs.", + f"in {dir1} than in {dir2}. Not showing diffs for this test.", file=sys.stderr, ) - if testname not in differing_tests_set: - differing_tests.append(testname) - differing_tests_set.add(testname) + self.differing_tests[testname] = (len(kernels1), len(kernels2)) for kernel_num in range(len(kernels1)): - code1 = self.run1.get_kernel(testname, kernel_num, strip_preamble=True) - code2 = self.run2.get_kernel(testname, kernel_num, strip_preamble=True) + code1 = self.run1.get_kernel(testname, kernel_num, strip_preamble=False) + code2 = self.run2.get_kernel(testname, kernel_num, strip_preamble=False) lines1 = code1.splitlines() lines2 = code2.splitlines() - diff_str = "\n".join(difflib.unified_diff( - lines1, - lines2, - fromfile=self.run1.git_rev.abbrev, - tofile=self.run2.git_rev.abbrev, - n=5, - )) + diff_str = "\n".join( + difflib.unified_diff( + lines1, + lines2, + fromfile=self.run1.git_rev.abbrev, + tofile=self.run2.git_rev.abbrev, + n=5, + ) + ) if len(diff_str) > 0: print(testname, kernel_num, diff_str) diff_obj = KernelDiff(testname, kernel_num, code1, code2, diff_str) @@ -290,7 +333,7 @@ def __post_init__(self): for testname, kernels2 in self.run2.kernel_map.items(): if testname not in self.run1.kernel_map: - new_tests.append(testname) + self.new_tests.append(testname) def __len__(self): return len(self.differing_tests) @@ -300,22 +343,54 @@ def to_dict(self): d = {} d["git1"] = self.run1.git_rev.to_dict() d["git2"] = self.run2.git_rev.to_dict() - - def generate_html(self, output_file: str) -> str: + + d["test_diffs"] = {} + for testname, diffs in self.differing_tests.items(): + if isinstance(diffs, tuple): + # differing numbers of kernels produced by this test + d["test_diffs"][testname] = diffs + else: + d["test_diffs"][testname] = [di.to_dict() for di in diffs] + + d["new_tests"] = [] + for testname in self.new_tests: + kernels_code = [] + for i in range(len(self.run2.kernel_map[testname])): + kernels_code.append(highlight_code(self.run2.get_kernel(testname, i, strip_preamble=False))) + d["new_tests"].append({ + "name": testname, + "highlighted_code": kernels_code, + }) + + d["removed_tests"] = [] + for testname in self.removed_tests: + kernels_code = [] + for i in range(len(self.run1.kernel_map[testname])): + kernels_code.append(highlight_code(self.run1.get_kernel(testname, i, strip_preamble=False))) + d["new_tests"].append({ + "name": testname, + "highlighted_code": kernels_code, + }) + + return d + + def generate_html(self) -> str: """Return a self-contained HTML string summarizing the codegen comparison""" import jinja2 - import pygments - from pygments.lexers import CppLexer, DiffLexer from pygments.formatters import HtmlFormatter - env = jinja2.Environment(loader=jinja2.FileSystemLoader(searchpath=".")) - template = env.get_template( - os.path.join(os.path.dirname(__file__), "templates", "codediff.html") - ) - context = self.to_dict() + tools_dir = os.path.dirname(__file__) + env = jinja2.Environment(loader=jinja2.FileSystemLoader(searchpath=tools_dir)) + template = env.get_template("templates/codediff.html") + import json + if True: # write + context = self.to_dict() + json.dump(context, open("context.json", "w")) + else: # read + context = json.load(open("context.json", "r")) context["pygments_style_defs"] = HtmlFormatter().get_style_defs(".highlight") - return template.render(template_vars) + return template.render(context) if __name__ == "__main__": @@ -332,7 +407,15 @@ def generate_html(self, output_file: str) -> str: ) args = parser.parse_args() - test_diffs = TestDifferences(TestRun(args.dir1), TestRun(args.dir2)) + import pickle + + if False: # write + test_diffs = TestDifferences(TestRun(args.dir1), TestRun(args.dir2)) + with open("diffs.pkl", "wb") as f: + pickle.dump(test_diffs, f) + else: # read + with open("diffs.pkl", "rb") as f: + test_diffs = pickle.load(f) if args.html: output_file = args.output_file @@ -344,13 +427,21 @@ def generate_html(self, output_file: str) -> str: run_name = os.path.basename(os.path.abspath(args.dir1)) output_file = f"codediff_{abbrev1}_{abbrev2}_{run_name}.html" with open(output_file, "w") as f: - f.write(differing_tests.generate_html()) + f.write(test_diffs.generate_html()) + + num_differing_kernels = 0 + for k, v in test_diffs.differing_tests.items(): + if isinstance(v, list): + num_differing_kernels += len(v) - if len(differing_tests) == 0: + if len(test_diffs.differing_tests) == 0: print("No differences found in overlapping tests!") else: - print("Differences found in the following tests:") - for t in differing_tests: - print(f" {t}") - exit(len(differing_tests)) + print(len(test_diffs.differing_tests), "tests found") + if len(test_diffs.new_tests) > 0: + print(len(test_diffs.new_tests), "new tests found") + if len(test_diffs.removed_tests) > 0: + print(len(test_diffs.removed_tests), "removed tests found") + + exit(len(test_diffs.differing_tests)) diff --git a/tools/templates/codediff.html b/tools/templates/codediff.html index 437ba48757b..e4f27baf352 100644 --- a/tools/templates/codediff.html +++ b/tools/templates/codediff.html @@ -110,7 +110,7 @@

Test Diffs

{% endif %} {% set outer_index = loop.index %} {% for kernel in test.kernels %} -     Kernel {{ loop.index }} +     Kernel {{ kernel.number }}
diff --git a/tools/templates/test_jinja_pygmentize.py b/tools/templates/test_jinja_pygmentize.py index ea41086b50c..a6da83c6306 100644 --- a/tools/templates/test_jinja_pygmentize.py +++ b/tools/templates/test_jinja_pygmentize.py @@ -210,6 +210,7 @@ "name": "NVFuserTestFoo", "kernels": [ { + "kernel_num": 3, "highlighted_code1": pygments.highlight(some_code, CppLexer(), HtmlFormatter()), "highlighted_code2": pygments.highlight(some_code, CppLexer(), HtmlFormatter()), "highlighted_diff": pygments.highlight(some_diff, DiffLexer(), HtmlFormatter()), From e845a96707333831e02564904e5578e994492616 Mon Sep 17 00:00:00 2001 From: Jacob Hinkle Date: Fri, 29 Sep 2023 15:21:53 -0400 Subject: [PATCH 04/39] First working version --- tools/diff_codegen_nvfuser_tests.py | 141 ++++++++++++++--------- tools/templates/codediff.html | 31 +++-- tools/templates/test_jinja_pygmentize.py | 22 +++- 3 files changed, 121 insertions(+), 73 deletions(-) diff --git a/tools/diff_codegen_nvfuser_tests.py b/tools/diff_codegen_nvfuser_tests.py index b344b455580..b87ad8fdd1a 100644 --- a/tools/diff_codegen_nvfuser_tests.py +++ b/tools/diff_codegen_nvfuser_tests.py @@ -19,7 +19,7 @@ import subprocess import sys from datetime import datetime -from typing import Optional, Set, Union +from typing import Union @dataclass @@ -70,27 +70,32 @@ def __post_init__(self): in_branches.append(line) date_fmt = "%Y/%m/%d %H:%M:%S %z" - git_show = ( - lambda fmt: subprocess.run( - [ - "git", - "show", - "--no-patch", - f"--format={fmt}", - f"--date=format:{date_fmt}", - self.full_hash, - ], - capture_output=True, + + def git_show(fmt) -> str: + return ( + subprocess.run( + [ + "git", + "show", + "--no-patch", + f"--format={fmt}", + f"--date=format:{date_fmt}", + self.full_hash, + ], + capture_output=True, + ) + .stdout.strip() + .decode("utf-8") ) - .stdout.strip() - .decode("utf-8") - ) + self.title = git_show("%s") self.author_name = git_show("%an") self.author_email = git_show("%ae") # Get date and time for this commit in datetime format - get_datetime = lambda time_str: datetime.strptime(time_str, date_fmt) + def get_datetime(time_str): + return datetime.strptime(time_str, date_fmt) + self.author_time = get_datetime(git_show("%ad")) self.commit_time = get_datetime(git_show("%cd")) @@ -110,7 +115,6 @@ def to_dict(self): } - @dataclass class TestRun: directory: str @@ -125,6 +129,7 @@ class TestRun: preamble: str = None # lets us seek past preamble preamble_size_bytes: int = None + preamble_size_lines: int = None def __post_init__(self): # get description of this git rev @@ -215,6 +220,7 @@ def find_preamble(self): files_processed += 1 if files_processed >= 50: break + self.preamble_size_lines = len(preamble_lines) self.preamble = "\n".join(preamble_lines) def get_kernel(self, test_name, kernel_number, strip_preamble=True) -> str: @@ -222,23 +228,34 @@ def get_kernel(self, test_name, kernel_number, strip_preamble=True) -> str: basename = self.kernel_map[test_name][kernel_number] fullname = os.path.join(self.directory, "cuda", basename) with open(fullname, "r") as f: - if strip_preamble: - f.seek(self.preamble_size_bytes) - code = f.read().strip() + code = "" + for i, line in enumerate(f.readlines()): + if not strip_preamble or i >= self.preamble_size_lines: + # replace kernel934 with kernel1 to facilitate diffing + code += re.sub(r"\bkernel\d+\b", "kernelN", line) + code = code.rstrip() + if strip_preamble and code[-1] == "}": + # trailing curly brace is close of namespace. This will clean it up so that we have just the kernel + code = code[:-1].rstrip() return code + def highlight_code(code) -> str: import pygments from pygments.formatters import HtmlFormatter - from pygments.lexers import CppLexer + from pygments.lexers import CppLexer + return pygments.highlight(code, CppLexer(), HtmlFormatter()) + def highlight_diff(diff) -> str: import pygments from pygments.formatters import HtmlFormatter - from pygments.lexers import DiffLexer + from pygments.lexers import DiffLexer + return pygments.highlight(diff, DiffLexer(), HtmlFormatter()) + @dataclass class KernelDiff: testname: str @@ -248,7 +265,7 @@ class KernelDiff: diff: str def to_dict(self): - print("Highlighting diff ", self.kernel_num, 'for test', self.testname) + print("Highlighting diff of kernel", self.kernel_num, "in test", self.testname) return { "number": self.kernel_num, "highlighted_code1": highlight_code(self.code1), @@ -308,8 +325,8 @@ def __post_init__(self): self.differing_tests[testname] = (len(kernels1), len(kernels2)) for kernel_num in range(len(kernels1)): - code1 = self.run1.get_kernel(testname, kernel_num, strip_preamble=False) - code2 = self.run2.get_kernel(testname, kernel_num, strip_preamble=False) + code1 = self.run1.get_kernel(testname, kernel_num, strip_preamble=True) + code2 = self.run2.get_kernel(testname, kernel_num, strip_preamble=True) lines1 = code1.splitlines() lines2 = code2.splitlines() @@ -343,34 +360,51 @@ def to_dict(self): d = {} d["git1"] = self.run1.git_rev.to_dict() d["git2"] = self.run2.git_rev.to_dict() - - d["test_diffs"] = {} + + d["test_diffs"] = [] for testname, diffs in self.differing_tests.items(): if isinstance(diffs, tuple): # differing numbers of kernels produced by this test - d["test_diffs"][testname] = diffs + d["test_diffs"].append(diffs) else: - d["test_diffs"][testname] = [di.to_dict() for di in diffs] + d["test_diffs"].append( + { + "name": testname, + "kernels": [di.to_dict() for di in diffs], + } + ) d["new_tests"] = [] for testname in self.new_tests: kernels_code = [] for i in range(len(self.run2.kernel_map[testname])): - kernels_code.append(highlight_code(self.run2.get_kernel(testname, i, strip_preamble=False))) - d["new_tests"].append({ - "name": testname, - "highlighted_code": kernels_code, - }) + kernels_code.append( + highlight_code( + self.run2.get_kernel(testname, i, strip_preamble=True) + ) + ) + d["new_tests"].append( + { + "name": testname, + "highlighted_code": kernels_code, + } + ) d["removed_tests"] = [] for testname in self.removed_tests: kernels_code = [] for i in range(len(self.run1.kernel_map[testname])): - kernels_code.append(highlight_code(self.run1.get_kernel(testname, i, strip_preamble=False))) - d["new_tests"].append({ - "name": testname, - "highlighted_code": kernels_code, - }) + kernels_code.append( + highlight_code( + self.run1.get_kernel(testname, i, strip_preamble=True) + ) + ) + d["removed_tests"].append( + { + "name": testname, + "highlighted_code": kernels_code, + } + ) return d @@ -382,12 +416,7 @@ def generate_html(self) -> str: tools_dir = os.path.dirname(__file__) env = jinja2.Environment(loader=jinja2.FileSystemLoader(searchpath=tools_dir)) template = env.get_template("templates/codediff.html") - import json - if True: # write - context = self.to_dict() - json.dump(context, open("context.json", "w")) - else: # read - context = json.load(open("context.json", "r")) + context = self.to_dict() context["pygments_style_defs"] = HtmlFormatter().get_style_defs(".highlight") return template.render(context) @@ -407,21 +436,15 @@ def generate_html(self) -> str: ) args = parser.parse_args() - import pickle - - if False: # write - test_diffs = TestDifferences(TestRun(args.dir1), TestRun(args.dir2)) - with open("diffs.pkl", "wb") as f: - pickle.dump(test_diffs, f) - else: # read - with open("diffs.pkl", "rb") as f: - test_diffs = pickle.load(f) + test_diffs = TestDifferences(TestRun(args.dir1), TestRun(args.dir2)) if args.html: output_file = args.output_file if output_file is None: # determine default output file - get_abbrev = lambda d: os.path.basename(os.path.dirname(os.path.abspath(d))) + def get_abbrev(d): + return os.path.basename(os.path.dirname(os.path.abspath(d))) + abbrev1 = get_abbrev(args.dir1) abbrev2 = get_abbrev(args.dir2) run_name = os.path.basename(os.path.abspath(args.dir1)) @@ -437,8 +460,12 @@ def generate_html(self) -> str: if len(test_diffs.differing_tests) == 0: print("No differences found in overlapping tests!") else: - - print(len(test_diffs.differing_tests), "tests found") + print( + num_differing_kernels, + "from", + len(test_diffs.differing_tests), + "tests found", + ) if len(test_diffs.new_tests) > 0: print(len(test_diffs.new_tests), "new tests found") if len(test_diffs.removed_tests) > 0: diff --git a/tools/templates/codediff.html b/tools/templates/codediff.html index e4f27baf352..2d1001de67c 100644 --- a/tools/templates/codediff.html +++ b/tools/templates/codediff.html @@ -88,31 +88,42 @@

Code comparison

{% if new_tests|length > 0 %}

New Tests

{% for test in new_tests %} - {{loop.index}}: {{test.name}}
- + {{test.name}}
+ {% set test_num = loop.index %} + {% for code in test.highlighted_code %} + Kernel {{loop.index}} +

+ + {% endfor %} {% endfor %} {% endif %} {% if removed_tests|length > 0 %}

Removed Tests

{% for test in removed_tests %} - {{loop.index}}: {{test.name}} + {{test.name}}
+ {% set test_num = loop.index %} + {% for code in test.highlighted_code %} + Kernel {{loop.index}} +

+ + {% endfor %} {% endfor %} {% endif %} - +

Test Diffs

{% for test in test_diffs %} - {{loop.index}}: {{test.name}} - {% if test.kernels|length > 1 %} + {{loop.index}}: {{test.name}}
- {% endif %} {% set outer_index = loop.index %} {% for kernel in test.kernels %}     Kernel {{ kernel.number }} - +