diff --git a/tools/compare_codegen.sh b/tools/compare_codegen.sh index 31670027025..b139e1869a6 100755 --- a/tools/compare_codegen.sh +++ b/tools/compare_codegen.sh @@ -130,6 +130,12 @@ run_test() { mkdir -p "$testdir" echo "$testcmd" > "$testdir/command" + # exclude $testdir when printing env + printenv | grep -v '^testdir=' > "$testdir/env" + + nvcc --version > "$testdir/nvcc_version" + nvidia-smi --query-gpu=gpu_name --format=csv,noheader > "$testdir/gpu_names" + # Allow next command to fail set +e $testcmd | tee "$testdir/stdout-$(date +%Y%m%d_%H%M%S).log" @@ -186,7 +192,7 @@ collect_kernels() { export NVFUSER_TEST_RANDOM_SEED=0 export NVFUSER_DISABLE=parallel_compile # run tests and benchmarks with cuda_to_file and dump output to files - export NVFUSER_DUMP=cuda_to_file + export NVFUSER_DUMP=cuda_to_file,ptxas_verbose mkdir -p "$outdir/$commit" diff --git a/tools/diff_codegen_nvfuser_tests.py b/tools/diff_codegen_nvfuser_tests.py index 57a82611868..9cd3fa1fd05 100644 --- a/tools/diff_codegen_nvfuser_tests.py +++ b/tools/diff_codegen_nvfuser_tests.py @@ -1,3 +1,6 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause """ Find corresponding .cu files for matching tests, even when new tests are introduced between two commits. Diffs are displayed and the return value is the @@ -11,151 +14,564 @@ codegen_comparison/{$commit1,$commit2}/binary_tests """ +from dataclasses import asdict, dataclass, field, InitVar +import difflib import os import re import subprocess import sys -# precompile an RE we'll apply over and over - - -def get_test_map(directory: str) -> dict[str, list[str]]: - """ - Get a map from test name to list of cuda filenames - """ - # first find the stdout log file - logfile = None - for fname in os.listdir(directory): - if fname.find("stdout") != -1: - if logfile is not None: - raise RuntimeError( - f"Input directory {directory} contains multiple " - 'possible logs (filenames containing "stdout")' - ) - logfile = os.path.join(directory, fname) - if logfile is None: - raise RuntimeError( - f"Input directory {directory} contains no log (filenames " - 'containing "stdout")' - ) - # regex for stripping ANSI color codes - ansi_re = re.compile(r"(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]") - kernel_map = {} - current_test = None - current_files = [] - for line in open(logfile, "r").readlines(): - line = ansi_re.sub("", line.strip()) - if line[:13] == "[ RUN ] ": - current_test = line[13:] - elif line[:13] == "[ OK ] ": - # Finalize test - assert current_test is not None - kernel_map[current_test] = current_files - current_test = None - current_files = [] - elif line[:10] == "PRINTING: ": - if line[-3:] == ".cu": - # This avoids comparing the .ptx files that are created then - # removed by the MemoryTest.LoadCache tests - current_files.append(line[10:]) - - return kernel_map - - -def diff_nvfuser_tests_dirs(dir1: str, dir2: str): - """ - Given directories for two - """ - # check that commands are equal - command1 = open(os.path.join(dir1, "command"), "r").read() - command2 = open(os.path.join(dir2, "command"), "r").read() - - if command1 != command2: - print("WARNING: commands differ between runs", file=sys.stderr) - print(f" {dir1}: {command1}", file=sys.stderr) - print(f" {dir2}: {command2}", file=sys.stderr) - - # check that command includes "nvfuser_tests" - if command1.find("nvfuser_tests") == -1: - print( - "ERROR: Command does not appear to be nvfuser_tests. Aborting.", - file=sys.stderr, - ) - sys.exit(1) +@dataclass +class GitRev: + abbrev: str + title: str = field(init=False) + full_hash: str = field(init=False) + author_name: str = field(init=False) + author_email: str = field(init=False) + author_time: str = field(init=False) + commit_time: str = field(init=False) - # check that exit codes are equal - exitcode1 = open(os.path.join(dir1, "exitcode"), "r").read() - exitcode2 = open(os.path.join(dir2, "exitcode"), "r").read() - if exitcode1 != exitcode2: - print( - f"WARNING: Exit codes {exitcode1} and {exitcode2} do not match.", - file=sys.stderr, + def __post_init__(self): + self.full_hash = ( + subprocess.run(["git", "rev-parse", self.abbrev], capture_output=True) + .stdout.strip() + .decode("utf-8") ) + for line in ( + subprocess.run( + ["git", "branch", "--quiet", "--color=never", self.full_hash], + capture_output=True, + ) + .stdout.strip() + .splitlines() + ): + # Possible output: + # + # main + # * scalar_seg_edges + # + # In this case, we have checked out the HEAD of the + # scalar_seg_edges branch. Here we just strip the *. + if line[0] == "*": + line = line[2:] + in_branches.append(line) + + def git_show(fmt) -> str: + return ( + subprocess.run( + [ + "git", + "show", + "--no-patch", + f"--format={fmt}", + self.full_hash, + ], + capture_output=True, + ) + .stdout.strip() + .decode("utf-8") + ) + + self.title = git_show("%s") + self.author_name = git_show("%an") + self.author_email = git_show("%ae") + self.author_time = git_show("%ad") + self.commit_time = git_show("%cd") + + +@dataclass +class CompiledKernel: + filename: str + code: str | None = None + ptxas_info: str | None = None + gmem_bytes: int = 0 + smem_bytes: int = 0 + cmem_bank_bytes: list[int] | None = None + registers: int | None = None + stack_frame_bytes: int = 0 + spill_store_bytes: int = 0 + spill_load_bytes: int = 0 + mangled_name: str | None = None + arch: str | None = None + index_type: str | None = None + + def __post_init__(self): + self.parse_ptxas() + + def parse_ptxas(self): + # Example input: + # + # ptxas info : 307 bytes gmem + # ptxas info : Compiling entry function '_ZN11CudaCodeGen7kernel1ENS_6TensorIfLi2ELi2EEES1_S1_' for 'sm_86' + # ptxas info : Function properties for _ZN11CudaCodeGen7kernel1ENS_6TensorIfLi2ELi2EEES1_S1_ + # ptxas . 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads + # ptxas info : Used 203 registers, 16 bytes smem, 472 bytes cmem[0], 8 bytes cmem[2] + # + # Here we parse this into the fields presented, and we replace the + # mangled kernel name since it includes the kernel number and is + # useless for the purposes of diffing since the kernel signature is + # already included. + if self.ptxas_info is None: + return + + m = re.search(r"Compiling entry function '(.*)' for '(.*)'", self.ptxas_info) + if m is not None: + self.mangled_name, self.arch = m.groups() + + def find_unique_int(pattern) -> int | None: + m = re.search(pattern, self.ptxas_info) + return 0 if m is None else int(m.groups()[0]) + + self.stack_frame_bytes = find_unique_int(r"(\d+) bytes stack frame") + self.spill_store_bytes = find_unique_int(r"(\d+) bytes spill stores") + self.spill_load_bytes = find_unique_int(r"(\d+) bytes spill loads") + self.registers = find_unique_int(r"(\d+) registers") + self.gmem_bytes = find_unique_int(r"(\d+) bytes gmem") + self.smem_bytes = find_unique_int(r"(\d+) bytes smem") + + self.cmem_bank_bytes = [] + cmem_banks = 0 + for m in re.finditer(r"(\d+) bytes cmem\[(\d+)\]", self.ptxas_info): + nbytes_str, bank_str = m.groups() + bank = int(bank_str) + if len(self.cmem_bank_bytes) <= bank: + self.cmem_bank_bytes += [0] * (bank + 1 - len(self.cmem_bank_bytes)) + self.cmem_bank_bytes[bank] = int(nbytes_str) + cmem_banks += 1 - # get a map from test name to list of .cu files for each directory - map1 = get_test_map(dir1) - map2 = get_test_map(dir2) - differing_tests = set() - for testname, kernels1 in map1.items(): - if testname not in map2: +@dataclass +class CompiledTest: + name: str + kernels: list[CompiledKernel] + passed: bool + + +@dataclass +class TestRun: + directory: str + git: GitRev = field(init=False) + run_name: str = field(init=False) + command: str = field(init=False) + exit_code: int = field(init=False) + env: str = field(init=False) + gpu_names: str = field(init=False) + nvcc_version: str = field(init=False) + # map from name of test to list of kernel base filenames + kernel_map: dict[str, CompiledTest] = field(default_factory=dict) + # collecting the preamble lets us skip it when diffing, and lets us compare + # only the preamble between runs + preamble: str = field(init=False) + # The following lets us skip preamble when loading kernels. Note that the + # preamble can change length due to differing index types, so we can't rely + # on f.seek() + preamble_size_lines: int = field(init=False) + + def __post_init__(self): + self.run_name = os.path.basename(self.directory) + + # get description of this git rev + abbrev = os.path.basename(os.path.dirname(os.path.abspath(self.directory))) + self.git = GitRev(abbrev) + + self.command = open(os.path.join(self.directory, "command"), "r").read() + + # check that command includes "nvfuser_tests" + if self.command.find("nvfuser_tests") == -1: print( - f"WARNING: Test {testname} present in {dir1} but not in {dir2}", + "ERROR: Command does not appear to be nvfuser_tests. Aborting.", file=sys.stderr, ) - continue + sys.exit(1) - kernels2 = map2[testname] + try: + self.env = "" + for line in open(os.path.join(self.directory, "env"), "r").readlines(): + # remove $testdir which is set by compare_codegen.sh + # NOTE: compare_codegen.sh should have already removed these lines + if re.search(r"^testdir=", line) is None: + self.env += line + except FileNotFoundError: + self.env = None - if len(kernels1) != len(kernels2): - print( - f"WARNING: Test {testname} has different number of kernels " - f"in {dir1} than in {dir2}. Not showing diffs.", - file=sys.stderr, + try: + self.nvcc_version = open( + os.path.join(self.directory, "nvcc_version"), "r" + ).read() + except FileNotFoundError: + self.nvcc_version = None + + try: + self.gpu_names = list( + open(os.path.join(self.directory, "gpu_names"), "r").readlines() ) - differing_tests.add(testname) - - for k1, k2 in zip(kernels1, kernels2): - f1 = os.path.join(dir1, "cuda", k1) - f2 = os.path.join(dir2, "cuda", k2) - # -U50 gives us plenty of context - # -I "void kernel" ignores mismatches in kernel signature line - # The intention is to avoid false positives from differently - # numbered kernels, but this can also hide true differences if - # the kernel signature changes. - args = ["diff", "-U50", "-I", "void kernel", f1, f2] - ret = subprocess.run(args, capture_output=True) - if ret.returncode != 0: - print(testname, ret.args) - print(ret.stdout.decode("utf-8")) - differing_tests.add(testname) - - for testname, kernels2 in map2.items(): - if testname not in map1: + except FileNotFoundError: + self.gpu_names = None + + self.exit_code = int(open(os.path.join(self.directory, "exitcode"), "r").read()) + + self.compute_kernel_map() + + self.find_preamble() + + def compute_kernel_map(self): + """ + Compute a map from test name to list of cuda filenames + """ + # first find the stdout log file + logfile = None + for fname in os.listdir(self.directory): + if fname.find("stdout") != -1: + if logfile is not None: + raise RuntimeError( + f"Input directory {self.directory} contains multiple " + 'possible logs (filenames containing "stdout")' + ) + logfile = os.path.join(self.directory, fname) + if logfile is None: + raise RuntimeError( + f"Input directory {self.directory} contains no log (filenames " + 'containing "stdout")' + ) + + # regex for stripping ANSI color codes + ansi_re = re.compile(r"(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]") + current_test = None + current_file = None + ptxas_info = "" + kernels = [] + + def finalize_kernel(): + nonlocal ptxas_info + nonlocal current_file + if current_file is not None: + kernels.append(CompiledKernel(current_file, ptxas_info=ptxas_info)) + ptxas_info = "" + current_file = None + + def finalize_test(passed: bool): + nonlocal current_test + nonlocal kernels + assert current_test is not None + finalize_kernel() + self.kernel_map[current_test] = CompiledTest(current_test, kernels, passed) + current_test = None + kernels = [] + + for line in open(logfile, "r").readlines(): + line = ansi_re.sub("", line.strip()) + if line[:13] == "[ RUN ] ": + current_test = line[13:] + elif line[:13] == "[ OK ] ": + finalize_test(True) + elif line[:13] == "[ FAILED ] ": + finalize_test(False) + elif line[:10] == "PRINTING: ": + if line[-3:] == ".cu": + finalize_kernel() + # This avoids comparing the .ptx files that are created then + # removed by the MemoryTest.LoadCache tests + current_file = line[10:] + elif line[:6] == "ptxas ": + # NVFUSER_DUMP=ptxas_verbose corresponds to nvcc --ptxas-options=-v or --resources-usage + # This always prints after printing the cuda filename + if current_file is None: + print("WARNING: Cannot associate ptxas info with CUDA kernel") + continue + ptxas_info += line + "\n" + + def find_preamble(self): + """Look for common preamble in collected kernels""" + preamble_lines = [] + first = True + files_processed = 0 # limit how many files to check + for cufile in os.listdir(os.path.join(self.directory, "cuda")): + cufile_full = os.path.join(self.directory, "cuda", cufile) + with open(cufile_full, "r") as f: + for i, line in enumerate(f.readlines()): + line = line.rstrip() + # we set nvfuser_index_t in the preamble. We ignore that change for the purposes of this diff + if line[:8] == "typedef " and line[-17:] == " nvfuser_index_t;": + line = "typedef int nvfuser_index_t; // NOTE: index type hard-coded as int for display only" + if first: + preamble_lines.append(line) + elif i >= len(preamble_lines) or preamble_lines[i] != line: + break + preamble_lines = preamble_lines[:i] + if len(preamble_lines) == 0: + # early return if preamble is determined to be empty + break + first = False + files_processed += 1 + if files_processed >= 50: + break + self.preamble_size_lines = len(preamble_lines) + self.preamble = "\n".join(preamble_lines) + + def get_kernel( + self, test_name, kernel_number, strip_preamble=True + ) -> CompiledKernel: + """Get a string of the kernel, optionally stripping the preamble""" + kern = self.kernel_map[test_name].kernels[kernel_number] + basename = kern.filename + fullname = os.path.join(self.directory, "cuda", basename) + kern.code = "" + with open(fullname, "r") as f: + for i, line in enumerate(f.readlines()): + if kern.index_type is None: + m = re.search(r"typedef\s+(\S*)\s+nvfuser_index_t;", line) + if m is not None: + kern.index_type = m.groups()[0] + if not strip_preamble or i >= self.preamble_size_lines: + # replace kernel934 with kernel1 to facilitate diffing + kern.code += re.sub(r"\bkernel\d+\b", "kernelN", line) + kern.code = kern.code.rstrip() + if strip_preamble and kern.code[-1] == "}": + # trailing curly brace is close of namespace. This will clean it up so that we have just the kernel + kern.code = kern.code[:-1].rstrip() + return kern + + +@dataclass +class KernelDiff: + testname: str + kernel_num: int + kernel1: CompiledKernel + kernel2: CompiledKernel + diff_lines: InitVar[list[str]] + diff: str = field(init=False) + new_lines: int = 0 + removed_lines: int = 0 + + def __post_init__(self, diff_lines: list[str]): + self.diff = "\n".join(diff_lines) + + for line in diff_lines: + if line[:2] == "+ ": + self.new_lines += 1 + elif line[:2] == "- ": + self.removed_lines += 1 + + +@dataclass +class TestDiff: + testname: str + test1_passed: bool + test2_passed: bool + kernel_diffs: list[KernelDiff] | None = None + kernel_number_mismatch: tuple[int, int] | None = None + + +@dataclass +class TestDifferences: + run1: TestRun + run2: TestRun + # either a list of diffs, or different numbers of kernels present + test_diffs: list[TestDiff] = field(default_factory=list) + new_tests: list[CompiledTest] = field(default_factory=list) + removed_tests: list[CompiledTest] = field(default_factory=list) + total_num_diffs: int = 0 + show_diffs: InitVar[bool] = False + preamble_diff: str = field(init=False) + + def __post_init__(self, show_diffs: bool): + if self.run1.command != self.run2.command: + print("WARNING: commands differ between runs", file=sys.stderr) + print(f" {self.run1.directory}: {self.run1.command}", file=sys.stderr) + print(f" {self.run2.directory}: {self.run2.command}", file=sys.stderr) + + if self.run1.exit_code != self.run1.exit_code: print( - f"WARNING: Test {testname} present in {dir2} but not in {dir1}", + f"WARNING: Exit codes {self.run1.exit_code} and {self.run2.exit_code} do not match.", file=sys.stderr, ) - return differing_tests + self.preamble_diff = "\n".join( + difflib.unified_diff( + self.run1.preamble.splitlines(), + self.run2.preamble.splitlines(), + fromfile=self.run1.git.abbrev, + tofile=self.run2.git.abbrev, + n=5, + ) + ) + if len(self.preamble_diff) > 0: + print("Preambles differ between runs indicating changes to runtime files") + + for testname, compiled_test1 in self.run1.kernel_map.items(): + if testname not in self.run2.kernel_map: + compiled_test1.kernels = [ + self.run1.get_kernel(testname, i) + for i in range(len(compiled_test1.kernels)) + ] + self.removed_tests.append(compiled_test1) + continue + + compiled_test2 = self.run2.kernel_map[testname] + + if len(compiled_test1.kernels) != len(compiled_test2.kernels): + print( + f"WARNING: Test {testname} has different number of kernels " + f"in {dir1} than in {dir2}. Not showing diffs for this test.", + file=sys.stderr, + ) + self.test_diffs.append( + TestDiff( + testname, + compiled_test1.passed, + compiled_test2.passed, + None, + (len(compiled_test1.kernels), len(compiled_test2.kernels)), + ) + ) + + kernel_diffs = [] + for kernel_num in range(len(compiled_test1.kernels)): + kern1 = self.run1.get_kernel(testname, kernel_num, strip_preamble=True) + kern2 = self.run2.get_kernel(testname, kernel_num, strip_preamble=True) + + diff_lines = list( + difflib.unified_diff( + kern1.code.splitlines(), + kern2.code.splitlines(), + fromfile=self.run1.git.abbrev, + tofile=self.run2.git.abbrev, + n=5, + ) + ) + if len(diff_lines) > 0: + kd = KernelDiff(testname, kernel_num, kern1, kern2, diff_lines) + if show_diffs: + print(testname, kernel_num, kd.diff) + self.total_num_diffs += 1 + kernel_diffs.append(kd) + + if len(kernel_diffs) > 0: + self.test_diffs.append( + TestDiff( + testname, + compiled_test1.passed, + compiled_test2.passed, + kernel_diffs, + ) + ) + + for testname, compiled_test2 in self.run2.kernel_map.items(): + if testname not in self.run1.kernel_map: + compiled_test2.kernels = [ + self.run2.get_kernel(testname, i) + for i in range(len(compiled_test2.kernels)) + ] + self.new_tests.append(compiled_test2) + + def hide_env(self): + """Remove private information like env vars and lib versions""" + self.run1.env = None + self.run2.env = None + self.run1.nvcc_version = None + self.run2.nvcc_version = None + + def generate_html(self, omit_preamble: bool, max_diffs: bool) -> str: + """Return a self-contained HTML string summarizing the codegen comparison""" + import jinja2 + + tools_dir = os.path.dirname(__file__) + env = jinja2.Environment(loader=jinja2.FileSystemLoader(searchpath=tools_dir)) + template = env.get_template("templates/codediff.html") + context = asdict(self) + context["omit_preamble"] = omit_preamble + context["max_diffs"] = max_diffs + context["tool_git"] = GitRev("HEAD") + + return template.render(context) if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + epilog="This command must be run from within a git checkout of the NVFuser repo.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) parser.add_argument("dir1", help="Directory containing stdout-*.log and cuda/") parser.add_argument("dir2", help="Directory containing stdout-*.log and cuda/") + parser.add_argument( + "--hide-env", + action="store_true", + help="Hide environment variables and nvcc versions in output?", + ) + parser.add_argument("--html", action="store_true", help="Write HTML file?") + parser.add_argument( + "--hide-diffs", action="store_true", help="Print diffs to STDOUT?" + ) + parser.add_argument( + "--html-max-diffs", + default=200, + type=int, + help="Limit number of included kernel diffs in HTML output to this many (does not affect exit code).", + ) + parser.add_argument( + "--html-omit-preamble", + action="store_true", + help="Omit the preamble in HTML output?", + ) + parser.add_argument( + "-o", "--output-file", help="Location of HTML file output if -h is given." + ) + parser.add_argument( + "--json", + help="Location to write JSON output, if given", + ) args = parser.parse_args() - differing_tests = diff_nvfuser_tests_dirs(args.dir1, args.dir2) + td = TestDifferences( + TestRun(args.dir1), TestRun(args.dir2), show_diffs=not args.hide_diffs + ) - if len(differing_tests) == 0: + if args.hide_env: + td.hide_env() + + if args.html: + output_file = args.output_file + if output_file is None: + # determine default output file + def get_abbrev(d): + return os.path.basename(os.path.dirname(os.path.abspath(d))) + + abbrev1 = get_abbrev(args.dir1) + abbrev2 = get_abbrev(args.dir2) + run_name = os.path.basename(os.path.abspath(args.dir1)) + output_file = f"codediff_{abbrev1}_{abbrev2}_{run_name}.html" + with open(output_file, "w") as f: + f.write( + td.generate_html( + omit_preamble=args.html_omit_preamble, max_diffs=args.html_max_diffs + ) + ) + + if args.json is not None: + import json + + d = asdict(td) + # clean up the dict a bit by removing temporary data structures + del d["run1"]["kernel_map"] + del d["run2"]["kernel_map"] + json.dump(d, open(args.json, "w"), indent=2) + + if len(td.test_diffs) == 0: print("No differences found in overlapping tests!") else: - print("Differences found in the following tests:") - for t in differing_tests: - print(f" {t}") + print( + td.total_num_diffs, + "kernel differences from", + len(td.test_diffs), + "tests found", + ) + if len(td.new_tests) > 0: + print(len(td.new_tests), "new tests found") + if len(td.removed_tests) > 0: + print(len(td.removed_tests), "removed tests found") - exit(len(differing_tests)) + # Return 1 if preamble or any kernels are changed, else 0 + exit(1 if len(td.test_diffs) > 0 or len(td.preamble_diff) > 0 else 0) diff --git a/tools/templates/codediff.html b/tools/templates/codediff.html new file mode 100644 index 00000000000..1cbe78da50a --- /dev/null +++ b/tools/templates/codediff.html @@ -0,0 +1,433 @@ +{#- +SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES. +All rights reserved. +SPDX-License-Identifier: BSD-3-Clause +-#} + + + {{ run1.git.abbrev }} vs {{ run2.git.abbrev }} - NVFuser codegen diff + + + + + + + + + + +

{{ run1.git.abbrev }} vs {{ run2.git.abbrev }} - NVFuser codegen diff

+

Git Information

+

+ Old commit: {{ run1.git.abbrev }} +

+ {{ run1.git.title|e }} +
+ {{ run1.git.author_name|e }} <{{ run1.git.author_email|e }}> +
+ {{ run1.git.author_time }} +
+ View commit +
+ Browse code at this commit +
+

+ New commit: {{ run2.git.abbrev }} +

+ {{ run2.git.title|e }} +
+ {{ run2.git.author_name|e }} <{{ run2.git.author_email|e }}> +
+ {{ run2.git.author_time }} +
+ View commit +
+ Browse code at this commit +
+

Code comparison

+ Command: {{ run1.command|e }} + {% if run1.gpu_names is not none %} +
+ {%- if run1.gpu_names | length > 1 %} + {% if run1.gpu_names != run2.gpu_names %}{{ run1.git.abbrev }}{% endif %} + GPUs: +
{{run1.gpu_names | e}}
+ %} + {%- else -%} + {% if run1.gpu_names != run2.gpu_names %}{{ run1.git.abbrev }}{% endif %} + GPU: {{ run1.gpu_names[0] | e }} +
+ {%- endif %} + {% endif %} + {% if run1.nvcc_version is not none %} +
> nvcc --version {%- if run1.nvcc_version != run2.nvcc_version %}# {{run1.git.abbrev}}{% endif %}
+{{run1.nvcc_version | e}}
+ {% endif %} + {%- if run1.env is not none %} + {%- if run1.env != run2.env %} + + {% else %} + + matches between runs + {% endif -%} + + {% endif %} + {% if run2.gpu_names is not none %} + {% if run2.gpu_names != run1.gpu_names %} + {%- if run2.gpu_names | length > 1 %} +
+ {{ run2.git.abbrev }} GPUs: +
+
{{run2.gpu_names | e}}
+ {%- else -%} + {{ run2.git.abbrev }} GPU: {{ run2.gpu_names[0] | e }} + {%- endif %} + {% endif %} + {% endif %} + {% if run2.nvcc_version is not none %} + {% if run2.nvcc_version != run1.nvcc_version %} +
> nvcc --version  # {{run2.git.abbrev}}
+{{run2.nvcc_version | e}}
+ {% endif %} + {% endif %} + {% if run2.env is not none %} + {% if run2.env != run1.env %} +
+ + + {% endif %} + {% endif %} +
+ {% if not omit_preamble %} + {% if run1.preamble != run2.preamble %} + NVFuser preamble differs between runs + + + + + + +
+ + + +{% else %} +
+ + matches between runs +
+ +{% endif %} +{% endif %} +{% if new_tests|length > 0 %} +
+

+ New Tests + +

+ {% for test in new_tests %} + {{ test.name }} + {% if not test.passed %}FAILED{% endif %} +
+ {% set test_num = loop.index %} + {% for kernel in test.kernels %} +     Kernel {{ loop.index }} + +
+ index type: {{ kernel.index_type }} + arch: {{ kernel.arch }} + registers: {{ kernel.registers }} + {% if kernel.gmem_bytes is not none and kernel.gmem_bytes > 0 %}gmem: {{ kernel.gmem_bytes }}{% endif %} + {% if kernel.smem_bytes is not none and kernel.smem_bytes > 0 %}smem: {{ kernel.smem_bytes }}{% endif %} + {% if kernel.stack_frame_bytes is not none and kernel.stack_frame_bytes > 0 %} + stack frame: {{ kernel.stack_frame_bytes }} + {% endif %} + {% if kernel.spill_store_bytes is not none and kernel.spill_store_bytes > 0 %} + spill stores: {{ kernel.spill_store_bytes }} + {% endif %} + {% if kernel.spill_load_bytes is not none and kernel.spill_load_bytes > 0 %} + spill loads: {{ kernel.spill_load_bytes }} + {% endif %} + {% if kernel.cmem_bank_bytes is not none %} + {% for cmem_bank in range(kernel.cmem_bank_bytes|length) %} + {% set cmemb = kernel.cmem_bank_bytes[cmem_bank] %} + cmem[{{ cmem_bank }}]: {{ cmemb }} + {% endfor %} + {% endif %} +
+ + {% endfor %} +
+ {% endfor %} +{% endif %} +{% if removed_tests|length > 0 %} +
+

+ Removed Tests + +

+ {% for test in removed_tests %} + {{ test.name }} +
+ {% set test_num = loop.index %} + {% for kernel in test.kernels %} +     Kernel {{ loop.index }} + +
+
+ + {% endfor %} + {% endfor %} +{% endif %} +
+

+ Test Diffs + +

+{% set loop_vars = namespace(total_diffs=0) %} +{% for test_diff in test_diffs %} + {% if loop_vars.total_diffs < max_diffs %} + {{ loop.index }}: {{ test_diff.testname }} + {% if not test_diff.test1_passed or not test_diff.test2_passed -%} + {{"SUCCESS" if test_diff.test1_passed else "FAILED"}} → + {{"SUCCESS" if test_diff.test2_passed else "FAILED"}} + {%- endif -%} +
+ {% set outer_index = loop.index %} + {% for kernel_diff in test_diff.kernel_diffs %} + {% if loop_vars.total_diffs == max_diffs + 1 %} +
+ WARNING: Only showing {{ max_diffs }} out of {{ total_num_diffs }} + total modified kernels. To show more kernels pass a higher value in + the --html-max-diffs argument to + tools/diff_codegen_nvfuser_tests.py. +
+ {% elif loop_vars.total_diffs < max_diffs %} +   Kernel {{ kernel_diff.kernel_num }} + + + + + + + -{{ kernel_diff.removed_lines }} + +{{ kernel_diff.new_lines }} + index type: + {%- if kernel_diff.kernel1.index_type == kernel_diff.kernel2.index_type %} + {{ kernel_diff.kernel1.index_type }} + {% else -%} + {{ kernel_diff.kernel1.index_type }}{{ kernel_diff.kernel2.index_type }} + {%- endif %} + {%- if kernel_diff.kernel1.registers != kernel_diff.kernel2.registers -%} + registers: {{ kernel_diff.kernel1.registers }}{{ kernel_diff.kernel2.registers }} + {%- else -%} + registers: {{ kernel_diff.kernel1.registers }} + {%- endif %} + {% if kernel_diff.kernel1.gmem_bytes != kernel_diff.kernel2.gmem_bytes -%} + gmem: {{ kernel_diff.kernel1.gmem_bytes }}{{ kernel_diff.kernel2.gmem_bytes }} + {%- endif %} + {% if kernel_diff.kernel1.smem_bytes != kernel_diff.kernel2.smem_bytes -%} + smem: {{ kernel_diff.kernel1.smem_bytes }}{{ kernel_diff.kernel2.smem_bytes }} + {%- elif kernel_diff.kernel1.smem_bytes > 0 -%} + smem: {{ kernel_diff.kernel1.smem_bytes }} + {%- endif %} + {% if kernel_diff.kernel1.stack_frame_bytes != kernel_diff.kernel2.stack_frame_bytes -%} + stack frame: {{ kernel_diff.kernel1.stack_frame_bytes }}{{ kernel_diff.kernel2.stack_frame_bytes }} + {%- elif kernel_diff.kernel1.stack_frame_bytes >0 -%} + stack frame: {{ kernel_diff.kernel1.stack_frame_bytes }} + {%- endif %} + {% if kernel_diff.kernel1.spill_store_bytes != kernel_diff.kernel2.spill_store_bytes -%} + spill stores: {{ kernel_diff.kernel1.spill_store_bytes }}{{ kernel_diff.kernel2.spill_store_bytes }} + {%- elif kernel_diff.kernel1.spill_store_bytes >0 -%} + spill stores: {{ kernel_diff.kernel1.spill_store_bytes }} + {%- endif %} + {% if kernel_diff.kernel1.spill_load_bytes != kernel_diff.kernel2.spill_load_bytes -%} + spill loads: {{ kernel_diff.kernel1.spill_load_bytes }}{{ kernel_diff.kernel2.spill_load_bytes }} + {%- elif kernel_diff.kernel1.spill_load_bytes > 0 -%} + spill loads: {{ kernel_diff.kernel1.spill_load_bytes }} + {%- endif %} + {% if kernel_diff.kernel1.cmem_bank_bytes is not none and kernel_diff.kernel2.cmem_bank_bytes is not none %} + {% for cmem_bank in range([kernel_diff.kernel1.cmem_bank_bytes|length, kernel_diff.kernel2.cmem_bank_bytes|length] | max) %} + {% set cmem1 = kernel_diff.kernel1.cmem_bank_bytes[cmem_bank] if cmem_bank < kernel_diff.kernel1.cmem_bank_bytes|length else 0 %} + {% set cmem2 = kernel_diff.kernel2.cmem_bank_bytes[cmem_bank] if cmem_bank < kernel_diff.kernel2.cmem_bank_bytes|length else 0 %} + {% if cmem1 != cmem2 %} + cmem[{{ cmem_bank }}]: {{ cmem1 }}{{ cmem2 }} + {% elif cmem1 > 0 %} + cmem[{{ cmem_bank }}]: {{ cmem1 }} + {% endif %} + {% endfor %} + {% endif %} +
+ + + +{% endif %} +{% set loop_vars.total_diffs = loop_vars.total_diffs + 1 %} +{% endfor %} +
+{% endif %} +{% endfor %} +
+ + +