From dc9026847637db8e6443962342a9b2c9712612cb Mon Sep 17 00:00:00 2001
From: Luni-4 <luni-4@hotmail.it>
Date: Wed, 18 Nov 2020 18:14:32 +0100
Subject: [PATCH] Check validity of a tree-sitter-language update

Add a script to check whether a tree-sitter-language update introduces
breaking changes in rust-code-analysis code.

To do so, this script compares the metrics, computed on a chosen
repository, before and after a tree-sitter-language update.
---
 check-submodule.py | 315 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 315 insertions(+)
 create mode 100755 check-submodule.py

diff --git a/check-submodule.py b/check-submodule.py
new file mode 100755
index 000000000..0ed4663ba
--- /dev/null
+++ b/check-submodule.py
@@ -0,0 +1,315 @@
+#!/usr/bin/env python3
+
+"""check-submodule
+This script checks whether breaking changes could be introduced in
+rust-code-analysis code after the update of a tree-sitter-language submodule.
+To do so, it compares the differences between the metrics, computed on a
+chosen repository, before and after a tree-sitter-language update.
+
+
+To compute metrics:
+
+./check-submodule.py compute-metrics -u REPO_URL -p LOCAL_DIR -l TREE_SITTER_LANGUAGE
+
+NOTE: The compute-metrics subcommand MUST be run on a clean master branch!
+
+
+To compare metrics:
+
+The metrics are saved as json files, and to compare them, a specific
+json-diff has been adopted.
+
+1. Install npm on your system
+2. Install json-diff from npm running: npm install -g json-diff
+3. Install deepdiff to retrieve minimal tests: pip install deepdiff
+
+./check-submodule.py compare-metrics -l TREE_SITTER_LANGUAGE
+"""
+
+import argparse
+import json
+import pathlib
+import re
+import subprocess
+import sys
+import typing as T
+
+import deepdiff
+
+# The /tmp directory is the base of the clone and of every metrics directory
+WORKDIR = pathlib.Path("/tmp")
+# Suffix added to the language name for the directory of the old metrics
+OLD_SUFFIX = "-old"
+# Suffix added to the language name for the directory of the new metrics
+NEW_SUFFIX = "-new"
+
+# Run `cmd` with `args`; output is not captured and the exit status is ignored.
+def run_subprocess(cmd: str, *args: T.Union[str, pathlib.Path]) -> None:
+    subprocess.run([cmd, *args])
+
+
+# Run a subprocess and return its result; stderr is merged into the text stdout.
+def get_subprocess_output(
+    cmd: str, *args: T.Union[str, pathlib.Path]
+) -> subprocess.CompletedProcess:
+    return subprocess.run(
+        [cmd, *args],
+        text=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+    )
+
+
+# Run rust-code-analysis-cli through cargo on `repo_dir` to compute json metrics.
+def run_rca(repo_dir: pathlib.Path, output_dir: pathlib.Path) -> None:
+    run_subprocess(
+        "cargo",
+        "run",
+        "--package",
+        "rust-code-analysis-cli",
+        "--",
+        "--metrics",
+        "--output-format=json",
+        "--pr",
+        "-p",
+        repo_dir,
+        "-o",
+        output_dir,
+    )
+
+
+# Load the two json metric files and return the first one with their DeepDiff.
+def get_json_diff(
+    first_file: pathlib.Path, second_file: pathlib.Path
+) -> T.Tuple[T.Dict[str, T.Any], T.Dict[str, T.Any]]:
+    with open(first_file, "r") as input_file:
+        t1 = json.load(input_file)
+
+    with open(second_file, "r") as input_file:
+        t2 = json.load(input_file)
+
+    diff = deepdiff.DeepDiff(t1, t2, ignore_order=True)
+
+    return (t1, diff)
+
+
+# Save the filename and the list of code spans associated to the differences
+# in a dictionary; spans are (start, end) slice bounds into the source lines.
+def get_metrics_diff_span(
+    first_json: T.Dict[str, T.Any], diff: T.Dict[str, T.Any]
+) -> T.Dict[str, T.List[T.Tuple[int, int]]]:
+    # Capture the space indices of a path like root['spaces'][0]['spaces'][2]
+    prog = re.compile(r"\['spaces'\]\[(\d+)\]")
+    output = {"name": first_json["name"], "spaces_spans": []}
+
+    # A diff made only of added or removed items has no "values_changed" key
+    for value in diff.get("values_changed", {}):
+        space = first_json
+        # Walk the nested spaces by index rather than eval()-ing the path
+        for index in prog.findall(value):
+            space = space["spaces"][int(index)]
+        # Subtract one because the list of file lines is indexed from 0
+        output["spaces_spans"].append((space["start_line"] - 1, space["end_line"]))
+
+    # Print the path of the repository file containing the differences
+    print(first_json["name"])
+    return output
+
+
+# Dump the source code of every differing span in an output file.
+def dump_minimal_tests(
+    code_spans_object: T.Dict[str, T.List[T.Tuple[int, int]]],
+    new_filename: pathlib.Path,
+    compare_dir: pathlib.Path,
+) -> None:
+    # Remove duplicates from the list of spans, keeping their first-seen order
+    spans_list = dict.fromkeys(code_spans_object["spaces_spans"])
+
+    # Get the path of the source file the spans refer to
+    filename = code_spans_object["name"]
+
+    # Read all the lines of the input source code
+    with open(filename, "r") as input_file:
+        lines = input_file.readlines()
+
+    # Write spans to a file named like the metrics file minus its last suffix
+    output_path = compare_dir / new_filename.stem
+    with open(output_path, "w") as output_file:
+        for span in spans_list:
+            output_file.write("Minimal test:\n")
+            output_file.write("".join(lines[span[0] : span[1]]) + "\n")
+
+
+# Write `stdout` to a file in `compare_dir` named like `new_filename`.
+def dump_json_file(
+    stdout: str, new_filename: pathlib.Path, compare_dir: pathlib.Path
+) -> None:
+    # Only the file name is kept: the parent directories are dropped
+    output_path = compare_dir / new_filename.name
+    with open(output_path, "w") as output_file:
+        output_file.write(stdout)
+
+
+# Compute the minimal tests for a pair of differing json metric files.
+def compute_minimal_tests(
+    old_filename: pathlib.Path, new_filename: pathlib.Path, compare_dir: pathlib.Path
+) -> None:
+    # Find the differences between the two json files with the aim of
+    # getting some minimal tests
+    first_json, diff = get_json_diff(old_filename, new_filename)
+
+    # Retrieve the code spans associated with the differences
+    code_spans_object = get_metrics_diff_span(first_json, diff)
+
+    # Dump the minimal tests retrieved from the code spans to a file with
+    # the same extension as the analyzed source code
+    dump_minimal_tests(code_spans_object, new_filename, compare_dir)
+
+
+# Save json files of differences and minimal tests in the chosen directory.
+def save_diff_files(
+    old_dir: pathlib.Path, new_dir: pathlib.Path, compare_dir: pathlib.Path
+) -> None:
+    # Pair the metric files by their path relative to the metrics directory:
+    # glob() yields files in arbitrary order, so zipping two globs can mismatch
+    for old_filename in sorted(pathlib.Path(old_dir).glob("**/*.json")):
+        new_filename = pathlib.Path(new_dir) / old_filename.relative_to(old_dir)
+        # A file with metrics only before the update has nothing to compare
+        if not new_filename.is_file():
+            continue
+
+        ret_value = get_subprocess_output("json-diff", "-j", old_filename, new_filename)
+        # If two json files are identical, skip to the next pair
+        if ret_value.returncode == 0:
+            continue
+
+        # Dump json file of differences
+        dump_json_file(ret_value.stdout, new_filename, compare_dir)
+        # Compute minimal tests
+        compute_minimal_tests(old_filename, new_filename, compare_dir)
+
+
+# Compute metrics before and after a tree-sitter-language update.
+def compute_metrics(args: argparse.Namespace) -> None:
+    # Repository local directory (a relative path is resolved against WORKDIR)
+    repo_dir = WORKDIR / args.path
+    # Old metrics directory
+    old_dir = WORKDIR / (args.language + OLD_SUFFIX)
+    # New metrics directory
+    new_dir = WORKDIR / (args.language + NEW_SUFFIX)
+
+    # Create output directories, reusing them when they already exist
+    old_dir.mkdir(parents=True, exist_ok=True)
+    new_dir.mkdir(parents=True, exist_ok=True)
+
+    # Git clone only the latest commit of the chosen repository
+    # Note: no submodules repositories are accepted
+    print(f"Cloning {args.url} into {repo_dir}")
+    run_subprocess("git", "clone", "--depth=1", args.url, repo_dir)
+
+    # Compute old metrics
+    print("\nComputing metrics before the update and saving them in", old_dir)
+    run_rca(repo_dir, old_dir)
+
+    # Create (or reset) a branch named after the language in the current dir
+    print("\nCreate a new branch called", args.language)
+    run_subprocess("git", "checkout", "-B", args.language)
+
+    # Update tree-sitter-language submodule through the helper script
+    print("\nUpdate", args.language)
+    run_subprocess("./update-sumbodules.sh", args.language)
+
+    # Compute new metrics
+    print("\nComputing metrics after the update and saving them in", new_dir)
+    run_rca(repo_dir, new_dir)
+
+
+# Compare metrics and dump the differences, if there are any.
+def compare_metrics(args: argparse.Namespace) -> None:
+    # Old metrics directory
+    old_dir = WORKDIR / (args.language + OLD_SUFFIX)
+    # New metrics directory
+    new_dir = WORKDIR / (args.language + NEW_SUFFIX)
+
+    # Directory receiving the differences and the minimal tests
+    compare_dir = WORKDIR / (args.language + "-compare")
+
+    # Create compare directory, reusing it when it already exists
+    compare_dir.mkdir(parents=True, exist_ok=True)
+
+    # Save files of differences and minimal tests in the chosen directory
+    save_diff_files(old_dir, new_dir, compare_dir)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        prog="check-submodule",
+        description="This tool computes the metrics of a chosen repository "
+        "before and after a tree-sitter-language update.",
+        epilog="The source code of this program can be found on "
+        "GitHub at https://github.com/mozilla/rust-code-analysis",
+    )
+
+    # Subcommands parsers
+    commands = parser.add_subparsers(help="Sub-command help")
+
+    # Compute metrics command
+    compute_metrics_cmd = commands.add_parser(
+        "compute-metrics",
+        help="Computes the metrics of a chosen repository before and after "
+        "a tree-sitter-language update.",
+    )
+
+    # Arguments
+    compute_metrics_cmd.add_argument(
+        "-u",
+        "--url",
+        type=str,
+        required=True,
+        help="URL of the repository used to compute the metrics",
+    )
+
+    compute_metrics_cmd.add_argument(
+        "-p",
+        "--path",
+        type=str,
+        required=True,
+        help="Path where the repository will be saved locally",
+    )
+
+    compute_metrics_cmd.add_argument(
+        "-l",
+        "--language",
+        type=str,
+        required=True,
+        help="tree-sitter-language to be updated",
+    )
+    compute_metrics_cmd.set_defaults(func=compute_metrics)
+
+    # Compare metrics command
+    compare_metrics_cmd = commands.add_parser(
+        "compare-metrics",
+        help="Compares the metrics before and after "
+        "a tree-sitter-language update in order to discover whether "
+        "there are differences.",
+    )
+
+    # Arguments
+    compare_metrics_cmd.add_argument(
+        "-l",
+        "--language",
+        type=str,
+        required=True,
+        help="tree-sitter-language used to compare the metrics",
+    )
+    compare_metrics_cmd.set_defaults(func=compare_metrics)
+
+    # Parse arguments
+    args = parser.parse_args()
+
+    # Call the command; with no sub-command there is no func, so print the help
+    getattr(args, "func", lambda _args: parser.print_help())(args)
+
+
+if __name__ == "__main__":
+    main()