-
Notifications
You must be signed in to change notification settings - Fork 60
Final submodule script version #404
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,26 +13,27 @@ | |
|
|
||
| NOTE: The compute-metrics subcommand MUST be run on a clean master branch! | ||
|
|
||
| To compute metrics on a continuous integration system: | ||
|
|
||
| To compare metrics and retrieve minimal tests: | ||
| ./check-submodule.py compute-ci-metrics -p LOCAL_DIR -l TREE_SITTER_LANGUAGE | ||
|
|
||
| 1. Install deepdiff: pip install deepdiff | ||
| To compare metrics and retrieve the structural JSON of differences | ||
| in addition to the files containing the minimal tests: | ||
|
|
||
| 1. Install json-diff from here: https://github.com/Luni-4/json-diff/releases | ||
| 2. Install json-minimal-tests from here: https://github.com/Luni-4/json-minimal-tests/releases | ||
|
|
||
| ./check-submodule.py compare-metrics -l TREE_SITTER_LANGUAGE | ||
|
|
||
| NOTE: Add the paths of the software above to the PATH environment variable! | ||
| """ | ||
|
|
||
| import argparse | ||
| import asyncio | ||
| import json | ||
| import math | ||
| import pathlib | ||
| import re | ||
| import subprocess | ||
| import sys | ||
| import typing as T | ||
|
|
||
| import deepdiff | ||
|
|
||
| # The /tmp directory will be used as workdir | ||
| WORKDIR = pathlib.Path("/tmp") | ||
| # Suffix for the directory containing the old metrics | ||
|
|
@@ -65,54 +66,23 @@ | |
| "tree-sitter-python": ["*.py"], | ||
| } | ||
|
|
||
|
|
||
| class JsonDiff: | ||
| def __init__( | ||
| self, | ||
| old_metrics: T.List[pathlib.Path], | ||
| new_metrics: T.List[pathlib.Path], | ||
| compare_dir: pathlib.Path, | ||
| max_workers: int, | ||
| ): | ||
| self.compare_dir = compare_dir | ||
| self.max_workers = max_workers | ||
|
|
||
| # Max number of file paths in a sublist | ||
| n = math.ceil(len(old_metrics) / max_workers) | ||
|
|
||
| # Assign a certain number of filepaths to each worker | ||
| self.workers_filepaths = [ | ||
| zip(old_metrics[i * n : (i + 1) * n], new_metrics[i * n : (i + 1) * n]) | ||
| for i in range((len(old_metrics) + n - 1) // n) | ||
| ] | ||
|
|
||
| # Run asynchronous comparisons between json files. | ||
| async def diff(self): | ||
| # Save minimal tests in the chosen directory. | ||
| def _worker(worker_list: T.List[pathlib.Path]): | ||
| for old_filename, new_filename in worker_list: | ||
|
|
||
| # Compute minimal tests | ||
| compute_minimal_tests(old_filename, new_filename, self.compare_dir) | ||
|
|
||
| # Define the max number of coroutines used to compare json files | ||
| await asyncio.gather( | ||
| *(_worker(worker_filepaths) for worker_filepaths in self.workers_filepaths) | ||
| ) | ||
|
|
||
|
|
||
| # Run a subprocess. | ||
| def run_subprocess(cmd: str, *args: T.Union[str, pathlib.Path]) -> None: | ||
| subprocess.run([cmd, *args]) | ||
|
|
||
|
|
||
| # Run rust-code-analysis on the chosen repository to compute metrics. | ||
| def run_rca( | ||
| repo_dir: pathlib.Path, output_dir: pathlib.Path, include_languages: T.List[str] | ||
| repo_dir: pathlib.Path, | ||
| output_dir: pathlib.Path, | ||
| manifest_path: T.Optional[pathlib.Path], | ||
| include_languages: T.List[str], | ||
| ) -> None: | ||
| run_subprocess( | ||
| "cargo", | ||
| "run", | ||
| "--manifest-path", | ||
| manifest_path / "Cargo.toml" if manifest_path else "Cargo.toml", | ||
| "--release", | ||
| "--package", | ||
| "rust-code-analysis-cli", | ||
|
|
@@ -129,104 +99,52 @@ def run_rca( | |
| ) | ||
|
|
||
|
|
||
| # Find the difference between the two json metric files. | ||
| def get_json_diff( | ||
| first_file: pathlib.Path, second_file: pathlib.Path | ||
| ) -> T.Tuple[T.Dict[str, T.Any], T.Dict[str, T.Any]]: | ||
| with open(first_file, "r") as input_file: | ||
| t1 = json.load(input_file) | ||
|
|
||
| with open(second_file, "r") as input_file: | ||
| t2 = json.load(input_file) | ||
|
|
||
| diff = deepdiff.DeepDiff(t1, t2, ignore_order=True) | ||
|
|
||
| return (t1, diff) | ||
|
|
||
|
|
||
| # Save the filename and the list of code spans associated to the differences | ||
| # in a dictionary. | ||
| def get_metrics_diff_span( | ||
| first_json: T.Dict[str, T.Any], diff: T.Dict[str, T.Any] | ||
| ) -> T.Dict[str, T.List[T.Tuple[int, int]]]: | ||
| # Search for this pattern in the differences object | ||
| prog = re.compile(r"\['spaces'\]\[\d+\]") | ||
|
|
||
| output = {"name": first_json["name"], "spaces_spans": []} | ||
|
|
||
| for value in diff["values_changed"]: | ||
| val = "".join(prog.findall(value)) | ||
| # Subtract one because line indices in the file start from 0 | ||
| start_line = eval(f'first_json{val}["start_line"]') - 1 | ||
| end_line = eval(f'first_json{val}["end_line"]') | ||
| output["spaces_spans"].append((start_line, end_line)) | ||
|
|
||
| # Print the path of the repository file containing the differences | ||
| print(first_json["name"]) | ||
|
|
||
| return output | ||
|
|
||
|
|
||
| # Dump minimal tests code in an output file. | ||
| def dump_minimal_tests( | ||
| code_spans_object: T.Dict[str, T.List[T.Tuple[int, int]]], | ||
| new_filename: pathlib.Path, | ||
| compare_dir: pathlib.Path, | ||
| ) -> None: | ||
| # Remove duplicates from the list of spans | ||
| spans_list = dict.fromkeys(code_spans_object["spaces_spans"]) | ||
|
|
||
| # Get filename | ||
| filename = code_spans_object["name"] | ||
|
|
||
| # Read code spans from the input source code | ||
| with open(filename, "r", encoding="utf-8", errors="ignore") as input_file: | ||
| # Decode only utf-8 source code files | ||
| lines = input_file.readlines() | ||
|
|
||
| # Write spans to output file | ||
| output_path = compare_dir / new_filename.stem | ||
| with open(output_path, "w") as output_file: | ||
| for span in spans_list: | ||
| output_file.write("Minimal test:\n") | ||
| output_file.write("".join(lines[span[0] : span[1]]) + "\n") | ||
| # Compute continuous integration metrics before and after a | ||
| # tree-sitter-language update. | ||
| def compute_ci_metrics(args: argparse.Namespace) -> None: | ||
|
|
||
| if args.language not in EXTENSIONS.keys(): | ||
| print(args.language, "is not a valid tree-sitter-language") | ||
| sys.exit(1) | ||
|
|
||
| # Compute minimal tests. | ||
| def compute_minimal_tests( | ||
| old_filename: pathlib.Path, new_filename: pathlib.Path, compare_dir: pathlib.Path | ||
| ) -> None: | ||
| # Find the difference between the two json files with the aim of | ||
| # getting some minimal tests | ||
| first_json, diff = get_json_diff(old_filename, new_filename) | ||
| # Repository passed as input | ||
| repo_dir = pathlib.Path(args.path) | ||
|
|
||
| # If two json files are identical, return | ||
| if not diff: | ||
| return | ||
| # Create rust-code-analysis repository path | ||
| rca_path = WORKDIR / "rust-code-analysis" | ||
|
|
||
| # Retrieve the code spans associated to the differences | ||
| code_spans_object = get_metrics_diff_span(first_json, diff) | ||
| # Old metrics directory | ||
| old_dir = WORKDIR / (args.language + OLD_SUFFIX) | ||
| # New metrics directory | ||
| new_dir = WORKDIR / (args.language + NEW_SUFFIX) | ||
|
|
||
| # Dump the minimal tests retrieved from code spans on a file with the | ||
| # same extension of the analyzed source code | ||
| dump_minimal_tests(code_spans_object, new_filename, compare_dir) | ||
| # Create output directories | ||
| old_dir.mkdir(parents=True, exist_ok=True) | ||
| new_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| # Git clone rust-code-analysis master branch repository | ||
| print(f"Cloning rust-code-analysis master branch into /tmp") | ||
| run_subprocess( | ||
| "git", | ||
| "clone", | ||
| "--depth=1", | ||
| "--recurse-submodules", | ||
| "-j8", | ||
| "https://github.com/mozilla/rust-code-analysis", | ||
| rca_path, | ||
| ) | ||
|
Comment on lines
+126
to
+135
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We already have a local clone; we can just reuse it by checking out master instead of recloning.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Some CI systems fetch only a single branch, so retrieving the master branch's commits would require resetting commits and other complicated steps. This solution could potentially be used on any CI system, and it takes less than a second, so in my opinion we could keep it. |
||
|
|
||
| # Save json files of differences and minimal tests in the chosen directory | ||
| # concurrently. | ||
| def save_diff_files( | ||
| old_dir: pathlib.Path, new_dir: pathlib.Path, compare_dir: pathlib.Path | ||
| ) -> None: | ||
| # Get all metric files in old and new directories | ||
| old_paths = sorted(pathlib.Path(old_dir).glob("*.json")) | ||
| new_paths = sorted(pathlib.Path(new_dir).glob("*.json")) | ||
| # Compute old metrics | ||
| print("\nComputing metrics before the update and saving them in", old_dir) | ||
| run_rca(repo_dir, old_dir, rca_path, EXTENSIONS[args.language]) | ||
|
|
||
| # Create a new coroutines handler | ||
| json_diff = JsonDiff(old_paths, new_paths, compare_dir, 4) | ||
| # Update tree-sitter-language submodule | ||
| print("\nUpdate", args.language) | ||
| run_subprocess("./update-language-bindings.sh") | ||
|
|
||
| # Find the differences between json files and save the results in a | ||
| # chosen directory asynchronously | ||
| asyncio.run(json_diff.diff()) | ||
| # Compute new metrics | ||
| print("\nComputing metrics after the update and saving them in", new_dir) | ||
| run_rca(repo_dir, new_dir, None, EXTENSIONS[args.language]) | ||
|
|
||
|
|
||
| # Compute metrics before and after a tree-sitter-language update. | ||
|
|
@@ -257,19 +175,19 @@ def compute_metrics(args: argparse.Namespace) -> None: | |
|
|
||
| # Compute old metrics | ||
| print("\nComputing metrics before the update and saving them in", old_dir) | ||
| run_rca(repo_dir, old_dir, EXTENSIONS[args.language]) | ||
| run_rca(repo_dir, old_dir, None, EXTENSIONS[args.language]) | ||
|
|
||
| # Create a new branch | ||
| print("\nCreate a new branch called", args.language) | ||
| run_subprocess("git", "checkout", "-B", args.language) | ||
|
|
||
| # Update tree-sitter-language submodule | ||
| print("\nUpdate", args.language) | ||
| run_subprocess("./update-sumbodules.sh", args.language) | ||
| run_subprocess("./update-submodule.sh", args.language) | ||
|
|
||
| # Compute new metrics | ||
| print("\nComputing metrics after the update and saving them in", new_dir) | ||
| run_rca(repo_dir, new_dir, EXTENSIONS[args.language]) | ||
| run_rca(repo_dir, new_dir, None, EXTENSIONS[args.language]) | ||
|
|
||
|
|
||
| # Compare metrics and dump the differences if there are any. | ||
|
|
@@ -285,8 +203,13 @@ def compare_metrics(args: argparse.Namespace) -> None: | |
| # Create compare directory | ||
| compare_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| # Save files of differences and minimal tests in the chosen directory | ||
| save_diff_files(old_dir, new_dir, compare_dir) | ||
| # Get JSON of differences | ||
| print("\nSave JSON of differences in", compare_dir) | ||
| run_subprocess("json-diff-cli", "--raw-json", "-o", compare_dir, old_dir, new_dir) | ||
|
|
||
| # Get minimal tests | ||
| print("\nSave minimal tests in", compare_dir) | ||
| run_subprocess("json-minimal-tests", "-o", compare_dir, old_dir, new_dir) | ||
|
|
||
|
|
||
| def main() -> None: | ||
|
|
@@ -342,6 +265,31 @@ def main() -> None: | |
| ) | ||
| compute_metrics_cmd.set_defaults(func=compute_metrics) | ||
|
|
||
| # Compute continuous integration metrics command | ||
| compute_ci_metrics_cmd = commands.add_parser( | ||
| "compute-ci-metrics", | ||
| help="Computes the metrics of a chosen repository before and after " | ||
| "a tree-sitter-language update on a continuous integration system.", | ||
| ) | ||
|
|
||
| compute_ci_metrics_cmd.add_argument( | ||
| "-p", | ||
| "--path", | ||
| type=str, | ||
| required=True, | ||
| help="Path where the rust-code-analysis repository is saved on the " | ||
| "continuous integration system", | ||
| ) | ||
| compute_ci_metrics_cmd.add_argument( | ||
| "-l", | ||
| "--language", | ||
| type=str, | ||
| required=True, | ||
| help="tree-sitter-language to be updated", | ||
| ) | ||
|
|
||
| compute_ci_metrics_cmd.set_defaults(func=compute_ci_metrics) | ||
|
|
||
| # Compare metrics command | ||
| compare_metrics_cmd = commands.add_parser( | ||
| "compare-metrics", | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| #!/bin/bash | ||
|
|
||
| # Recreate the language | ||
| pushd enums | ||
| cargo clean | ||
| cargo run -- -lrust -o ../src/languages | ||
| popd | ||
|
|
||
| # Format the code | ||
| cargo fmt |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| #!/bin/bash | ||
|
|
||
| # Update tree-sitter submodule | ||
| # | ||
| # Usage: ./update-submodule.sh $tree-sitter-language | ||
|
|
||
| # Update submodule | ||
| git submodule update --remote $1 | ||
|
|
||
| # Generate the updated grammar for the submodule | ||
| ./update-language-bindings.sh |
This file was deleted.
Uh oh!
There was an error while loading. Please reload this page.