#!/usr/bin/env python3

"""split-minimal-tests
This script splits HTML minimal-tests, produced by a software called
`json-minimal-tests`, into distinct directories depending on metric differences.

Usage:

./split-minimal-tests.py -i INPUT_DIR -o OUTPUT_DIR [-t MT_THRESHOLD]

NOTE: OUTPUT_DIR is the path to the output directory to be created.
After a run it contains one sub-directory per metric that presents
differences (each named after its metric), or it is empty if no metric
differences are found.

MT_THRESHOLD determines the maximum number of considered minimal tests
for a metric. When it is omitted (or 0) every matching file is kept.

check-grammars-crates.sh runs this script in place of its former inline
grep/cp loop:

    ./split-minimal-tests.py -i $COMPARE -o $OUTPUT_DIR -t $MT_THRESHOLD
"""

import argparse
import pathlib
import re
import shutil
import typing as T

# List of metrics
# TODO: Implement a command into rust-code-analysis-cli that returns all
# computed metrics https://github.com/mozilla/rust-code-analysis/issues/478
METRICS = [
    "cognitive",
    "sloc",
    "ploc",
    "lloc",
    "cloc",
    "blank",
    "cyclomatic",
    "halstead",
    "nom",
    "nexits",
    "nargs",
]

# A whole <pre>...</pre> element. The opening tag is part of the pattern on
# purpose: without it the lazy match would swallow everything that precedes
# each closing tag, including the metric differences we are looking for.
# DOTALL lets `.` span the newlines inside the code block.
_PRE_BLOCK = re.compile(r"<pre\b[^>]*>.*?</pre>", re.DOTALL)

# One pre-compiled pattern per metric: a literal dot followed by the metric
# name (e.g. ".sloc"). Compiled once instead of once per file and per metric.
_METRIC_PATTERNS = {name: re.compile(r"\." + re.escape(name)) for name in METRICS}


def _non_negative_int(value: str) -> int:
    """Convert a command-line value to an int, rejecting negative numbers."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid integer value: {value!r}") from None
    if number < 0:
        # A negative slice bound would silently drop files from the end.
        raise argparse.ArgumentTypeError(f"threshold must be >= 0, got {number}")
    return number


def _parse_args(argv: T.Optional[T.Sequence[str]]) -> argparse.Namespace:
    """Build the command-line parser and parse *argv* (``sys.argv`` if None)."""
    parser = argparse.ArgumentParser(
        prog="split-minimal-tests",
        description="This tool splits HTML minimal-tests, produced by "
        "a software called `json-minimal-tests`, into distinct directories "
        "depending on metric differences.",
        epilog="The source code of this program can be found on "
        "GitHub at https://github.com/mozilla/rust-code-analysis",
    )

    # Arguments
    parser.add_argument(
        "--input",
        "-i",
        type=pathlib.Path,
        required=True,
        help="Input directory containing HTML minimal tests.",
    )

    parser.add_argument(
        "--output",
        "-o",
        type=pathlib.Path,
        required=True,
        help="Path to the output directory.",
    )

    # Optional arguments
    parser.add_argument(
        "--threshold",
        "-t",
        type=_non_negative_int,
        help="Maximum number of considered minimal tests for a metric.",
    )

    return parser.parse_args(argv)


def _metrics_with_differences(html: str) -> T.List[str]:
    """Return the metrics mentioned in *html* outside of its <pre> blocks."""
    # Code snippets may contain text such as ".sloc" by accident, so they
    # are removed before searching.
    html_no_pre = _PRE_BLOCK.sub("", html)
    return [
        name for name, pattern in _METRIC_PATTERNS.items() if pattern.search(html_no_pre)
    ]


def _group_by_metric(input_dir: pathlib.Path) -> T.Dict[str, T.List[pathlib.Path]]:
    """Map each metric to the HTML files of *input_dir* that mention it."""
    groups: T.Dict[str, T.List[pathlib.Path]] = {name: [] for name in METRICS}

    # Sorted so that the files kept under a threshold are reproducible.
    for path in sorted(input_dir.glob("*.html")):
        # Only ASCII metric names are searched for, so undecodable bytes can
        # safely be replaced instead of aborting the whole run.
        html = path.read_text(encoding="utf-8", errors="replace")
        for name in _metrics_with_differences(html):
            groups[name].append(path)

    return groups


def _copy_groups(
    groups: T.Dict[str, T.List[pathlib.Path]],
    output_dir: pathlib.Path,
    threshold: T.Optional[int],
) -> None:
    """Copy every non-empty group into ``output_dir/<metric>``.

    At most *threshold* files are copied per metric; ``None`` or 0 means
    no limit.
    """
    for name, files in groups.items():
        if not files:
            continue

        metric_dir = output_dir / name
        metric_dir.mkdir(parents=True, exist_ok=True)

        selected = files[:threshold] if threshold else files
        for path in selected:
            shutil.copy(path, metric_dir)


def main(argv: T.Optional[T.Sequence[str]] = None) -> None:
    """Split the HTML minimal tests of the input directory by metric.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]`` when omitted, which is what the script entry
            point relies on.
    """
    args = _parse_args(argv)

    # Create output directory (left empty when no metric differs)
    args.output.mkdir(parents=True, exist_ok=True)

    _copy_groups(_group_by_metric(args.input), args.output, args.threshold)


if __name__ == "__main__":
    main()