From d24569c75779be90a948df6880a4623ce2b0398d Mon Sep 17 00:00:00 2001
From: Luni-4
Date: Tue, 20 Apr 2021 23:49:14 +0200
Subject: [PATCH 1/2] Add a script to split minimal tests

This script splits minimal tests into distinct directories depending on
their metric differences
---
 split-minimal-tests.py | 130 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100755 split-minimal-tests.py

diff --git a/split-minimal-tests.py b/split-minimal-tests.py
new file mode 100755
index 000000000..1bfcc82a5
--- /dev/null
+++ b/split-minimal-tests.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+
+"""split-minimal-tests
+This script splits HTML minimal-tests, produced by a software called
+`json-minimal-tests`, into distinct directories depending on metric differences.
+
+Usage:
+
+./split-minimal-tests.py -i INPUT_DIR -o OUTPUT_DIR [-t MT_THRESHOLD]
+
+NOTE: OUTPUT_DIR is the path to the output directory to be created.
+This directory could contain either a series of directories, named after
+the metrics that present differences, or be empty if no metric differences
+are found.
+MT_THRESHOLD determines the maximum number of considered minimal tests
+for a metric.
+"""
+
+import argparse
+import pathlib
+import re
+import shutil
+import typing as T
+
+# List of metrics
+# TODO: Implement a command into rust-code-analysis-cli that returns all
+# computed metrics https://github.com/mozilla/rust-code-analysis/issues/478
+METRICS = [
+    "cognitive",
+    "sloc",
+    "ploc",
+    "lloc",
+    "cloc",
+    "blank",
+    "cyclomatic",
+    "halstead",
+    "nom",
+    "nexits",
+    "nargs",
+]
+
+
+def main() -> None:
+    """Parse the arguments and copy each HTML test into its metric directories."""
+    parser = argparse.ArgumentParser(
+        prog="split-minimal-tests",
+        description="This tool splits HTML minimal-tests, produced by "
+        "a software called `json-minimal-tests`, into distinct directories "
+        "depending on metric differences.",
+        epilog="The source code of this program can be found on "
+        "GitHub at https://github.com/mozilla/rust-code-analysis",
+    )
+
+    # Arguments
+    parser.add_argument(
+        "--input",
+        "-i",
+        type=pathlib.Path,
+        required=True,
+        help="Input directory containing HTML minimal tests.",
+    )
+
+    parser.add_argument(
+        "--output",
+        "-o",
+        type=pathlib.Path,
+        required=True,
+        help="Path to the output directory.",
+    )
+
+    # Optional arguments
+    parser.add_argument(
+        "--threshold",
+        "-t",
+        type=int,
+        help="Maximum number of considered minimal tests for a metric.",
+    )
+
+    # Parse arguments
+    args = parser.parse_args()
+
+    # Create output directory
+    args.output.mkdir(parents=True, exist_ok=True)
+
+    # Save files associated to each metric
+    metrics_saver: T.Dict[str, T.List] = {metric_name: [] for metric_name in METRICS}
+
+    # Visit the HTML files in sorted order so the threshold cut is reproducible
+    for path in sorted(args.input.glob("*.html")):
+        # Read the whole HTML file
+        with open(path) as f:
+            file_str = f.read()
+
+            # Remove all code inside <pre> tags
+            file_no_pre = re.sub(r"<pre>(.|\n)*?<\/pre>", "", file_str)
+
+            # Iterate over metrics
+            for metric_name, metric_files in metrics_saver.items():
+                # Check if there is a metric difference in a file
+                m = re.search(rf"(\.{metric_name})", file_no_pre)
+
+                # If the metric does not appear in the file, skip to the next metric
+                if m is None:
+                    continue
+
+                # Save path if there is a metric difference in a file
+                if m.group(1):
+                    metric_files.append(path)
+
+    # Iterate over metrics to print them
+    for metric_name, metric_files in metrics_saver.items():
+        # Create path for metric directory
+        metric_path = args.output / metric_name
+
+        if metric_files:
+            # Create metric directory
+            metric_path.mkdir(parents=True, exist_ok=True)
+
+            # Save the number of files specified in the threshold
+            output_paths = (
+                metric_files[: args.threshold] if args.threshold else metric_files
+            )
+
+            for path in output_paths:
+                # Copy files in the directory
+                shutil.copy(path, metric_path)
+
+
+if __name__ == "__main__":
+    main()

From c65b90782659c86422e950717cbd060b57bd42da Mon Sep 17 00:00:00 2001
From: Luni-4 
Date: Tue, 20 Apr 2021 23:50:11 +0200
Subject: [PATCH 2/2] Use the new script in the metric-checker

---
 check-grammars-crates.sh | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/check-grammars-crates.sh b/check-grammars-crates.sh
index 53d46d85b..a64b7346c 100755
--- a/check-grammars-crates.sh
+++ b/check-grammars-crates.sh
@@ -92,29 +92,12 @@ if [ "$(ls -A $COMPARE)" ]; then
     # Maximum number of considered minimal tests for a metric
     MT_THRESHOLD=30
 
-    # Array containing the considered metrics
-    # TODO: Implement a command into rust-code-analysis-cli that returns all
-    # computed metrics https://github.com/mozilla/rust-code-analysis/issues/478
-    METRICS=("cognitive" "sloc" "ploc" "lloc" "cloc" "blank" "cyclomatic" "halstead" "nom" "nexits" "nargs")
-
-    # Output directory name
+    # Output directory path
     OUTPUT_DIR=/tmp/output-$TREE_SITTER_CRATE
 
-    # Create output directory
-    mkdir -p $OUTPUT_DIR
-
-    # Retrieve minimal tests for a metric
-    for METRIC in "${METRICS[@]}"
-    do
-
-        PREFIX_METRIC="\.$METRIC"
-        FILES=`grep -r -i -l $PREFIX_METRIC $COMPARE | head -$MT_THRESHOLD`
-        if [ -n "$FILES" ]
-        then
-            mkdir -p $OUTPUT_DIR/$METRIC
-            cp $FILES $OUTPUT_DIR/$METRIC
-        fi
-    done
+    # Split files into distinct directories depending on
+    # their metric differences
+    ./split-minimal-tests.py -i "$COMPARE" -o "$OUTPUT_DIR" -t "$MT_THRESHOLD"
 
     tar -czvf /tmp/json-diffs-and-minimal-tests.tar.gz $COMPARE $OUTPUT_DIR
 fi