Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 4 additions & 21 deletions check-grammars-crates.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,29 +92,12 @@ if [ "$(ls -A $COMPARE)" ]; then
# Maximum number of considered minimal tests for a metric
MT_THRESHOLD=30

# Array containing the considered metrics
# TODO: Implement a command into rust-code-analysis-cli that returns all
# computed metrics https://github.com/mozilla/rust-code-analysis/issues/478
METRICS=("cognitive" "sloc" "ploc" "lloc" "cloc" "blank" "cyclomatic" "halstead" "nom" "nexits" "nargs")

# Output directory name
# Output directory path
OUTPUT_DIR=/tmp/output-$TREE_SITTER_CRATE

# Create output directory
mkdir -p $OUTPUT_DIR

# Retrieve minimal tests for a metric
for METRIC in "${METRICS[@]}"
do

PREFIX_METRIC="\.$METRIC"
FILES=`grep -r -i -l $PREFIX_METRIC $COMPARE | head -$MT_THRESHOLD`
if [ -n "$FILES" ]
then
mkdir -p $OUTPUT_DIR/$METRIC
cp $FILES $OUTPUT_DIR/$METRIC
fi
done
# Split files into distinct directories depending on
# their metric differences
./split-minimal-tests.py -i $COMPARE -o $OUTPUT_DIR -t $MT_THRESHOLD

tar -czvf /tmp/json-diffs-and-minimal-tests.tar.gz $COMPARE $OUTPUT_DIR
fi
130 changes: 130 additions & 0 deletions split-minimal-tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env python3

"""split-minimal-tests
This script splits HTML minimal-tests, produced by a software called
`json-minimal-tests`, into distinct directories depending on metric differences.

Usage:

./split-minimal-tests.py -i INPUT_DIR -o OUTPUT_DIR [-t MT_THRESHOLD]

NOTE: OUTPUT_DIR is the path to the output directory, which is created
if it does not already exist. It will contain one subdirectory for each
metric that presents differences, named after that metric, or be empty
if no metric differences are found.
MT_THRESHOLD is the maximum number of minimal tests copied for each
metric.
"""

import argparse
import pathlib
import re
import shutil
import typing as T

# List of metrics
# TODO: Implement a command into rust-code-analysis-cli that returns all
# computed metrics https://github.com/mozilla/rust-code-analysis/issues/478
METRICS = [
    "cognitive",
    "sloc",
    "ploc",
    "lloc",
    "cloc",
    "blank",
    "cyclomatic",
    "halstead",
    "nom",
    "nexits",
    "nargs",
]

# Matches a whole `<pre>...</pre>` element, including embedded newlines.
# Compiled once because it is applied to every input file.
_PRE_BLOCK = re.compile(r"<pre>.*?</pre>", re.DOTALL)


def _metrics_in(html: str) -> T.List[str]:
    """Return the metrics whose `.<name>` marker occurs outside `<pre>` blocks.

    The contents of `<pre>` elements are removed first so that text inside
    them cannot produce a match. The result follows the order of `METRICS`.
    """
    visible = _PRE_BLOCK.sub("", html)
    # A plain substring test: metric names contain no regex metacharacters.
    return [name for name in METRICS if f".{name}" in visible]


def main(argv: T.Optional[T.Sequence[str]] = None) -> None:
    """Split HTML minimal tests into one output directory per metric.

    Every `*.html` file found directly inside the input directory is scanned
    for `.<metric>` markers (ignoring `<pre>` blocks) and copied into
    `OUTPUT_DIR/<metric>` for each metric it mentions.

    Args:
        argv: Command-line arguments without the program name. When `None`
            (the default) the arguments are taken from `sys.argv`.

    Raises:
        SystemExit: On invalid arguments, i.e. a missing input directory or
            a negative threshold (raised through `argparse`).
    """
    parser = argparse.ArgumentParser(
        prog="split-minimal-tests",
        description="This tool splits HTML minimal-tests, produced by "
        "a software called `json-minimal-tests`, into distinct directories "
        "depending on metric differences.",
        epilog="The source code of this program can be found on "
        "GitHub at https://github.com/mozilla/rust-code-analysis",
    )

    # Arguments
    parser.add_argument(
        "--input",
        "-i",
        type=pathlib.Path,
        required=True,
        help="Input directory containing HTML minimal tests.",
    )

    parser.add_argument(
        "--output",
        "-o",
        type=pathlib.Path,
        required=True,
        help="Path to the output directory.",
    )

    # Optional arguments
    parser.add_argument(
        "--threshold",
        "-t",
        type=int,
        help="Maximum number of considered minimal tests for a metric.",
    )

    # Parse arguments
    args = parser.parse_args(argv)

    # Fail early instead of silently producing an empty output directory
    if not args.input.is_dir():
        parser.error(f"input directory does not exist: {args.input}")

    # A negative value would slice files off the END of each list
    if args.threshold is not None and args.threshold < 0:
        parser.error("threshold must be a non-negative integer")

    # Create output directory
    args.output.mkdir(parents=True, exist_ok=True)

    # Save files associated to each metric
    metrics_saver: T.Dict[str, T.List[pathlib.Path]] = {
        metric_name: [] for metric_name in METRICS
    }

    # Sorted so that the files kept under the threshold are the same on
    # every run, independently of the filesystem listing order
    for path in sorted(args.input.glob("*.html")):
        # Only ASCII markers are searched for, so undecodable bytes are
        # replaced rather than aborting the whole run
        html = path.read_text(encoding="utf-8", errors="replace")

        for metric_name in _metrics_in(html):
            metrics_saver[metric_name].append(path)

    # Copy the collected files into their metric directories
    for metric_name, metric_files in metrics_saver.items():
        if not metric_files:
            continue

        # Create metric directory
        metric_path = args.output / metric_name
        metric_path.mkdir(parents=True, exist_ok=True)

        # Keep at most `threshold` files; an omitted or zero threshold
        # means "no limit"
        output_paths = (
            metric_files[: args.threshold] if args.threshold else metric_files
        )

        for path in output_paths:
            shutil.copy(path, metric_path)


# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    raise SystemExit(main())