From e3e0f8bde2db5710d91cd5658c0392707e280e70 Mon Sep 17 00:00:00 2001 From: CJ Williams Date: Mon, 16 Jan 2023 17:54:44 +0000 Subject: [PATCH 1/5] Fix extracted README paths to use relative output path --- inspect4py/cli.py | 1 + inspect4py/utils.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/inspect4py/cli.py b/inspect4py/cli.py index ad5a726..bf20997 100644 --- a/inspect4py/cli.py +++ b/inspect4py/cli.py @@ -1278,6 +1278,7 @@ def main(input_path, output_dir, ignore_dir_pattern, ignore_file_pattern, requir files[:] = [f for f in files if not f.startswith(ignore_f)] # print(files) for f in files: + print(f) if ".py" in f and not f.endswith(".pyc"): try: path = os.path.join(subdir, f) diff --git a/inspect4py/utils.py b/inspect4py/utils.py index de26e78..8c2c1ef 100644 --- a/inspect4py/utils.py +++ b/inspect4py/utils.py @@ -729,9 +729,10 @@ def extract_readme(input_path: str) -> dict: """ readme_files = {} for file in Path(input_path).rglob("README.*"): + relative_path = Path(file).relative_to(Path(input_path).parent) try: with open(file, 'r') as f: - readme_files[str(file)] = f.read() + readme_files[str(relative_path)] = f.read() except Exception as e: print(f"Error when opening {file}: {e}") From b24a93e90b4a02af30d6c7195d69b35d026ac90b Mon Sep 17 00:00:00 2001 From: CJ Williams Date: Mon, 16 Jan 2023 17:57:41 +0000 Subject: [PATCH 2/5] Update unit test --- test/test_inspect4py.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/test/test_inspect4py.py b/test/test_inspect4py.py index 6983e03..c1401f9 100644 --- a/test/test_inspect4py.py +++ b/test/test_inspect4py.py @@ -614,14 +614,12 @@ def test_readme(self): call_list, control_flow, directory_tree, software_invocation, abstract_syntax_tree, source_code, license_detection, readme, metadata) - root_dir = Path(input_path) expected_readme_files = { - f"{root_dir}/README.md": "README.md in root dir\n", - f"{root_dir}/subdir/README.txt": "README.txt in subdir\n", - f"{root_dir}/subdir/subsubdir/README.rst": "README.rst in subsubdir\n" + "test_readme/README.md": "README.md in root dir\n", + "test_readme/subdir/README.txt": "README.txt in subdir\n", + "test_readme/subdir/subsubdir/README.rst": "README.rst in subsubdir\n" } actual_readme_files = dir_info["readme_files"] - print(actual_readme_files) assert expected_readme_files == actual_readme_files From 94a8ed1dd324bde897aa94525479c280fcffa2da Mon Sep 17 00:00:00 2001 From: CJ Williams Date: Mon, 16 Jan 2023 18:09:44 +0000 Subject: [PATCH 3/5] Add missing output directory segment --- inspect4py/cli.py | 2 +- inspect4py/utils.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/inspect4py/cli.py b/inspect4py/cli.py index bf20997..e6fce9f 100644 --- a/inspect4py/cli.py +++ b/inspect4py/cli.py @@ -1343,7 +1343,7 @@ def main(input_path, output_dir, ignore_dir_pattern, ignore_file_pattern, requir except Exception as e: print("Error when detecting license: %s", str(e)) if readme: - dir_info["readme_files"] = extract_readme(input_path) + dir_info["readme_files"] = extract_readme(input_path, output_dir) if metadata: dir_info["metadata"] = get_github_metadata(input_path) json_file = output_dir + "/directory_info.json" diff --git a/inspect4py/utils.py b/inspect4py/utils.py index 8c2c1ef..276bc26 100644 --- a/inspect4py/utils.py +++ b/inspect4py/utils.py @@ -722,23 +722,24 @@ def detect_license(license_text, licenses_path, threshold=0.9): return sorted(rank_list, key=lambda t: t[1], reverse=True) -def extract_readme(input_path: str) -> dict: +def extract_readme(input_path: str, output_dir: str) -> dict: """ Function to extract content of all readme file under the input directory. :param input_path: Path of the repository to be analyzed. + :param output_dir: The output directory. Used to generate the correct path of the README file. """ readme_files = {} for file in Path(input_path).rglob("README.*"): - relative_path = Path(file).relative_to(Path(input_path).parent) + relative_path = os.path.join(output_dir, Path(file).relative_to(Path(input_path).parent)) try: with open(file, 'r') as f: readme_files[str(relative_path)] = f.read() except Exception as e: print(f"Error when opening {file}: {e}") + print(readme_files.keys()) return readme_files - def get_github_metadata(input_path: str) -> dict: """ Function to extract metadata from the remote repository using Github api. From 48f985c93219b8950f340e3fe076665de741c156 Mon Sep 17 00:00:00 2001 From: CJ Williams Date: Mon, 16 Jan 2023 18:11:41 +0000 Subject: [PATCH 4/5] Fix unit test --- inspect4py/utils.py | 1 - test/test_inspect4py.py | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/inspect4py/utils.py b/inspect4py/utils.py index 276bc26..82d87d3 100644 --- a/inspect4py/utils.py +++ b/inspect4py/utils.py @@ -737,7 +737,6 @@ def extract_readme(input_path: str, output_dir: str) -> dict: except Exception as e: print(f"Error when opening {file}: {e}") - print(readme_files.keys()) return readme_files def get_github_metadata(input_path: str) -> dict: diff --git a/test/test_inspect4py.py b/test/test_inspect4py.py index c1401f9..ce2ae50 100644 --- a/test/test_inspect4py.py +++ b/test/test_inspect4py.py @@ -615,9 +615,9 @@ def test_readme(self): source_code, license_detection, readme, metadata) expected_readme_files = { - "test_readme/README.md": "README.md in root dir\n", - "test_readme/subdir/README.txt": "README.txt in subdir\n", - "test_readme/subdir/subsubdir/README.rst": "README.rst in subsubdir\n" + f"{output_dir}/test_readme/README.md": "README.md in root dir\n", + f"{output_dir}/test_readme/subdir/README.txt": "README.txt in subdir\n", + f"{output_dir}/test_readme/subdir/subsubdir/README.rst": "README.rst in subsubdir\n" } actual_readme_files = dir_info["readme_files"] assert expected_readme_files == actual_readme_files @@ -736,7 +736,7 @@ def invoke_inspector(input_path, output_dir, ignore_dir_pattern, ignore_file_pat dir_info["license"]["detected_type"] = [{k: f"{v:.1%}"} for k, v in rank_list] dir_info["license"]["extracted_text"] = license_text if readme: - dir_info["readme_files"] = extract_readme(input_path) + dir_info["readme_files"] = extract_readme(input_path, output_dir) if metadata: dir_info["metadata"] = get_github_metadata(input_path) return dir_info From 61677699a24adf8ba0fc51ee85125dbcd84185d8 Mon Sep 17 00:00:00 2001 From: CJ Williams Date: Mon, 16 Jan 2023 20:48:48 +0000 Subject: [PATCH 5/5] Remove print statements --- inspect4py/cli.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/inspect4py/cli.py b/inspect4py/cli.py index e6fce9f..166b275 100644 --- a/inspect4py/cli.py +++ b/inspect4py/cli.py @@ -1276,9 +1276,7 @@ def main(input_path, output_dir, ignore_dir_pattern, ignore_file_pattern, requir dirs[:] = [d for d in dirs if not d.startswith(ignore_d)] for ignore_f in ignore_file_pattern: files[:] = [f for f in files if not f.startswith(ignore_f)] - # print(files) for f in files: - print(f) if ".py" in f and not f.endswith(".pyc"): try: path = os.path.join(subdir, f)