diff --git a/inspect4py/cli.py b/inspect4py/cli.py index ad5a726..166b275 100644 --- a/inspect4py/cli.py +++ b/inspect4py/cli.py @@ -1276,7 +1276,6 @@ def main(input_path, output_dir, ignore_dir_pattern, ignore_file_pattern, requir dirs[:] = [d for d in dirs if not d.startswith(ignore_d)] for ignore_f in ignore_file_pattern: files[:] = [f for f in files if not f.startswith(ignore_f)] - # print(files) for f in files: if ".py" in f and not f.endswith(".pyc"): try: @@ -1342,7 +1341,7 @@ def main(input_path, output_dir, ignore_dir_pattern, ignore_file_pattern, requir except Exception as e: print("Error when detecting license: %s", str(e)) if readme: - dir_info["readme_files"] = extract_readme(input_path) + dir_info["readme_files"] = extract_readme(input_path, output_dir) if metadata: dir_info["metadata"] = get_github_metadata(input_path) json_file = output_dir + "/directory_info.json" diff --git a/inspect4py/utils.py b/inspect4py/utils.py index de26e78..82d87d3 100644 --- a/inspect4py/utils.py +++ b/inspect4py/utils.py @@ -722,22 +722,23 @@ def detect_license(license_text, licenses_path, threshold=0.9): return sorted(rank_list, key=lambda t: t[1], reverse=True) -def extract_readme(input_path: str) -> dict: +def extract_readme(input_path: str, output_dir: str) -> dict: """ Function to extract content of all readme file under the input directory. :param input_path: Path of the repository to be analyzed. + :param output_dir: The output directory. Used to generate the correct path of the README file. """ readme_files = {} for file in Path(input_path).rglob("README.*"): + relative_path = os.path.join(output_dir, Path(file).relative_to(Path(input_path).parent)) try: with open(file, 'r') as f: - readme_files[str(file)] = f.read() + readme_files[str(relative_path)] = f.read() except Exception as e: print(f"Error when opening {file}: {e}") return readme_files - def get_github_metadata(input_path: str) -> dict: """ Function to extract metadata from the remote repository using Github api. diff --git a/test/test_inspect4py.py b/test/test_inspect4py.py index 6983e03..ce2ae50 100644 --- a/test/test_inspect4py.py +++ b/test/test_inspect4py.py @@ -614,14 +614,12 @@ def test_readme(self): call_list, control_flow, directory_tree, software_invocation, abstract_syntax_tree, source_code, license_detection, readme, metadata) - root_dir = Path(input_path) expected_readme_files = { - f"{root_dir}/README.md": "README.md in root dir\n", - f"{root_dir}/subdir/README.txt": "README.txt in subdir\n", - f"{root_dir}/subdir/subsubdir/README.rst": "README.rst in subsubdir\n" + f"{output_dir}/test_readme/README.md": "README.md in root dir\n", + f"{output_dir}/test_readme/subdir/README.txt": "README.txt in subdir\n", + f"{output_dir}/test_readme/subdir/subsubdir/README.rst": "README.rst in subsubdir\n" } actual_readme_files = dir_info["readme_files"] - print(actual_readme_files) assert expected_readme_files == actual_readme_files @@ -738,7 +736,7 @@ def invoke_inspector(input_path, output_dir, ignore_dir_pattern, ignore_file_pat dir_info["license"]["detected_type"] = [{k: f"{v:.1%}"} for k, v in rank_list] dir_info["license"]["extracted_text"] = license_text if readme: - dir_info["readme_files"] = extract_readme(input_path) + dir_info["readme_files"] = extract_readme(input_path, output_dir) if metadata: dir_info["metadata"] = get_github_metadata(input_path) return dir_info