From 05ffe535e11419fcd3885a3b868c352129d95899 Mon Sep 17 00:00:00 2001 From: Andrew Beach Date: Fri, 16 May 2025 11:38:47 -0500 Subject: [PATCH 1/5] update endpoints --- get_results.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/get_results.py b/get_results.py index 654808c..5fed819 100644 --- a/get_results.py +++ b/get_results.py @@ -28,12 +28,12 @@ def fetch_data_from_api(url, headers): return None def get_media_detail(request_id, token, results_df): - page_index = 1 + page_index = 0 headers = {"x-api-key": token, "Content-Type": "application/json"} - url = f"https://api.prd.realitydefender.xyz/api/media/users/{request_id}?pageIndex={page_index}" if request_id == "": while True: + url = f"https://api.prd.realitydefender.xyz/api/v2/media/users/pages/{page_index}?userIds=[]" print(f"getting page {page_index}") response_data = fetch_data_from_api(url, headers) @@ -51,6 +51,7 @@ def get_media_detail(request_id, token, results_df): page_index += 1 else: + url = f"https://api.prd.realitydefender.xyz/api/media/users/{request_id}" response_data = fetch_data_from_api(url, headers) print(f"fetching {request_id}") if response_data: From f579cc3bf3a591f6f8f04248c99dc6ef50984198 Mon Sep 17 00:00:00 2001 From: Andrew Beach Date: Fri, 16 May 2025 11:48:12 -0500 Subject: [PATCH 2/5] Add --get-all flag --- get_results.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/get_results.py b/get_results.py index 5fed819..771d85d 100644 --- a/get_results.py +++ b/get_results.py @@ -4,6 +4,7 @@ import pandas as pd from dotenv import load_dotenv import csv +import argparse def process_response_data(request_id, response_data, results_df): for item in response_data.get('data', []): @@ -27,13 +28,15 @@ def fetch_data_from_api(url, headers): print(f"An error occurred: {e}") return None -def get_media_detail(request_id, token, results_df): +def get_media_detail(request_id, token, results_df, include_all_users=False): page_index = 0 headers = {"x-api-key": token, "Content-Type": "application/json"} if request_id == "": while True: - url = f"https://api.prd.realitydefender.xyz/api/v2/media/users/pages/{page_index}?userIds=[]" + url = (f"https://api.prd.realitydefender.xyz/api/v2/media/users/pages/{page_index}?userIds=[]" + if include_all_users + else f"https://api.prd.realitydefender.xyz/api/v2/media/users/pages/{page_index}") print(f"getting page {page_index}") response_data = fetch_data_from_api(url, headers) @@ -64,19 +67,22 @@ def get_media_detail(request_id, token, results_df): token = os.getenv("RD_API") results_df = pd.DataFrame(columns=['request_id', 'status', 'score']) - if len(sys.argv) > 2: - print("Usage: python sample_get_script.py ") - sys.exit(1) - elif len(sys.argv) == 1: + # Set up argument parser + parser = argparse.ArgumentParser(description='Fetch media details from Reality Defender API') + parser.add_argument('--get-all', action='store_true', help='Get media details for all users') + parser.add_argument('csv_file', nargs='?', help='Path to CSV file containing request IDs') + + args = parser.parse_args() + + if args.get_all: request_id = "" - results_df = get_media_detail(request_id, token, results_df) + results_df = get_media_detail(request_id, token, results_df, include_all_users=True) results_df.to_csv('results.csv', index=False) print("Results saved to results.csv") - else: - csv_file_path = sys.argv[1] + elif args.csv_file: try: # Load the CSV into a DataFrame - df = pd.read_csv(csv_file_path) + df = pd.read_csv(args.csv_file) # Validate that the required columns are present required_columns = {'file_name', 'request_id'} @@ -93,8 +99,11 @@ def get_media_detail(request_id, token, results_df): merged_df.to_csv('results.csv', index=False) print("Results saved to results.csv") except FileNotFoundError: - print(f"Error: File {csv_file_path} not found.") + print(f"Error: File {args.csv_file} not found.") sys.exit(1) except Exception as e: print(f"Error: {e}") sys.exit(1) + else: + parser.print_help() + sys.exit(1) From d5ff6b45babf420895f0d7d1601588aa282e5a07 Mon Sep 17 00:00:00 2001 From: Andrew Beach Date: Fri, 16 May 2025 12:20:04 -0500 Subject: [PATCH 3/5] further script adjustments --- get_results.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/get_results.py b/get_results.py index 771d85d..4a9e1ee 100644 --- a/get_results.py +++ b/get_results.py @@ -40,7 +40,7 @@ def get_media_detail(request_id, token, results_df, include_all_users=False): print(f"getting page {page_index}") response_data = fetch_data_from_api(url, headers) - if response_data is None or not response_data.get('data'): + if response_data is None or not response_data.get('mediaList'): print(f"No data found for page {page_index}. Skipping.") page_index += 1 continue @@ -70,16 +70,11 @@ def get_media_detail(request_id, token, results_df, include_all_users=False): # Set up argument parser parser = argparse.ArgumentParser(description='Fetch media details from Reality Defender API') parser.add_argument('--get-all', action='store_true', help='Get media details for all users') - parser.add_argument('csv_file', nargs='?', help='Path to CSV file containing request IDs') + parser.add_argument('csv_file', nargs='?', default=None, help='Path to CSV file containing request IDs') args = parser.parse_args() - if args.get_all: - request_id = "" - results_df = get_media_detail(request_id, token, results_df, include_all_users=True) - results_df.to_csv('results.csv', index=False) - print("Results saved to results.csv") - elif args.csv_file: + if args.csv_file: try: # Load the CSV into a DataFrame df = pd.read_csv(args.csv_file) @@ -105,5 +100,7 @@ def get_media_detail(request_id, token, results_df, include_all_users=False): print(f"Error: {e}") sys.exit(1) else: - parser.print_help() - sys.exit(1) + request_id = "" + results_df = get_media_detail(request_id, token, results_df, include_all_users=args.get_all) + results_df.to_csv('results.csv', index=False) + print("Results saved to results.csv") From 8af64d36034d64110237f7cde98e23f6b58e8ac7 Mon Sep 17 00:00:00 2001 From: Andrew Beach Date: Fri, 16 May 2025 13:38:51 -0500 Subject: [PATCH 4/5] Additional fixes, add file_name column --- get_results.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/get_results.py b/get_results.py index 4a9e1ee..1946b51 100644 --- a/get_results.py +++ b/get_results.py @@ -7,13 +7,16 @@ import argparse def process_response_data(request_id, response_data, results_df): - for item in response_data.get('data', []): - overall_status = item["resultsSummary"].get('status', 'N/A') - score = item["resultsSummary"].get('metadata', {}).get('finalScore', 'N/A') + for item in response_data.get('mediaList', []): + results_summary = item.get("resultsSummary", {}) + + overall_status = results_summary.get('status', 'UNABLE_TO_EVALUATE') if results_summary else 'UNABLE_TO_EVALUATE' + score = results_summary.get('metadata', {}).get('finalScore', '') if results_summary else '' # Append the data to the DataFrame results_df = pd.concat([results_df, pd.DataFrame([{ - 'request_id': request_id, + 'file_name': item.get('originalFileName', ''), + 'request_id': request_id if request_id else item.get('requestId', ''), 'status': overall_status, 'score': score }])], ignore_index=True) @@ -34,27 +37,30 @@ def get_media_detail(request_id, token, results_df, include_all_users=False): if request_id == "": while True: - url = (f"https://api.prd.realitydefender.xyz/api/v2/media/users/pages/{page_index}?userIds=[]" + url = (f"https://api.dev.realitydefender.xyz/api/v2/media/users/pages/{page_index}?userIds=[]" if include_all_users - else f"https://api.prd.realitydefender.xyz/api/v2/media/users/pages/{page_index}") + else f"https://api.dev.realitydefender.xyz/api/v2/media/users/pages/{page_index}") print(f"getting page {page_index}") response_data = fetch_data_from_api(url, headers) - if response_data is None or not response_data.get('mediaList'): - print(f"No data found for page {page_index}. Skipping.") - page_index += 1 - continue + if response_data is None: + print(f"Error fetching page {page_index}. Stopping.") + break + + if not response_data.get('mediaList'): + print(f"No data found for page {page_index}. Stopping.") + break results_df = process_response_data(request_id, response_data, results_df) - total_pages = response_data.get('totalPages', 1) - + total_pages = response_data.get('totalPages', 0) + if page_index >= total_pages: break page_index += 1 else: - url = f"https://api.prd.realitydefender.xyz/api/media/users/{request_id}" + url = f"https://api.dev.realitydefender.xyz/api/media/users/{request_id}" response_data = fetch_data_from_api(url, headers) print(f"fetching {request_id}") if response_data: @@ -65,7 +71,7 @@ def get_media_detail(request_id, token, results_df, include_all_users=False): if __name__ == "__main__": load_dotenv() token = os.getenv("RD_API") - results_df = pd.DataFrame(columns=['request_id', 'status', 'score']) + results_df = pd.DataFrame(columns=['file_name', 'request_id', 'status', 'score']) # Set up argument parser parser = argparse.ArgumentParser(description='Fetch media details from Reality Defender API') From 60edcf0540038bd4869b900a32cc46a802c54ce3 Mon Sep 17 00:00:00 2001 From: Andrew Beach Date: Fri, 16 May 2025 14:05:51 -0500 Subject: [PATCH 5/5] fix csv case --- get_results.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/get_results.py b/get_results.py index 1946b51..901d9da 100644 --- a/get_results.py +++ b/get_results.py @@ -7,7 +7,8 @@ import argparse def process_response_data(request_id, response_data, results_df): - for item in response_data.get('mediaList', []): + data = response_data.get("mediaList") or response_data.get("data") or [] + for item in data: results_summary = item.get("resultsSummary", {}) overall_status = results_summary.get('status', 'UNABLE_TO_EVALUATE') if results_summary else 'UNABLE_TO_EVALUATE'