From 4f245f17ce62db33961e5bca243968a4af98723c Mon Sep 17 00:00:00 2001 From: rahulharpal1603 Date: Wed, 20 Aug 2025 15:09:26 +0530 Subject: [PATCH 1/2] Add sync microservice for watching file system events --- backend/app/database/folders.py | 62 +++- backend/app/database/images.py | 70 ++++ backend/app/routes/folders.py | 78 +++-- backend/app/utils/folders.py | 31 +- backend/app/utils/images.py | 342 ++++++++++++------ backend/main.py | 20 +- sync-microservice/.gitignore | 142 ++++++++ sync-microservice/README.md | 192 +++++++++++ sync-microservice/app/config/settings.py | 24 ++ sync-microservice/app/core/__init__.py | 0 sync-microservice/app/core/lifespan.py | 54 +++ sync-microservice/app/database/__init__.py | 0 sync-microservice/app/database/folders.py | 117 +++++++ sync-microservice/app/routes/__init__.py | 0 sync-microservice/app/routes/folders.py | 50 +++ sync-microservice/app/routes/health.py | 17 + sync-microservice/app/routes/watcher.py | 85 +++++ sync-microservice/app/schemas/__init__.py | 1 + sync-microservice/app/schemas/folders.py | 36 ++ sync-microservice/app/schemas/health.py | 10 + sync-microservice/app/schemas/watcher.py | 33 ++ sync-microservice/app/utils/__init__.py | 0 sync-microservice/app/utils/file_watcher.py | 0 sync-microservice/app/utils/watcher.py | 362 ++++++++++++++++++++ sync-microservice/main.py | 16 + sync-microservice/requirements.txt | 40 +++ 26 files changed, 1593 insertions(+), 189 deletions(-) create mode 100644 sync-microservice/.gitignore create mode 100644 sync-microservice/README.md create mode 100644 sync-microservice/app/config/settings.py create mode 100644 sync-microservice/app/core/__init__.py create mode 100644 sync-microservice/app/core/lifespan.py create mode 100644 sync-microservice/app/database/__init__.py create mode 100644 sync-microservice/app/database/folders.py create mode 100644 sync-microservice/app/routes/__init__.py create mode 100644 sync-microservice/app/routes/folders.py create mode 100644 sync-microservice/app/routes/health.py create mode 100644 sync-microservice/app/routes/watcher.py create mode 100644 sync-microservice/app/schemas/__init__.py create mode 100644 sync-microservice/app/schemas/folders.py create mode 100644 sync-microservice/app/schemas/health.py create mode 100644 sync-microservice/app/schemas/watcher.py create mode 100644 sync-microservice/app/utils/__init__.py create mode 100644 sync-microservice/app/utils/file_watcher.py create mode 100644 sync-microservice/app/utils/watcher.py create mode 100644 sync-microservice/main.py create mode 100644 sync-microservice/requirements.txt diff --git a/backend/app/database/folders.py b/backend/app/database/folders.py index 7162104af..6a617d23c 100644 --- a/backend/app/database/folders.py +++ b/backend/app/database/folders.py @@ -180,9 +180,7 @@ def db_delete_folder(folder_path: FolderPath) -> None: conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() abs_folder_path = os.path.abspath(folder_path) - cursor.execute( - "PRAGMA foreign_keys = ON;" - ) # Important for deleting rows in image_id_mapping and images table because they reference this folder_id + cursor.execute("PRAGMA foreign_keys = ON;") # Important for deleting rows in image_id_mapping and images table because they reference this folder_id conn.commit() cursor.execute( "SELECT folder_id FROM folders WHERE folder_path = ?", @@ -192,9 +190,7 @@ def db_delete_folder(folder_path: FolderPath) -> None: if not existing_folder: conn.close() - raise ValueError( - f"Error: Folder '{folder_path}' does not exist in the database." - ) + raise ValueError(f"Error: Folder '{folder_path}' does not exist in the database.") cursor.execute( "DELETE FROM folders WHERE folder_path = ?", @@ -205,9 +201,7 @@ def db_delete_folder(folder_path: FolderPath) -> None: conn.close() -def db_update_parent_ids_for_subtree( - root_folder_path: FolderPath, folder_map: FolderMap -) -> None: +def db_update_parent_ids_for_subtree(root_folder_path: FolderPath, folder_map: FolderMap) -> None: """ Update parent_folder_id for all folders in the subtree rooted at root_folder_path. Only updates folders whose parent_folder_id is NULL. @@ -240,9 +234,7 @@ def db_folder_exists(folder_path: FolderPath) -> bool: cursor = conn.cursor() try: abs_path = os.path.abspath(folder_path) - cursor.execute( - "SELECT folder_id FROM folders WHERE folder_path = ?", (abs_path,) - ) + cursor.execute("SELECT folder_id FROM folders WHERE folder_path = ?", (abs_path,)) result = cursor.fetchone() return bool(result) finally: @@ -261,18 +253,14 @@ def db_find_parent_folder_id(folder_path: FolderPath) -> Optional[FolderId]: conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() try: - cursor.execute( - "SELECT folder_id FROM folders WHERE folder_path = ?", (parent_path,) - ) + cursor.execute("SELECT folder_id FROM folders WHERE folder_path = ?", (parent_path,)) result = cursor.fetchone() return result[0] if result else None finally: conn.close() -def db_update_ai_tagging_batch( - folder_ids: List[FolderId], ai_tagging_enabled: bool -) -> int: +def db_update_ai_tagging_batch(folder_ids: List[FolderId], ai_tagging_enabled: bool) -> int: """ Update AI_Tagging status for multiple folders in a single transaction. folder_ids: list of folder IDs to update @@ -342,6 +330,44 @@ def db_get_folder_ids_by_path_prefix(root_path: str) -> List[FolderIdPath]: conn.close() +def db_get_folder_ids_by_paths(folder_paths: List[FolderPath]) -> Dict[FolderPath, FolderId]: + """ + Get folder IDs for multiple folder paths in a single database query. + + Args: + folder_paths: List of folder paths to look up + + Returns: + Dictionary mapping folder paths to their corresponding folder IDs + """ + if not folder_paths: + return {} + + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + + try: + # Convert all paths to absolute paths + abs_paths = [os.path.abspath(path) for path in folder_paths] + + # Create placeholders for the IN clause + placeholders = ",".join("?" * len(abs_paths)) + + cursor.execute( + f"SELECT folder_path, folder_id FROM folders WHERE folder_path IN ({placeholders})", + abs_paths, + ) + + results = cursor.fetchall() + + # Create a mapping from folder_path to folder_id + path_to_id = {folder_path: folder_id for folder_path, folder_id in results} + + return path_to_id + finally: + conn.close() + + def db_get_direct_child_folders(parent_folder_id: str) -> List[Tuple[str, str]]: """ Get all direct child folders (not subfolders) for a given parent folder. diff --git a/backend/app/database/images.py b/backend/app/database/images.py index 08cb64799..76ea83f9d 100644 --- a/backend/app/database/images.py +++ b/backend/app/database/images.py @@ -195,3 +195,73 @@ def db_insert_image_classes_batch(image_class_pairs: List[ImageClassPair]) -> bo return False finally: conn.close() + + +def db_get_images_by_folder_ids(folder_ids: List[int]) -> List[Tuple[ImageId, ImagePath, str]]: + """ + Get all images that belong to the specified folder IDs. + + Args: + folder_ids: List of folder IDs to search for images + + Returns: + List of tuples containing (image_id, image_path, thumbnail_path) + """ + if not folder_ids: + return [] + + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + + try: + # Create placeholders for the IN clause + placeholders = ",".join("?" for _ in folder_ids) + cursor.execute( + f""" + SELECT id, path, thumbnailPath + FROM images + WHERE folder_id IN ({placeholders}) + """, + folder_ids, + ) + return cursor.fetchall() + except Exception as e: + print(f"Error getting images by folder IDs: {e}") + return [] + finally: + conn.close() + + +def db_delete_images_by_ids(image_ids: List[ImageId]) -> bool: + """ + Delete multiple images from the database by their IDs. + This will also delete associated records in image_classes due to CASCADE. + + Args: + image_ids: List of image IDs to delete + + Returns: + True if deletion was successful, False otherwise + """ + if not image_ids: + return True + + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + + try: + # Create placeholders for the IN clause + placeholders = ",".join("?" for _ in image_ids) + cursor.execute( + f"DELETE FROM images WHERE id IN ({placeholders})", + image_ids, + ) + conn.commit() + print(f"Deleted {cursor.rowcount} obsolete image(s) from database") + return True + except Exception as e: + print(f"Error deleting images: {e}") + conn.rollback() + return False + finally: + conn.close() diff --git a/backend/app/routes/folders.py b/backend/app/routes/folders.py index 7493e0012..58e6551dd 100644 --- a/backend/app/routes/folders.py +++ b/backend/app/routes/folders.py @@ -1,4 +1,5 @@ from fastapi import APIRouter, HTTPException, status, Depends, Request +from typing import List, Tuple from app.database.folders import ( db_update_parent_ids_for_subtree, db_folder_exists, @@ -7,6 +8,7 @@ db_disable_ai_tagging_batch, db_delete_folders_batch, db_get_direct_child_folders, + db_get_folder_ids_by_path_prefix, ) from app.schemas.folders import ( AddFolderRequest, @@ -37,15 +39,24 @@ router = APIRouter() -def post_folder_add_sequence(folder_path: str): +def post_folder_add_sequence(folder_path: str, folder_id: int): """ Post-addition sequence for a folder. This function is called after a folder is successfully added. It processes images in the folder and updates the database. """ try: - # Process images in the folder - image_util_process_folder_images(folder_path) + # Get all folder IDs and paths that match the root path prefix + folder_data = [] + folder_ids_and_paths = db_get_folder_ids_by_path_prefix(folder_path) + + # Set all folders to non-recursive (False) + for folder_id_from_db, folder_path_from_db in folder_ids_and_paths: + folder_data.append((folder_path_from_db, folder_id_from_db, False)) + + print("Add folder: ", folder_data) + # Process images in all folders + image_util_process_folder_images(folder_data) except Exception as e: print(f"Error in post processing after folder {folder_path} was added: {e}") @@ -68,6 +79,32 @@ def post_AI_tagging_enabled_sequence(): return True +def post_sync_folder_sequence(folder_path: str, folder_id: int, added_folders: List[Tuple[str, str]]): + """ + Post-sync sequence for a folder. + This function is called after a folder is synced. + It processes images in the folder and updates the database. + """ + try: + # Create folder data array + folder_data = [] + + folder_data.append((folder_path, folder_id, False)) + + for added_folder_id, added_folder_path in added_folders: + folder_data.append((added_folder_path, added_folder_id, False)) + + print("Sync folder: ", folder_data) + # Process images in all folders + image_util_process_folder_images(folder_data) + image_util_process_untagged_images() + cluster_util_face_clusters_sync() + except Exception as e: + print(f"Error in post processing after folder {folder_path} was synced: {e}") + return False + return True + + def get_state(request: Request): return request.app.state @@ -82,9 +119,7 @@ def add_folder(request: AddFolderRequest, app_state=Depends(get_state)): # Step 1: Data Validation if not os.path.isdir(request.folder_path): - raise ValueError( - f"Error: '{request.folder_path}' is not a valid directory." - ) + raise ValueError(f"Error: '{request.folder_path}' is not a valid directory.") if ( not os.access(request.folder_path, os.R_OK) @@ -132,7 +167,7 @@ def add_folder(request: AddFolderRequest, app_state=Depends(get_state)): # Step 6: Call the post-addition sequence in a separate process executor: ProcessPoolExecutor = app_state.executor - executor.submit(post_folder_add_sequence, request.folder_path) + executor.submit(post_folder_add_sequence, request.folder_path, root_folder_id) return AddFolderResponse( success=True, @@ -287,33 +322,30 @@ def delete_folders(request: DeleteFoldersRequest): response_model=SyncFolderResponse, responses={code: {"model": ErrorResponse} for code in [400, 404, 500]}, ) -def sync_folder(request: SyncFolderRequest): +def sync_folder(request: SyncFolderRequest, app_state=Depends(get_state)): """Sync a folder by comparing filesystem folders with database entries and removing extra DB entries.""" try: - # Step 1: Validate request - - # Step 2: Get current state from both sources + # Step 1: Get current state from both sources db_child_folders = db_get_direct_child_folders(request.folder_id) - filesystem_folders = folder_util_get_filesystem_direct_child_folders( - request.folder_path - ) + filesystem_folders = folder_util_get_filesystem_direct_child_folders(request.folder_path) - # Step 3: Compare and identify differences + # Step 2: Compare and identify differences filesystem_folder_set = set(filesystem_folders) db_folder_paths = {folder_path for folder_id, folder_path in db_child_folders} folders_to_delete = db_folder_paths - filesystem_folder_set folders_to_add = filesystem_folder_set - db_folder_paths - # Step 4: Perform synchronization operations - deleted_count, deleted_folders = folder_util_delete_obsolete_folders( - db_child_folders, folders_to_delete - ) - added_count, added_folders = folder_util_add_multiple_folder_trees( - folders_to_add, request.folder_id - ) + # Step 3: Perform synchronization operations + deleted_count, deleted_folders = folder_util_delete_obsolete_folders(db_child_folders, folders_to_delete) + added_count, added_folders_with_ids = folder_util_add_multiple_folder_trees(folders_to_add, request.folder_id) - # Step 5: Return comprehensive response + # Extract just the paths for the API response + added_folders = [folder_path for folder_id, folder_path in added_folders_with_ids] + + executor: ProcessPoolExecutor = app_state.executor + executor.submit(post_sync_folder_sequence, request.folder_path, request.folder_id, added_folders_with_ids) + # Step 4: Return comprehensive response return SyncFolderResponse( success=True, message=f"Successfully synced folder. Added {added_count} folder(s), deleted {deleted_count} folder(s)", diff --git a/backend/app/utils/folders.py b/backend/app/utils/folders.py index fbf441d23..7cf5b3d59 100644 --- a/backend/app/utils/folders.py +++ b/backend/app/utils/folders.py @@ -10,9 +10,7 @@ ) -def folder_util_add_folder_tree( - root_path, parent_folder_id=None, AI_Tagging=False, taggingCompleted=None -): +def folder_util_add_folder_tree(root_path, parent_folder_id=None, AI_Tagging=False, taggingCompleted=None): """ Recursively collect folder data and insert all folders in a single database transaction. All folders are initially inserted with NULL parent_id, which is updated after insertion. @@ -31,9 +29,7 @@ def folder_util_add_folder_tree( parent_id = parent_folder_id else: parent_path = os.path.dirname(dirpath) - parent_id = ( - folder_map[parent_path][0] if parent_path in folder_map else None - ) + parent_id = folder_map[parent_path][0] if parent_path in folder_map else None # Store both folder_id and parent_id in the map folder_map[dirpath] = (this_folder_id, parent_id) @@ -99,9 +95,7 @@ def folder_util_get_filesystem_direct_child_folders(folder_path: str) -> List[st ) -def folder_util_delete_obsolete_folders( - db_child_folders: List[Tuple[str, str]], folders_to_delete: set -) -> Tuple[int, List[str]]: +def folder_util_delete_obsolete_folders(db_child_folders: List[Tuple[str, str]], folders_to_delete: set) -> Tuple[int, List[str]]: """ Delete folders from the database that are no longer present in the filesystem. @@ -116,11 +110,7 @@ def folder_util_delete_obsolete_folders( return 0, [] # Get the folder IDs for the folders to delete - folder_ids_to_delete = [ - folder_id - for folder_id, folder_path in db_child_folders - if folder_path in folders_to_delete - ] + folder_ids_to_delete = [folder_id for folder_id, folder_path in db_child_folders if folder_path in folders_to_delete] if folder_ids_to_delete: deleted_count = db_delete_folders_batch(folder_ids_to_delete) @@ -129,9 +119,7 @@ def folder_util_delete_obsolete_folders( return 0, [] -def folder_util_add_multiple_folder_trees( - folders_to_add: set, parent_folder_id: str -) -> Tuple[int, List[str]]: +def folder_util_add_multiple_folder_trees(folders_to_add: set, parent_folder_id: str) -> Tuple[int, List[Tuple[str, str]]]: """ Add multiple folder trees with same parent to the database. @@ -140,12 +128,12 @@ def folder_util_add_multiple_folder_trees( parent_folder_id: ID of the parent folder Returns: - Tuple of (added_count, added_folders_list) + Tuple of (added_count, added_folders_list) where added_folders_list contains (folder_id, folder_path) tuples """ if not folders_to_add: return 0, [] - added_folders = [] + added_folders = [] # List of (folder_id, folder_path) tuples added_count = 0 for folder_path in folders_to_add: @@ -161,7 +149,10 @@ def folder_util_add_multiple_folder_trees( # Update parent IDs for the new folder tree db_update_parent_ids_for_subtree(folder_path, folder_map) - added_folders.append(folder_path) + # Add all folders from the folder_map as (folder_id, folder_path) tuples + for folder_path_in_map, (folder_id_in_map, _) in folder_map.items(): + added_folders.append((folder_id_in_map, folder_path_in_map)) + added_count += len(folder_map) # Count all folders in the tree except Exception as e: diff --git a/backend/app/utils/images.py b/backend/app/utils/images.py index 07db6ef44..a5463c116 100644 --- a/backend/app/utils/images.py +++ b/backend/app/utils/images.py @@ -10,128 +10,84 @@ db_get_untagged_images, db_update_image_tagged_status, db_insert_image_classes_batch, + db_get_images_by_folder_ids, + db_delete_images_by_ids, ) -from app.database.folders import db_get_folder_ids_by_path_prefix from app.models.FaceDetector import FaceDetector from app.models.ObjectClassifier import ObjectClassifier -def image_util_is_valid_image(file_path: str) -> bool: - """Check if the file is a valid image with allowed extensions.""" - # Check file extension first - allowed_extensions = {".jpg", ".jpeg", ".png"} - file_extension = Path(file_path).suffix.lower() +def image_util_process_folder_images(folder_data: List[Tuple[str, int, bool]]) -> bool: + """Main function to process images in multiple folders based on provided folder data. - if file_extension not in allowed_extensions: - return False + Args: + folder_data: List of tuples containing (folder_path, folder_id, recursive) - # Then verify it's a valid image + Returns: + bool: True if all folders processed successfully, False otherwise + """ try: - with Image.open(file_path) as img: - img.verify() - return True - except Exception: - return False + # Ensure thumbnail directory exists + os.makedirs(THUMBNAIL_IMAGES_PATH, exist_ok=True) + all_image_records = [] + all_folder_ids = [] -def image_util_generate_thumbnail( - image_path: str, thumbnail_path: str, size: Tuple[int, int] = (200, 200) -) -> bool: - """Generate thumbnail for a single image.""" - try: - with Image.open(image_path) as img: - img.thumbnail(size) + # Process each folder in the provided data + for folder_path, folder_id, recursive in folder_data: + try: + # Add folder ID to list for obsolete image cleanup + all_folder_ids.append(folder_id) - # Convert to RGB if the image has an alpha channel or is not RGB - if img.mode in ("RGBA", "P"): - img = img.convert("RGB") + # Step 1: Get all image files from current folder + image_files = image_util_get_images_from_folder(folder_path, recursive) - img.save(thumbnail_path, "JPEG") # Always save thumbnails as JPEG - return True - except Exception as e: - print(f"Error generating thumbnail for {image_path}: {e}") - return False + if not image_files: + continue # No images in this folder, continue to next + # Step 2: Create folder path mapping for this folder + folder_path_to_id = {os.path.abspath(folder_path): folder_id} -def image_util_get_images_from_folder(folder_path: str) -> List[str]: - """Get all image files from a folder.""" - image_files = [] - for root, _, files in os.walk(folder_path): - for file in files: - file_path = os.path.join(root, file) - if image_util_is_valid_image(file_path): - image_files.append(file_path) - return image_files + # Step 3: Prepare image records for this folder + folder_image_records = image_util_prepare_image_records(image_files, folder_path_to_id) + all_image_records.extend(folder_image_records) + + except Exception as e: + print(f"Error processing folder {folder_path}: {e}") + continue # Continue with other folders even if one fails + # Step 4: Remove obsolete images that no longer exist in filesystem + if all_folder_ids: + image_util_remove_obsolete_images(all_folder_ids) -def image_util_process_folder_images(root_folder: str) -> bool: - """Main function to process images in a folder and its subfolders.""" + # Step 5: Bulk insert all new records if any exist + if all_image_records: + return db_bulk_insert_images(all_image_records) + + return True # No images to process is not an error + except Exception as e: + print(f"Error processing folders: {e}") + return False + + +def image_util_process_untagged_images() -> bool: + """Process all untagged images in folders with AI tagging enabled.""" try: - # Ensure thumbnail directory exists - os.makedirs(THUMBNAIL_IMAGES_PATH, exist_ok=True) + # Step 1: Get all untagged images and whose corresponding folder has AI tagging enabled + untagged_images = db_get_untagged_images() + if not untagged_images: + return True # No untagged images to process + + # Step 2: Process each untagged image + image_util_classify_and_face_detect_images(untagged_images) - # Get all folder IDs and create a path -> id mapping - folder_ids = db_get_folder_ids_by_path_prefix(root_folder) - if not folder_ids: - return False - - # Get all image files - image_files = image_util_get_images_from_folder(root_folder) - if not image_files: - return True # No images to process is not an error - - # Create a dictionary mapping folder paths to their IDs - folder_path_to_id: Dict[str, int] = {} - for folder_id in folder_ids: - path = os.path.abspath( - folder_id[1] - ) # Assuming db_get_folder_ids_by_path_prefix returns (id, path) tuples - folder_path_to_id[path] = folder_id[0] - - # Prepare image records - image_records = [] - for image_path in image_files: - parent_folder = os.path.dirname(image_path) - - # Find the most specific folder ID by checking parent folders - folder_id = None - current_path = parent_folder - while current_path: - if current_path in folder_path_to_id: - folder_id = folder_path_to_id[current_path] - break - current_path = os.path.dirname(current_path) - - if not folder_id: - continue # Skip if no matching folder ID found - - image_id = str(uuid.uuid4()) - thumbnail_name = f"thumbnail_{image_id}.jpg" - thumbnail_path = os.path.join(THUMBNAIL_IMAGES_PATH, thumbnail_name) - - # Generate thumbnail - if image_util_generate_thumbnail(image_path, thumbnail_path): - image_records.append( - { - "id": image_id, - "path": image_path, - "folder_id": folder_id, - "thumbnailPath": thumbnail_path, - "metadata": "{}", # Empty JSON object as default - "isTagged": False, - } - ) - - # Bulk insert all records - return db_bulk_insert_images(image_records) + return True except Exception as e: - print(f"Error processing folder {root_folder}: {e}") + print(f"Error processing untagged images: {e}") return False -def image_util_classify_and_face_detect_images( - untagged_images: List[Dict[str, str]] -) -> None: +def image_util_classify_and_face_detect_images(untagged_images: List[Dict[str, str]]) -> None: """Classify untagged images and detect faces if applicable.""" object_classifier = ObjectClassifier() face_detector = FaceDetector() @@ -143,10 +99,7 @@ def image_util_classify_and_face_detect_images( # Step 1: Get classes classes = object_classifier.get_classes(image_path) - # Step 2: Update the image status in the database - db_update_image_tagged_status(image_id, True) - - # Step 3: Insert class-image pairs if classes were detected + # Step 2: Insert class-image pairs if classes were detected if len(classes) > 0: # Create image-class pairs image_class_pairs = [(image_id, class_id) for class_id in classes] @@ -155,27 +108,188 @@ def image_util_classify_and_face_detect_images( # Insert the pairs into the database db_insert_image_classes_batch(image_class_pairs) - # Step 4: Detect faces if "person" class is present + # Step 3: Detect faces if "person" class is present if classes and 0 in classes and 0 < classes.count(0) < 7: face_detector.detect_faces(image_id, image_path) + + # Step 4: Update the image status in the database + db_update_image_tagged_status(image_id, True) finally: # Ensure resources are cleaned up object_classifier.close() face_detector.close() -def image_util_process_untagged_images() -> bool: - """Process all untagged images in folders with AI tagging enabled.""" +def image_util_prepare_image_records(image_files: List[str], folder_path_to_id: Dict[str, int]) -> List[Dict]: + """ + Prepare image records with thumbnails for database insertion. + + Args: + image_files: List of image file paths + folder_path_to_id: Dictionary mapping folder paths to IDs + + Returns: + List of image record dictionaries ready for database insertion + """ + image_records = [] + for image_path in image_files: + folder_id = image_util_find_folder_id_for_image(image_path, folder_path_to_id) + + if not folder_id: + continue # Skip if no matching folder ID found + + image_id = str(uuid.uuid4()) + thumbnail_name = f"thumbnail_{image_id}.jpg" + thumbnail_path = os.path.join(THUMBNAIL_IMAGES_PATH, thumbnail_name) + + # Generate thumbnail + if image_util_generate_thumbnail(image_path, thumbnail_path): + image_records.append( + { + "id": image_id, + "path": image_path, + "folder_id": folder_id, + "thumbnailPath": thumbnail_path, + "metadata": "{}", # Empty JSON object as default + "isTagged": False, + } + ) + + return image_records + + +def image_util_get_images_from_folder(folder_path: str, recursive: bool = True) -> List[str]: + """Get all image files from a folder. + + Args: + folder_path: Path to the folder to scan + recursive: If True, scan subfolders recursively. If False, only scan direct children. + + Returns: + List of image file paths + """ + image_files = [] + + if recursive: + # Recursive scan using os.walk + for root, _, files in os.walk(folder_path): + for file in files: + file_path = os.path.join(root, file) + if image_util_is_valid_image(file_path): + image_files.append(file_path) + else: + # Non-recursive scan, only direct children + try: + for file in os.listdir(folder_path): + file_path = os.path.join(folder_path, file) + if os.path.isfile(file_path) and image_util_is_valid_image(file_path): + image_files.append(file_path) + except OSError as e: + print(f"Error reading folder {folder_path}: {e}") + + return image_files + + +def image_util_generate_thumbnail(image_path: str, thumbnail_path: str, size: Tuple[int, int] = (200, 200)) -> bool: + """Generate thumbnail for a single image.""" try: - # Step 1: Get all untagged images and whose corresponding folder has AI tagging enabled - untagged_images = db_get_untagged_images() - if not untagged_images: - return True # No untagged images to process + with Image.open(image_path) as img: + img.thumbnail(size) - # Step 2: Process each untagged image - image_util_classify_and_face_detect_images(untagged_images) + # Convert to RGB if the image has an alpha channel or is not RGB + if img.mode in ("RGBA", "P"): + img = img.convert("RGB") + img.save(thumbnail_path, "JPEG") # Always save thumbnails as JPEG return True except Exception as e: - print(f"Error processing untagged images: {e}") + print(f"Error generating thumbnail for {image_path}: {e}") + return False + + +def image_util_remove_obsolete_images(folder_id_list: List[int]) -> int: + """ + Remove obsolete images that no longer exist in the filesystem. + + Args: + folder_id_list: List of folder IDs to check for obsolete images + + Returns: + Number of obsolete images removed + """ + existing_db_images = db_get_images_by_folder_ids(folder_id_list) + + obsolete_images = [] + for image_id, image_path, thumbnail_path in existing_db_images: + if not os.path.exists(image_path): + obsolete_images.append(image_id) + # Also remove thumbnail if it exists + if thumbnail_path and os.path.exists(thumbnail_path): + try: + os.remove(thumbnail_path) + print(f"Removed obsolete thumbnail: {thumbnail_path}") + except OSError as e: + print(f"Error removing thumbnail {thumbnail_path}: {e}") + + if obsolete_images: + db_delete_images_by_ids(obsolete_images) + print(f"Removed {len(obsolete_images)} obsolete image(s) from database") + + return len(obsolete_images) + + +def image_util_create_folder_path_mapping(folder_ids: List[Tuple[int, str]]) -> Dict[str, int]: + """ + Create a dictionary mapping folder paths to their IDs. + + Args: + folder_ids: List of tuples containing (folder_id, folder_path) + + Returns: + Dictionary mapping absolute folder paths to folder IDs + """ + folder_path_to_id: Dict[str, int] = {} + for folder_id, folder_path in folder_ids: + path = os.path.abspath(folder_path) + folder_path_to_id[path] = folder_id + return folder_path_to_id + + +def image_util_find_folder_id_for_image(image_path: str, folder_path_to_id: Dict[str, int]) -> int: + """ + Find the most specific folder ID for a given image path. + + Args: + image_path: Path to the image file + folder_path_to_id: Dictionary mapping folder paths to IDs + + Returns: + Folder ID if found, None otherwise + """ + parent_folder = os.path.dirname(image_path) + + current_path = parent_folder + while current_path: + if current_path in folder_path_to_id: + return folder_path_to_id[current_path] + current_path = os.path.dirname(current_path) + + return None + + +def image_util_is_valid_image(file_path: str) -> bool: + """Check if the file is a valid image with allowed extensions.""" + # Check file extension first + allowed_extensions = {".jpg", ".jpeg", ".png"} + file_extension = Path(file_path).suffix.lower() + + if file_extension not in allowed_extensions: + return False + + # Then verify it's a valid image + try: + with Image.open(file_path) as img: + img.verify() + return True + except Exception: return False diff --git a/backend/main.py b/backend/main.py index e398699a4..3dea8acd2 100644 --- a/backend/main.py +++ b/backend/main.py @@ -32,13 +32,13 @@ async def lifespan(app: FastAPI): # Create tables and initialize systems generate_openapi_json() + db_create_folders_table() + db_create_images_table() db_create_YOLO_classes_table() db_create_clusters_table() # Create clusters table first since faces references it db_create_faces_table() - db_create_folders_table() db_create_albums_table() db_create_album_images_table() - db_create_images_table() db_create_metadata_table() # Create ProcessPoolExecutor and attach it to app.state app.state.executor = ProcessPoolExecutor(max_workers=1) @@ -58,9 +58,7 @@ async def lifespan(app: FastAPI): "name": "PictoPy Postman Collection", "url": "https://www.postman.com/cryosat-explorer-62744145/workspace/pictopy/overview", }, - servers=[ - {"url": "http://localhost:8000", "description": "Local Development server"} - ], + servers=[{"url": "http://localhost:8000", "description": "Local Development server"}], openapi_tags=[ { "name": "Albums", @@ -94,9 +92,7 @@ def generate_openapi_json(): openapi_schema["info"]["contact"] = app.contact project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) - openapi_path = os.path.join( - project_root, "docs", "backend", "backend_python", "openapi.json" - ) + openapi_path = os.path.join(project_root, "docs", "backend", "backend_python", "openapi.json") os.makedirs(os.path.dirname(openapi_path), exist_ok=True) @@ -125,12 +121,8 @@ async def root(): app.include_router(folders_router, prefix="/folders", tags=["Folders"]) app.include_router(albums_router, prefix="/albums", tags=["Albums"]) -app.include_router( - face_clusters_router, prefix="/face-clusters", tags=["Face Clusters"] -) -app.include_router( - user_preferences_router, prefix="/user-preferences", tags=["User Preferences"] -) +app.include_router(face_clusters_router, prefix="/face-clusters", tags=["Face Clusters"]) +app.include_router(user_preferences_router, prefix="/user-preferences", tags=["User Preferences"]) # Entry point for running with: python3 main.py diff --git a/sync-microservice/.gitignore b/sync-microservice/.gitignore new file mode 100644 index 000000000..8e5d9d6f8 --- /dev/null +++ b/sync-microservice/.gitignore @@ -0,0 +1,142 @@ +**/*.db + +# Python +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +.Python +build/ +develop-eggs/ + +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + + +images/ + +dist/ +tests/inputs/PictoPy.thumbnails/ + +.sync-env/ \ No newline at end of file diff --git a/sync-microservice/README.md b/sync-microservice/README.md new file mode 100644 index 000000000..d146157a9 --- /dev/null +++ b/sync-microservice/README.md @@ -0,0 +1,192 @@ +# PictoPy Sync Microservice + +A file system synchronization microservice for PictoPy that monitors folder changes and keeps the database in sync with the filesystem. + +## Features + +- 🔍 **Real-time File Monitoring**: Watches all folders registered in PictoPy database +- 🗄️ **Database Integration**: Connects to main PictoPy database to get folder information +- 📊 **Health Monitoring**: Provides health check endpoints for monitoring +- ⚡ **Async Processing**: Built with FastAPI and async/await for high performance +- 🔄 **Hot Reload**: Automatically starts watching folders on startup + +## Quick Start + +### Prerequisites + +- Python 3.8+ +- PictoPy backend database should be set up and accessible +- Virtual environment (recommended) + +### Installation + +1. **Navigate to the sync microservice directory:** + + ```bash + cd sync-microservice + ``` + +2. **Create and activate virtual environment:** + + ```bash + python -m venv .sync-env + source .sync-env/bin/activate # On Windows: .sync-env\Scripts\activate + ``` + +3. **Install dependencies:** + + ```bash + pip install -r requirements.txt + ``` + +4. **Start the service:** + ```bash + uvicorn main:app --reload --port 8001 + ``` + +## API Endpoints + +### Core Endpoints + +- **`GET /`** - Service information +- **`GET /health`** - Health check with database and watcher status +- **`GET /folders`** - List all folders being watched +- **`GET /watcher/status`** - Current watcher status + +### Example Responses + +**Health Check:** + +```json +{ + "status": "healthy", + "database": "connected", + "watcher": "running" +} +``` + +**Folders List:** + +```json +{ + "total_folders": 3, + "folders": [ + { "id": "uuid-1", "path": "/path/to/folder1" }, + { "id": "uuid-2", "path": "/path/to/folder2" } + ] +} +``` + +## Architecture + +``` +sync-microservice/ +├── app/ +│ ├── config/ +│ │ └── settings.py # Configuration settings +│ ├── database/ +│ │ ├── __init__.py +│ │ └── folders.py # Database operations for folders +│ └── utils/ +│ ├── __init__.py +│ └── file_watcher.py # File watching implementation +├── main.py # FastAPI application +└── requirements.txt # Dependencies +``` + +## Configuration + +The service connects to the main PictoPy database. Update `app/config/settings.py` if needed: + +```python +DATABASE_PATH = "../backend/app/database/PictoPy.db" +``` + +## File Watching + +The service automatically: + +1. **On Startup:** + + - Connects to PictoPy database + - Retrieves all folder paths and IDs + - Starts watching existing folders + - Reports status + +2. **During Operation:** + + - Monitors file changes (add, modify, delete) + - Logs all detected changes + - Maps changes to specific folder IDs + - Prepares for future database sync operations + +3. **On Shutdown:** + - Gracefully stops file watcher + - Cleans up resources + +## Development + +### Adding New Features + +1. **Database Operations**: Add new functions to `app/database/folders.py` +2. **File Processing**: Extend `app/utils/file_watcher.py` +3. **API Endpoints**: Add routes to `main.py` + +### Current TODOs + +The file watcher currently logs changes but doesn't process them. Future enhancements: + +- Image file detection and database updates +- Thumbnail generation for new images +- AI processing trigger for new images +- Database cleanup for deleted files +- Integration with main PictoPy backend APIs + +## Monitoring + +Use the health endpoint to monitor service status: + +```bash +curl http://localhost:8001/health +``` + +## Troubleshooting + +### Common Issues + +1. **Database Connection Failed** + + - Ensure PictoPy backend database exists + - Check database path in settings.py + - Verify file permissions + +2. **No Folders to Watch** + + - Add folders to PictoPy backend first + - Check database has folder entries + - Verify folder paths exist in filesystem + +3. **Watcher Not Starting** + - Check folder permissions + - Ensure folders exist on filesystem + - Review logs for specific errors + +### Logs + +The service provides detailed console output for: + +- Startup process +- Database connections +- Folder discovery +- File change events +- Error conditions + +## Integration + +This microservice is designed to work alongside the main PictoPy backend: + +- **Port 8000**: Main PictoPy backend +- **Port 8001**: Sync microservice (this service) +- **Shared Database**: Both services use the same SQLite database + +The sync service operates independently but relies on the main backend's database structure. diff --git a/sync-microservice/app/config/settings.py b/sync-microservice/app/config/settings.py new file mode 100644 index 000000000..00d1ab6c5 --- /dev/null +++ b/sync-microservice/app/config/settings.py @@ -0,0 +1,24 @@ +# Model Exports Path +MODEL_EXPORTS_PATH = "app/models/ONNX_Exports" +PRIMARY_BACKEND_URL = "http://localhost:8000" +SYNC_MICROSERVICE_URL = "http://localhost:8001" + +# Object Detection Models: +SMALL_OBJ_DETECTION_MODEL = f"{MODEL_EXPORTS_PATH}/YOLOv11_Small.onnx" +NANO_OBJ_DETECTION_MODEL = f"{MODEL_EXPORTS_PATH}/YOLOv11_Nano.onnx" +MEDIUM_OBJ_DETECTION_MODEL = f"{MODEL_EXPORTS_PATH}/YOLOv11_Medium.onnx" + +# Face Detection Models: +SMALL_FACE_DETECTION_MODEL = f"{MODEL_EXPORTS_PATH}/YOLOv11_Small_Face.onnx" +NANO_FACE_DETECTION_MODEL = f"{MODEL_EXPORTS_PATH}/YOLOv11_Nano_Face.onnx" +MEDIUM_FACE_DETECTION_MODEL = f"{MODEL_EXPORTS_PATH}/YOLOv11_Medium_Face.onnx" + +# FaceNet Model to extract face embeddings: +DEFAULT_FACENET_MODEL = f"{MODEL_EXPORTS_PATH}/FaceNet_128D.onnx" + +TEST_INPUT_PATH = "tests/inputs" +TEST_OUTPUT_PATH = "tests/outputs" +# Point to the main PictoPy database +DATABASE_PATH = "../backend/app/database/PictoPy.db" +THUMBNAIL_IMAGES_PATH = "./images/thumbnails" +IMAGES_PATH = "./images" diff --git a/sync-microservice/app/core/__init__.py b/sync-microservice/app/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sync-microservice/app/core/lifespan.py b/sync-microservice/app/core/lifespan.py new file mode 100644 index 000000000..e646debe8 --- /dev/null +++ b/sync-microservice/app/core/lifespan.py @@ -0,0 +1,54 @@ +from contextlib import asynccontextmanager +from fastapi import FastAPI +from app.database.folders import ( + db_check_database_connection, +) +from app.utils.watcher import ( + watcher_util_start_folder_watcher, + watcher_util_stop_folder_watcher, +) + +# Global variable to track watcher status +watcher_started = False + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """ + Manage the application lifespan - startup and shutdown events. + """ + global watcher_started + + try: + # Startup + print("Starting PictoPy Sync Microservice...") + + # Check database connection + if not db_check_database_connection(): + print("Failed to connect to PictoPy database") + print( + "Make sure the main PictoPy backend is set up and the database exists" + ) + raise RuntimeError("Database connection failed") + + print("Database connection successful") + + watcher_started = watcher_util_start_folder_watcher() + + print("Sync microservice is ready!") + + yield + + except KeyboardInterrupt: + print("\nReceived keyboard interrupt (Ctrl+C)") + print("Initiating graceful shutdown...") + except Exception as e: + print(f"Unexpected error during startup: {e}") + raise + finally: + # Shutdown + print("Shutting down sync microservice...") + if watcher_started: + watcher_util_stop_folder_watcher() + print("Folder watcher stopped") + print("Shutdown complete") diff --git a/sync-microservice/app/database/__init__.py b/sync-microservice/app/database/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sync-microservice/app/database/folders.py b/sync-microservice/app/database/folders.py new file mode 100644 index 000000000..f6633b6e9 --- /dev/null +++ b/sync-microservice/app/database/folders.py @@ -0,0 +1,117 @@ +import sqlite3 +from typing import List, Tuple, NamedTuple +from app.config.settings import DATABASE_PATH + +# Type definitions +FolderId = str +FolderPath = str +FolderIdPath = Tuple[FolderId, str] + + +class FolderTaggingInfo(NamedTuple): + """Represents folder tagging information""" + + folder_id: FolderId + folder_path: FolderPath + tagging_percentage: float + + +def db_get_all_folders_with_ids() -> List[FolderIdPath]: + """ + Get all folders from the database with their IDs and paths. + + Returns: + List of tuples containing (folder_id, folder_path) + """ + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + + try: + cursor.execute( + """ + SELECT folder_id, folder_path FROM folders + ORDER BY folder_path + """ + ) + return cursor.fetchall() + except Exception as e: + print(f"Error getting folders from database: {e}") + return [] + finally: + conn.close() + + +def db_check_database_connection() -> bool: + """ + Check if the database connection is working and the folders table exists. + + Returns: + True if connection is successful and table exists, False otherwise + """ + try: + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + + # Check if folders table exists + cursor.execute( + """ + SELECT name FROM sqlite_master + WHERE type='table' AND name='folders' + """ + ) + result = cursor.fetchone() + conn.close() + + return result is not None + except Exception as e: + print(f"Database connection error: {e}") + return False + + +def db_get_tagging_progress() -> List[FolderTaggingInfo]: + """ + Calculate tagging percentage for all folders. + Tagging percentage = (tagged images / total images) * 100 + + Returns: + List of FolderTaggingInfo containing folder_id, folder_path, and tagging_percentage + """ + conn = sqlite3.connect(DATABASE_PATH) + cursor = conn.cursor() + + try: + cursor.execute( + """ + SELECT + f.folder_id, + f.folder_path, + COUNT(i.id) as total_images, + COUNT(CASE WHEN i.isTagged = 1 THEN 1 END) as tagged_images + FROM folders f + LEFT JOIN images i ON f.folder_id = i.folder_id + GROUP BY f.folder_id, f.folder_path + """ + ) + + results = cursor.fetchall() + + folder_info_list = [] + for folder_id, folder_path, total_images, tagged_images in results: + # Calculate percentage, handle division by zero + if total_images > 0: + tagging_percentage = (tagged_images / total_images) * 100 + else: + tagging_percentage = 0.0 + + folder_info_list.append( + FolderTaggingInfo( + folder_id=folder_id, + folder_path=folder_path, + tagging_percentage=round(tagging_percentage, 2), + ) + ) + + return folder_info_list + + finally: + conn.close() diff --git a/sync-microservice/app/routes/__init__.py b/sync-microservice/app/routes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sync-microservice/app/routes/folders.py b/sync-microservice/app/routes/folders.py new file mode 100644 index 000000000..4f7f8c847 --- /dev/null +++ b/sync-microservice/app/routes/folders.py @@ -0,0 +1,50 @@ +from fastapi import APIRouter +from typing import Union +from app.database.folders import db_get_tagging_progress +from app.schemas.folders import ( + FolderTaggingStatusSuccessResponse, + FolderTaggingStatusErrorResponse, + FolderTaggingInfo, +) + +router = APIRouter(prefix="/folders", tags=["folders"]) + + +@router.get( + "/status", + response_model=Union[ + FolderTaggingStatusSuccessResponse, FolderTaggingStatusErrorResponse + ], +) +def get_folders_tagging_status(): + """ + Get tagging progress for all folders. + + Returns: + List of folders with their tagging progress information including: + - folder_id: Unique identifier for the folder + - folder_path: Path to the folder + - tagging_percentage: Percentage of images that have been tagged (0-100) + """ + try: + tagging_progress = db_get_tagging_progress() + + folder_info_list = [ + FolderTaggingInfo( + folder_id=folder.folder_id, + folder_path=folder.folder_path, + tagging_percentage=folder.tagging_percentage, + ) + for folder in tagging_progress + ] + + return FolderTaggingStatusSuccessResponse( + status="success", + data=folder_info_list, + total_folders=len(tagging_progress), + ) + except Exception as e: + return FolderTaggingStatusErrorResponse( + status="error", + message=f"Failed to retrieve tagging progress: {str(e)}", + ) diff --git a/sync-microservice/app/routes/health.py b/sync-microservice/app/routes/health.py new file mode 100644 index 000000000..ad9039ec0 --- /dev/null +++ b/sync-microservice/app/routes/health.py @@ -0,0 +1,17 @@ +from fastapi import APIRouter +from app.database.folders import db_check_database_connection +from app.utils.watcher import watcher_util_is_watcher_running +from app.schemas.health import HealthCheckResponse + +router = APIRouter(tags=["health"]) + + +@router.get("/health", response_model=HealthCheckResponse) +async def health_check(): + """Health check endpoint.""" + db_status = db_check_database_connection() + return HealthCheckResponse( + status="healthy" if db_status else "unhealthy", + database="connected" if db_status else "disconnected", + watcher="running" if watcher_util_is_watcher_running() else "stopped", + ) diff --git a/sync-microservice/app/routes/watcher.py b/sync-microservice/app/routes/watcher.py new file mode 100644 index 000000000..0d1823116 --- /dev/null +++ b/sync-microservice/app/routes/watcher.py @@ -0,0 +1,85 @@ +from fastapi import APIRouter, HTTPException +from app.utils.watcher import ( + watcher_util_start_folder_watcher, + watcher_util_stop_folder_watcher, + watcher_util_restart_folder_watcher, + watcher_util_is_watcher_running, + watcher_util_get_watcher_info, +) +from app.schemas.watcher import ( + WatcherStatusResponse, + WatcherControlResponse, +) + +router = APIRouter(prefix="/watcher", tags=["watcher"]) + + +@router.get("/status", response_model=WatcherStatusResponse) +async def get_watcher_status(): + """Get folder watcher status.""" + return WatcherStatusResponse(**watcher_util_get_watcher_info()) + + +@router.post("/restart", response_model=WatcherControlResponse) +async def restart_watcher(): + """Restart the folder watcher with fresh data from database.""" + try: + success = watcher_util_restart_folder_watcher() + if success: + return WatcherControlResponse( + success=True, + message="Folder watcher restarted successfully", + watcher_info=WatcherStatusResponse(**watcher_util_get_watcher_info()), + ) + else: + return WatcherControlResponse( + success=False, + message="Failed to restart folder watcher", + watcher_info=WatcherStatusResponse(**watcher_util_get_watcher_info()), + ) + except Exception as e: + raise HTTPException( + status_code=500, detail=f"Error restarting watcher: {str(e)}" + ) + + +@router.post("/stop", response_model=WatcherControlResponse) +async def stop_watcher(): + """Stop the folder watcher.""" + try: + watcher_util_stop_folder_watcher() + return WatcherControlResponse( + success=True, + message="Folder watcher stopped", + watcher_info=WatcherStatusResponse(**watcher_util_get_watcher_info()), + ) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error stopping watcher: {str(e)}") + + +@router.post("/start", response_model=WatcherControlResponse) +async def start_watcher(): + """Start the folder watcher.""" + try: + if watcher_util_is_watcher_running(): + return WatcherControlResponse( + success=False, + message="Watcher is already running", + watcher_info=WatcherStatusResponse(**watcher_util_get_watcher_info()), + ) + + success = watcher_util_start_folder_watcher() + if success: + return WatcherControlResponse( + success=True, + message="Folder watcher started successfully", + watcher_info=WatcherStatusResponse(**watcher_util_get_watcher_info()), + ) + else: + return WatcherControlResponse( + success=False, + message="Failed to start folder watcher", + watcher_info=WatcherStatusResponse(**watcher_util_get_watcher_info()), + ) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error starting watcher: {str(e)}") diff --git a/sync-microservice/app/schemas/__init__.py b/sync-microservice/app/schemas/__init__.py new file mode 100644 index 000000000..f9b5a1f33 --- /dev/null +++ b/sync-microservice/app/schemas/__init__.py @@ -0,0 +1 @@ +"""Schemas package for sync-microservice API responses and requests.""" diff --git a/sync-microservice/app/schemas/folders.py b/sync-microservice/app/schemas/folders.py new file mode 100644 index 000000000..403a201c8 --- /dev/null +++ b/sync-microservice/app/schemas/folders.py @@ -0,0 +1,36 @@ +from pydantic import BaseModel, Field +from typing import List, Literal + + +class FolderTaggingInfo(BaseModel): + """Individual folder tagging information schema.""" + + folder_id: str = Field(..., description="Unique identifier for the folder") + folder_path: str = Field(..., description="Path to the folder") + tagging_percentage: float = Field( + ..., + ge=0, + le=100, + description="Percentage of images that have been tagged (0-100)", + ) + + +class FolderTaggingStatusSuccessResponse(BaseModel): + """Success response schema for folder tagging status.""" + + status: Literal["success"] + data: List[FolderTaggingInfo] + total_folders: int = Field(..., ge=0, description="Total number of folders") + + +class FolderTaggingStatusErrorResponse(BaseModel): + """Error response schema for folder tagging status.""" + + status: Literal["error"] + message: str = Field(..., description="Error message describing what went wrong") + data: List[FolderTaggingInfo] = Field( + default_factory=list, description="Empty list on error" + ) + total_folders: int = Field( + default=0, description="Total number of folders (0 on error)" + ) diff --git a/sync-microservice/app/schemas/health.py b/sync-microservice/app/schemas/health.py new file mode 100644 index 000000000..c13971166 --- /dev/null +++ b/sync-microservice/app/schemas/health.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel +from typing import Literal + + +class HealthCheckResponse(BaseModel): + """Health check endpoint response schema.""" + + status: Literal["healthy", "unhealthy"] + database: Literal["connected", "disconnected"] + watcher: Literal["running", "stopped"] diff --git a/sync-microservice/app/schemas/watcher.py b/sync-microservice/app/schemas/watcher.py new file mode 100644 index 000000000..7c5c5f6a8 --- /dev/null +++ b/sync-microservice/app/schemas/watcher.py @@ -0,0 +1,33 @@ +from pydantic import BaseModel +from typing import List, Optional + + +class WatchedFolder(BaseModel): + """Schema for a watched folder.""" + + id: str + path: str + + +class WatcherStatusResponse(BaseModel): + """Watcher status endpoint response schema.""" + + is_running: bool + folders_count: int + thread_alive: bool + thread_id: Optional[int] + watched_folders: List[WatchedFolder] + + +class WatcherControlResponse(BaseModel): + """Schema for watcher control operations (start/stop/restart).""" + + success: bool + message: str + watcher_info: WatcherStatusResponse + + +class WatcherErrorResponse(BaseModel): + """Schema for watcher error responses.""" + + detail: str diff --git a/sync-microservice/app/utils/__init__.py b/sync-microservice/app/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sync-microservice/app/utils/file_watcher.py b/sync-microservice/app/utils/file_watcher.py new file mode 100644 index 000000000..e69de29bb diff --git a/sync-microservice/app/utils/watcher.py b/sync-microservice/app/utils/watcher.py new file mode 100644 index 000000000..b71b53755 --- /dev/null +++ b/sync-microservice/app/utils/watcher.py @@ -0,0 +1,362 @@ +import os +import threading +import time +from typing import List, Tuple, Dict, Optional +from watchfiles import watch, Change +import httpx +from app.database.folders import db_get_all_folders_with_ids +from app.config.settings import PRIMARY_BACKEND_URL + +FolderIdPath = Tuple[str, str] + +# Global variables to track watcher state +watcher_thread: Optional[threading.Thread] = None +stop_event = threading.Event() +watched_folders: List[FolderIdPath] = [] +folder_id_map: Dict[str, str] = {} + + +def watcher_util_get_folder_id_if_watched(file_path: str) -> Optional[str]: + """ + Check if the given file path is one of our watched folders. + + Args: + file_path: Path to check + + Returns: + Folder ID if the path is a watched folder, None otherwise + """ + # Normalize the file path + normalized_path = os.path.abspath(file_path) + + # Check if this path matches any of our watched folders + for folder_id, folder_path in watched_folders: + if os.path.abspath(folder_path) == normalized_path: + return folder_id + + return None + + +def watcher_util_handle_file_changes(changes: set) -> None: + """ + Handle file changes detected by watchfiles. + + Args: + changes: Set of (change_type, file_path) tuples + """ + deleted_folder_ids = [] + + for change, file_path in changes: + print(f"File change detected: {change} - {file_path}") + + if change == Change.deleted: + deleted_folder_id = watcher_util_get_folder_id_if_watched(file_path) + if deleted_folder_id: + print( + f" Watched folder deleted: {file_path} (ID: {deleted_folder_id})" + ) + deleted_folder_ids.append(deleted_folder_id) + else: + closest_folder = watcher_util_find_closest_parent_folder( + file_path, watched_folders + ) + + if closest_folder: + folder_id, folder_path = closest_folder + print(f" Closest parent folder: {folder_path} (ID: {folder_id})") + + watcher_util_call_sync_folder_api(folder_id, folder_path) + else: + print(f" No watched parent folder found for: {file_path}") + + # If any watched folders were deleted, call the delete API + if deleted_folder_ids: + print(f"Calling delete API for {len(deleted_folder_ids)} deleted folders") + watcher_util_call_delete_folders_api(deleted_folder_ids) + watcher_util_restart_folder_watcher() + + +def watcher_util_find_closest_parent_folder( + file_path: str, watched_folders: List[FolderIdPath] +) -> Optional[Tuple[str, str]]: + """ + Find the closest parent folder for a given file path from the watched folders. + + Args: + file_path: Path to the file that changed + watched_folders: List of (folder_id, folder_path) tuples + + Returns: + Tuple of (folder_id, folder_path) if found, None otherwise + """ + # Normalize the file path + file_path = os.path.abspath(file_path) + + best_match = None + longest_match_length = 0 + + for folder_id, folder_path in watched_folders: + # Normalize the folder path + folder_path = os.path.abspath(folder_path) + + # Check if this folder is a parent of the file + if file_path.startswith(folder_path): + # Ensure it's a proper parent (not just a prefix) + if file_path == folder_path or file_path[len(folder_path)] == os.sep: + # Choose the longest matching path (closest parent) + if len(folder_path) > longest_match_length: + longest_match_length = len(folder_path) + best_match = (folder_id, folder_path) + print("best match: ", best_match) + + return best_match + + +def watcher_util_call_sync_folder_api(folder_id: str, folder_path: str) -> None: + """ + Call the primary backend's sync-folder API endpoint. + + Args: + folder_id: ID of the folder to sync + folder_path: Path of the folder to sync + """ + try: + url = f"{PRIMARY_BACKEND_URL}/folders/sync-folder" + payload = {"folder_path": folder_path, "folder_id": folder_id} + + with httpx.Client(timeout=30.0) as client: + response = client.request("POST", url, json=payload) + + if response.status_code == 200: + print(f"Successfully synced folder {folder_path} (ID: {folder_id})") + else: + print( + f"Failed to sync folder {folder_path}. Status: {response.status_code}, Response: {response.text}" + ) + + except httpx.RequestError as e: + print(f"Network error while syncing folder {folder_path}: {e}") + except Exception as e: + print(f"Unexpected error while syncing folder {folder_path}: {e}") + + +def watcher_util_call_delete_folders_api(folder_ids: List[str]) -> None: + """ + Call the primary backend's delete-folders API endpoint. + + Args: + folder_ids: List of folder IDs to delete + """ + try: + url = f"{PRIMARY_BACKEND_URL}/folders/delete-folders" + payload = {"folder_ids": folder_ids} + + with httpx.Client(timeout=30.0) as client: + response = client.request("DELETE", url, json=payload) + + if response.status_code == 200: + print(f"Successfully deleted folders with IDs: {folder_ids}") + else: + print( + f"Failed to delete folders. Status: {response.status_code}, Response: {response.text}" + ) + + except httpx.RequestError as e: + print(f"Network error while deleting folders {folder_ids}: {e}") + except Exception as e: + print(f"Unexpected error while deleting folders {folder_ids}: {e}") + + +def watcher_util_watcher_worker(folder_paths: List[str]) -> None: + """ + Worker function that runs the file watcher in a background thread. + + Args: + folder_paths: List of folder paths to watch + """ + try: + print(f"Starting watcher for {len(folder_paths)} folders") + for changes in watch(*folder_paths, stop_event=stop_event, recursive=False): + if stop_event.is_set(): + print("Stop event detected in watcher loop") + break + watcher_util_handle_file_changes(changes) + except Exception as e: + print(f"Error in watcher worker: {e}") + finally: + print("Watcher stopped") + + +def watcher_util_get_existing_folders( + folders: List[FolderIdPath], +) -> List[FolderIdPath]: + """ + Filter folders to only include those that exist in the filesystem. + + Args: + folders: List of (folder_id, folder_path) tuples + + Returns: + List of existing folders + """ + existing_folders = [] + for folder_id, folder_path in folders: + if os.path.exists(folder_path) and os.path.isdir(folder_path): + existing_folders.append((folder_id, folder_path)) + else: + print(f"Warning: Folder does not exist: {folder_path}") + return existing_folders + + +def watcher_util_is_watcher_running() -> bool: + """Check if the watcher thread is running.""" + return watcher_thread is not None and watcher_thread.is_alive() + + +def watcher_util_start_folder_watcher() -> bool: + """ + Initialize and start the folder watcher with folders from the database. + + Returns: + True if watcher started successfully, False otherwise + """ + global watcher_thread, watched_folders, folder_id_map + + if watcher_util_is_watcher_running(): + print("Watcher is already running.") + return False + + print("Initializing folder watcher...") + + try: + # Simple synchronous database call + folders = db_get_all_folders_with_ids() + if not folders: + print("No folders found in database") + return False + + print(f"Found {len(folders)} folders in database") + + # Simple synchronous file system checks + existing_folders = watcher_util_get_existing_folders(folders) + if not existing_folders: + print("No existing folders to watch") + return False + + watched_folders = existing_folders + folder_id_map = { + folder_path: folder_id for folder_id, folder_path in existing_folders + } + + folder_paths = [folder_path for _, folder_path in existing_folders] + + print(f"Starting to watch {len(folder_paths)} folders:") + for folder_id, folder_path in existing_folders: + print(f" - {folder_path} (ID: {folder_id})") + + # Reset stop event and start background thread + stop_event.clear() + watcher_thread = threading.Thread( + target=watcher_util_watcher_worker, + args=(folder_paths,), + daemon=True, # Dies when main program exits + ) + watcher_thread.start() + + print("Folder watcher started successfully") + return True + + except Exception as e: + print(f"Error starting folder watcher: {e}") + return False + + +def watcher_util_stop_folder_watcher() -> None: + """Stop the folder watcher.""" + global watcher_thread, watched_folders, folder_id_map + + if not watcher_util_is_watcher_running(): + print("Watcher is not running") + return + + try: + print("Stopping folder watcher...") + + # Signal the watcher to stop + stop_event.set() + + # Wait for thread to finish + watcher_thread.join(timeout=5.0) + + if watcher_thread.is_alive(): + print("Warning: Watcher thread did not stop gracefully") + else: + print("Watcher stopped successfully") + + except Exception as e: + print(f"Error stopping watcher: {e}") + finally: + watcher_thread = None + # Clear state + watched_folders = [] + folder_id_map = {} + + +def watcher_util_restart_folder_watcher() -> bool: + """ + Restart the folder watcher by stopping the current one and starting fresh. + + Returns: + True if restart was successful, False otherwise + """ + print("Restarting folder watcher...") + watcher_util_stop_folder_watcher() + return watcher_util_start_folder_watcher() + + +def watcher_util_get_watcher_info() -> dict: + """Get information about the current watcher state.""" + return { + "is_running": watcher_util_is_watcher_running(), + "folders_count": len(watched_folders), + "thread_alive": watcher_thread.is_alive() if watcher_thread else False, + "thread_id": watcher_thread.ident if watcher_thread else None, + "watched_folders": [ + {"id": folder_id, "path": folder_path} + for folder_id, folder_path in watched_folders + ], + } + + +def watcher_util_wait_for_watcher() -> None: + """ + Wait for the watcher to finish (useful for keeping the program running). + """ + if watcher_thread and watcher_thread.is_alive(): + try: + watcher_thread.join() # Wait indefinitely + except KeyboardInterrupt: + print("Interrupted by user") + watcher_util_stop_folder_watcher() + else: + print("No watcher thread to wait for") + + +# Simple usage examples +def main(): + """Simple example of how to use the folder watcher.""" + print("Starting folder watcher example...") + + success = watcher_util_start_folder_watcher() + if success: + print("Watcher started, will run for 10 seconds...") + try: + time.sleep(10) # Just sleep - no async complexity! + except KeyboardInterrupt: + print("Interrupted by user") + finally: + watcher_util_stop_folder_watcher() + else: + print("Failed to start watcher") + + print("Example finished") diff --git a/sync-microservice/main.py b/sync-microservice/main.py new file mode 100644 index 000000000..5f5477c50 --- /dev/null +++ b/sync-microservice/main.py @@ -0,0 +1,16 @@ +from fastapi import FastAPI +from app.core.lifespan import lifespan +from app.routes import health, watcher, folders + +# Create FastAPI app with lifespan management +app = FastAPI( + title="PictoPy Sync Microservice", + description="File system synchronization service for PictoPy", + version="1.0.0", + lifespan=lifespan, +) + +# Include route modules +app.include_router(health.router, prefix="/api/v1") +app.include_router(watcher.router, prefix="/api/v1") +app.include_router(folders.router, prefix="/api/v1") diff --git a/sync-microservice/requirements.txt b/sync-microservice/requirements.txt new file mode 100644 index 000000000..a3be28f17 --- /dev/null +++ b/sync-microservice/requirements.txt @@ -0,0 +1,40 @@ +annotated-types==0.7.0 +anyio==4.10.0 +certifi==2025.8.3 +click==8.2.1 +dnspython==2.7.0 +email_validator==2.2.0 +fastapi==0.116.1 +fastapi-cli==0.0.8 +fastapi-cloud-cli==0.1.5 +h11==0.16.0 +httpcore==1.0.9 +httptools==0.6.4 +httpx==0.28.1 +idna==3.10 +Jinja2==3.1.6 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +mdurl==0.1.2 +pip==25.0.1 +pydantic==2.11.7 +pydantic_core==2.33.2 +Pygments==2.19.2 +python-dotenv==1.1.1 +python-multipart==0.0.20 +PyYAML==6.0.2 +rich==14.1.0 +rich-toolkit==0.14.9 +rignore==0.6.4 +sentry-sdk==2.34.1 +shellingham==1.5.4 +sniffio==1.3.1 +starlette==0.47.2 +typer==0.16.0 +typing_extensions==4.14.1 +typing-inspection==0.4.1 +urllib3==2.5.0 +uvicorn==0.35.0 +uvloop==0.21.0 +watchfiles==1.1.0 +websockets==15.0.1 \ No newline at end of file From ba8cf74b9711dde7ebee97836f48c6584bc7a900 Mon Sep 17 00:00:00 2001 From: rahulharpal1603 Date: Wed, 20 Aug 2025 15:17:16 +0530 Subject: [PATCH 2/2] Fix Lint errors --- backend/app/database/folders.py | 28 +++++++++++++++++++++------- backend/app/database/images.py | 4 +++- backend/app/routes/folders.py | 31 ++++++++++++++++++++++++------- backend/app/utils/folders.py | 22 +++++++++++++++++----- backend/app/utils/images.py | 28 +++++++++++++++++++++------- backend/main.py | 16 ++++++++++++---- sync-microservice/pyproject.toml | 2 ++ 7 files changed, 100 insertions(+), 31 deletions(-) create mode 100644 sync-microservice/pyproject.toml diff --git a/backend/app/database/folders.py b/backend/app/database/folders.py index 6a617d23c..150ce764c 100644 --- a/backend/app/database/folders.py +++ b/backend/app/database/folders.py @@ -180,7 +180,9 @@ def db_delete_folder(folder_path: FolderPath) -> None: conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() abs_folder_path = os.path.abspath(folder_path) - cursor.execute("PRAGMA foreign_keys = ON;") # Important for deleting rows in image_id_mapping and images table because they reference this folder_id + cursor.execute( + "PRAGMA foreign_keys = ON;" + ) # Important for deleting rows in image_id_mapping and images table because they reference this folder_id conn.commit() cursor.execute( "SELECT folder_id FROM folders WHERE folder_path = ?", @@ -190,7 +192,9 @@ def db_delete_folder(folder_path: FolderPath) -> None: if not existing_folder: conn.close() - raise ValueError(f"Error: Folder '{folder_path}' does not exist in the database.") + raise ValueError( + f"Error: Folder '{folder_path}' does not exist in the database." + ) cursor.execute( "DELETE FROM folders WHERE folder_path = ?", @@ -201,7 +205,9 @@ def db_delete_folder(folder_path: FolderPath) -> None: conn.close() -def db_update_parent_ids_for_subtree(root_folder_path: FolderPath, folder_map: FolderMap) -> None: +def db_update_parent_ids_for_subtree( + root_folder_path: FolderPath, folder_map: FolderMap +) -> None: """ Update parent_folder_id for all folders in the subtree rooted at root_folder_path. Only updates folders whose parent_folder_id is NULL. @@ -234,7 +240,9 @@ def db_folder_exists(folder_path: FolderPath) -> bool: cursor = conn.cursor() try: abs_path = os.path.abspath(folder_path) - cursor.execute("SELECT folder_id FROM folders WHERE folder_path = ?", (abs_path,)) + cursor.execute( + "SELECT folder_id FROM folders WHERE folder_path = ?", (abs_path,) + ) result = cursor.fetchone() return bool(result) finally: @@ -253,14 +261,18 @@ def db_find_parent_folder_id(folder_path: FolderPath) -> Optional[FolderId]: conn = sqlite3.connect(DATABASE_PATH) cursor = conn.cursor() try: - cursor.execute("SELECT folder_id FROM folders WHERE folder_path = ?", (parent_path,)) + cursor.execute( + "SELECT folder_id FROM folders WHERE folder_path = ?", (parent_path,) + ) result = cursor.fetchone() return result[0] if result else None finally: conn.close() -def db_update_ai_tagging_batch(folder_ids: List[FolderId], ai_tagging_enabled: bool) -> int: +def db_update_ai_tagging_batch( + folder_ids: List[FolderId], ai_tagging_enabled: bool +) -> int: """ Update AI_Tagging status for multiple folders in a single transaction. folder_ids: list of folder IDs to update @@ -330,7 +342,9 @@ def db_get_folder_ids_by_path_prefix(root_path: str) -> List[FolderIdPath]: conn.close() -def db_get_folder_ids_by_paths(folder_paths: List[FolderPath]) -> Dict[FolderPath, FolderId]: +def db_get_folder_ids_by_paths( + folder_paths: List[FolderPath], +) -> Dict[FolderPath, FolderId]: """ Get folder IDs for multiple folder paths in a single database query. diff --git a/backend/app/database/images.py b/backend/app/database/images.py index 76ea83f9d..15466753c 100644 --- a/backend/app/database/images.py +++ b/backend/app/database/images.py @@ -197,7 +197,9 @@ def db_insert_image_classes_batch(image_class_pairs: List[ImageClassPair]) -> bo conn.close() -def db_get_images_by_folder_ids(folder_ids: List[int]) -> List[Tuple[ImageId, ImagePath, str]]: +def db_get_images_by_folder_ids( + folder_ids: List[int], +) -> List[Tuple[ImageId, ImagePath, str]]: """ Get all images that belong to the specified folder IDs. diff --git a/backend/app/routes/folders.py b/backend/app/routes/folders.py index 58e6551dd..9ffe9ec86 100644 --- a/backend/app/routes/folders.py +++ b/backend/app/routes/folders.py @@ -79,7 +79,9 @@ def post_AI_tagging_enabled_sequence(): return True -def post_sync_folder_sequence(folder_path: str, folder_id: int, added_folders: List[Tuple[str, str]]): +def post_sync_folder_sequence( + folder_path: str, folder_id: int, added_folders: List[Tuple[str, str]] +): """ Post-sync sequence for a folder. This function is called after a folder is synced. @@ -119,7 +121,9 @@ def add_folder(request: AddFolderRequest, app_state=Depends(get_state)): # Step 1: Data Validation if not os.path.isdir(request.folder_path): - raise ValueError(f"Error: '{request.folder_path}' is not a valid directory.") + raise ValueError( + f"Error: '{request.folder_path}' is not a valid directory." + ) if ( not os.access(request.folder_path, os.R_OK) @@ -327,7 +331,9 @@ def sync_folder(request: SyncFolderRequest, app_state=Depends(get_state)): try: # Step 1: Get current state from both sources db_child_folders = db_get_direct_child_folders(request.folder_id) - filesystem_folders = folder_util_get_filesystem_direct_child_folders(request.folder_path) + filesystem_folders = folder_util_get_filesystem_direct_child_folders( + request.folder_path + ) # Step 2: Compare and identify differences filesystem_folder_set = set(filesystem_folders) @@ -337,14 +343,25 @@ def sync_folder(request: SyncFolderRequest, app_state=Depends(get_state)): folders_to_add = filesystem_folder_set - db_folder_paths # Step 3: Perform synchronization operations - deleted_count, deleted_folders = folder_util_delete_obsolete_folders(db_child_folders, folders_to_delete) - added_count, added_folders_with_ids = folder_util_add_multiple_folder_trees(folders_to_add, request.folder_id) + deleted_count, deleted_folders = folder_util_delete_obsolete_folders( + db_child_folders, folders_to_delete + ) + added_count, added_folders_with_ids = folder_util_add_multiple_folder_trees( + folders_to_add, request.folder_id + ) # Extract just the paths for the API response - added_folders = [folder_path for folder_id, folder_path in added_folders_with_ids] + added_folders = [ + folder_path for folder_id, folder_path in added_folders_with_ids + ] executor: ProcessPoolExecutor = app_state.executor - executor.submit(post_sync_folder_sequence, request.folder_path, request.folder_id, added_folders_with_ids) + executor.submit( + post_sync_folder_sequence, + request.folder_path, + request.folder_id, + added_folders_with_ids, + ) # Step 4: Return comprehensive response return SyncFolderResponse( success=True, diff --git a/backend/app/utils/folders.py b/backend/app/utils/folders.py index 7cf5b3d59..d440abaaa 100644 --- a/backend/app/utils/folders.py +++ b/backend/app/utils/folders.py @@ -10,7 +10,9 @@ ) -def folder_util_add_folder_tree(root_path, parent_folder_id=None, AI_Tagging=False, taggingCompleted=None): +def folder_util_add_folder_tree( + root_path, parent_folder_id=None, AI_Tagging=False, taggingCompleted=None +): """ Recursively collect folder data and insert all folders in a single database transaction. All folders are initially inserted with NULL parent_id, which is updated after insertion. @@ -29,7 +31,9 @@ def folder_util_add_folder_tree(root_path, parent_folder_id=None, AI_Tagging=Fal parent_id = parent_folder_id else: parent_path = os.path.dirname(dirpath) - parent_id = folder_map[parent_path][0] if parent_path in folder_map else None + parent_id = ( + folder_map[parent_path][0] if parent_path in folder_map else None + ) # Store both folder_id and parent_id in the map folder_map[dirpath] = (this_folder_id, parent_id) @@ -95,7 +99,9 @@ def folder_util_get_filesystem_direct_child_folders(folder_path: str) -> List[st ) -def folder_util_delete_obsolete_folders(db_child_folders: List[Tuple[str, str]], folders_to_delete: set) -> Tuple[int, List[str]]: +def folder_util_delete_obsolete_folders( + db_child_folders: List[Tuple[str, str]], folders_to_delete: set +) -> Tuple[int, List[str]]: """ Delete folders from the database that are no longer present in the filesystem. @@ -110,7 +116,11 @@ def folder_util_delete_obsolete_folders(db_child_folders: List[Tuple[str, str]], return 0, [] # Get the folder IDs for the folders to delete - folder_ids_to_delete = [folder_id for folder_id, folder_path in db_child_folders if folder_path in folders_to_delete] + folder_ids_to_delete = [ + folder_id + for folder_id, folder_path in db_child_folders + if folder_path in folders_to_delete + ] if folder_ids_to_delete: deleted_count = db_delete_folders_batch(folder_ids_to_delete) @@ -119,7 +129,9 @@ def folder_util_delete_obsolete_folders(db_child_folders: List[Tuple[str, str]], return 0, [] -def folder_util_add_multiple_folder_trees(folders_to_add: set, parent_folder_id: str) -> Tuple[int, List[Tuple[str, str]]]: +def folder_util_add_multiple_folder_trees( + folders_to_add: set, parent_folder_id: str +) -> Tuple[int, List[Tuple[str, str]]]: """ Add multiple folder trees with same parent to the database. diff --git a/backend/app/utils/images.py b/backend/app/utils/images.py index a5463c116..76eea5d7d 100644 --- a/backend/app/utils/images.py +++ b/backend/app/utils/images.py @@ -49,7 +49,9 @@ def image_util_process_folder_images(folder_data: List[Tuple[str, int, bool]]) - folder_path_to_id = {os.path.abspath(folder_path): folder_id} # Step 3: Prepare image records for this folder - folder_image_records = image_util_prepare_image_records(image_files, folder_path_to_id) + folder_image_records = image_util_prepare_image_records( + image_files, folder_path_to_id + ) all_image_records.extend(folder_image_records) except Exception as e: @@ -87,7 +89,9 @@ def image_util_process_untagged_images() -> bool: return False -def image_util_classify_and_face_detect_images(untagged_images: List[Dict[str, str]]) -> None: +def image_util_classify_and_face_detect_images( + untagged_images: List[Dict[str, str]] +) -> None: """Classify untagged images and detect faces if applicable.""" object_classifier = ObjectClassifier() face_detector = FaceDetector() @@ -120,7 +124,9 @@ def image_util_classify_and_face_detect_images(untagged_images: List[Dict[str, s face_detector.close() -def image_util_prepare_image_records(image_files: List[str], folder_path_to_id: Dict[str, int]) -> List[Dict]: +def image_util_prepare_image_records( + image_files: List[str], folder_path_to_id: Dict[str, int] +) -> List[Dict]: """ Prepare image records with thumbnails for database insertion. @@ -158,7 +164,9 @@ def image_util_prepare_image_records(image_files: List[str], folder_path_to_id: return image_records -def image_util_get_images_from_folder(folder_path: str, recursive: bool = True) -> List[str]: +def image_util_get_images_from_folder( + folder_path: str, recursive: bool = True +) -> List[str]: """Get all image files from a folder. Args: @@ -190,7 +198,9 @@ def image_util_get_images_from_folder(folder_path: str, recursive: bool = True) return image_files -def image_util_generate_thumbnail(image_path: str, thumbnail_path: str, size: Tuple[int, int] = (200, 200)) -> bool: +def image_util_generate_thumbnail( + image_path: str, thumbnail_path: str, size: Tuple[int, int] = (200, 200) +) -> bool: """Generate thumbnail for a single image.""" try: with Image.open(image_path) as img: @@ -238,7 +248,9 @@ def image_util_remove_obsolete_images(folder_id_list: List[int]) -> int: return len(obsolete_images) -def image_util_create_folder_path_mapping(folder_ids: List[Tuple[int, str]]) -> Dict[str, int]: +def image_util_create_folder_path_mapping( + folder_ids: List[Tuple[int, str]] +) -> Dict[str, int]: """ Create a dictionary mapping folder paths to their IDs. @@ -255,7 +267,9 @@ def image_util_create_folder_path_mapping(folder_ids: List[Tuple[int, str]]) -> return folder_path_to_id -def image_util_find_folder_id_for_image(image_path: str, folder_path_to_id: Dict[str, int]) -> int: +def image_util_find_folder_id_for_image( + image_path: str, folder_path_to_id: Dict[str, int] +) -> int: """ Find the most specific folder ID for a given image path. diff --git a/backend/main.py b/backend/main.py index 3dea8acd2..a5ef8ef3d 100644 --- a/backend/main.py +++ b/backend/main.py @@ -58,7 +58,9 @@ async def lifespan(app: FastAPI): "name": "PictoPy Postman Collection", "url": "https://www.postman.com/cryosat-explorer-62744145/workspace/pictopy/overview", }, - servers=[{"url": "http://localhost:8000", "description": "Local Development server"}], + servers=[ + {"url": "http://localhost:8000", "description": "Local Development server"} + ], openapi_tags=[ { "name": "Albums", @@ -92,7 +94,9 @@ def generate_openapi_json(): openapi_schema["info"]["contact"] = app.contact project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) - openapi_path = os.path.join(project_root, "docs", "backend", "backend_python", "openapi.json") + openapi_path = os.path.join( + project_root, "docs", "backend", "backend_python", "openapi.json" + ) os.makedirs(os.path.dirname(openapi_path), exist_ok=True) @@ -121,8 +125,12 @@ async def root(): app.include_router(folders_router, prefix="/folders", tags=["Folders"]) app.include_router(albums_router, prefix="/albums", tags=["Albums"]) -app.include_router(face_clusters_router, prefix="/face-clusters", tags=["Face Clusters"]) -app.include_router(user_preferences_router, prefix="/user-preferences", tags=["User Preferences"]) +app.include_router( + face_clusters_router, prefix="/face-clusters", tags=["Face Clusters"] +) +app.include_router( + user_preferences_router, prefix="/user-preferences", tags=["User Preferences"] +) # Entry point for running with: python3 main.py diff --git a/sync-microservice/pyproject.toml b/sync-microservice/pyproject.toml new file mode 100644 index 000000000..83fd24d53 --- /dev/null +++ b/sync-microservice/pyproject.toml @@ -0,0 +1,2 @@ +[tool.ruff] +line-length = 300