From d05c81ab9fa7b74d1ced1be4905fa742f526def0 Mon Sep 17 00:00:00 2001 From: JonnyTran Date: Tue, 12 Aug 2025 11:10:58 -0700 Subject: [PATCH 1/8] Refactor file handling and type annotations in API and context modules - Updated type hints in `files.py` for better clarity and consistency. - Enhanced error handling in `put_file` and `list_objects` functions. - Improved metadata handling in `ObjectMetadata` and `ListObjectsResponse` classes. - Cleaned up optional parameters and added type ignores where necessary for compatibility. --- .../extralit_server/api/handlers/v1/files.py | 7 +++- .../extralit_server/api/schemas/v1/files.py | 14 +++---- .../src/extralit_server/contexts/files.py | 41 +++++++++++-------- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/extralit-server/src/extralit_server/api/handlers/v1/files.py b/extralit-server/src/extralit_server/api/handlers/v1/files.py index 7fc6e0d94..4706c15bc 100644 --- a/extralit-server/src/extralit_server/api/handlers/v1/files.py +++ b/extralit-server/src/extralit_server/api/handlers/v1/files.py @@ -72,7 +72,12 @@ async def put_file( try: response = files.put_object( - client, bucket, object, data=file.file, size=file.size, content_type=file.content_type + client, + bucket, + object, + data=file.file, + size=file.size, # type: ignore + content_type=file.content_type, # type: ignore ) return response except S3Error as se: diff --git a/extralit-server/src/extralit_server/api/schemas/v1/files.py b/extralit-server/src/extralit_server/api/schemas/v1/files.py index 1af728715..bd7ff4f6b 100644 --- a/extralit-server/src/extralit_server/api/schemas/v1/files.py +++ b/extralit-server/src/extralit_server/api/schemas/v1/files.py @@ -44,10 +44,10 @@ def parse_metadata(cls, v): return v @classmethod - def from_minio_object(cls, minio_object: Object): + def from_minio_object(cls, minio_object: Object) -> "ObjectMetadata": return cls( bucket_name=minio_object.bucket_name, - object_name=minio_object.object_name, + object_name=minio_object.object_name or "", last_modified=minio_object.last_modified, is_latest=None if minio_object.is_latest is None else minio_object.is_latest.lower() == "true", etag=minio_object.etag, @@ -58,7 +58,7 @@ def from_minio_object(cls, minio_object: Object): ) @classmethod - def from_minio_write_response(cls, write_result: ObjectWriteResult): + def from_minio_write_response(cls, write_result: ObjectWriteResult) -> "ObjectMetadata": return cls( bucket_name=write_result.bucket_name, object_name=write_result.object_name, @@ -76,10 +76,10 @@ class ListObjectsResponse(BaseModel): objects: Iterable[ObjectMetadata] = Field(default_factory=list) def __len__(self) -> int: - return len(self.objects) + return len(self.objects) # type: ignore def __getitem__(self, index) -> ObjectMetadata: - return self.objects[index] + return self.objects[index] # type: ignore def __iter__(self): return iter(self.objects) @@ -114,14 +114,14 @@ def assign_version_id(cls, objects: List[ObjectMetadata]) -> List[ObjectMetadata class FileObjectResponse(BaseModel): response: HTTPResponse - metadata: Optional[ObjectMetadata] + metadata: ObjectMetadata versions: Optional[ListObjectsResponse] class Config: arbitrary_types_allowed = True @property - def version_tag(self) -> str: + def version_tag(self) -> Optional[str]: if not self.metadata or not self.versions: return "" else: diff --git a/extralit-server/src/extralit_server/contexts/files.py b/extralit-server/src/extralit_server/contexts/files.py index f16c6af33..463e03303 100644 --- a/extralit-server/src/extralit_server/contexts/files.py +++ b/extralit-server/src/extralit_server/contexts/files.py @@ -24,6 +24,7 @@ from typing import Any, BinaryIO, Dict, List, Optional, Union from urllib.parse import urlparse from uuid import UUID +from minio.datatypes import Object from urllib3 import HTTPResponse from fastapi import HTTPException @@ -83,8 +84,8 @@ def put_object( data: Union[BinaryIO, bytes], length: Optional[int] = None, content_type: Optional[str] = None, - part_size: int = None, - metadata: Dict[str, Any] = None, + part_size: Optional[int] = None, + metadata: Optional[Dict[str, Any]] = None, ) -> ObjectWriteResult: # Ensure bucket exists bucket_path = self._get_bucket_path(bucket_name) @@ -126,7 +127,7 @@ def put_object( object_name=object_name, version_id=version_id, etag=content_hash, - http_headers={}, + http_headers={}, # type: ignore last_modified=None, location=None, ) @@ -153,7 +154,7 @@ def get_object(self, bucket_name: str, object_name: str, version_id: Optional[st with open(meta_path, "r") as f: json.load(f) - return HTTPResponse(body=io.BytesIO(content), preload_content=False) + return HTTPResponse(body=io.BytesIO(content), preload_content=False) # type: ignore def stat_object(self, bucket_name: str, object_name: str, version_id: Optional[str] = None) -> ObjectMetadata: if version_id: @@ -177,15 +178,13 @@ def stat_object(self, bucket_name: str, object_name: str, version_id: Optional[s stats = path.stat() - last_modified = datetime.fromtimestamp(stats.st_mtime) - return ObjectMetadata( bucket_name=bucket_name, object_name=object_name, version_id=version_id or metadata.get("version_id"), etag=metadata.get("etag"), size=stats.st_size, - last_modified=last_modified, + last_modified=datetime.fromtimestamp(stats.st_mtime), metadata=metadata, content_type=metadata.get("content_type", "application/octet-stream"), ) @@ -235,6 +234,7 @@ def list_objects( if start_after: files = [f for f in files if str(f.relative_to(bucket_path)) > start_after] + result = [] for file_path in files: object_name = str(file_path.relative_to(bucket_path)) stats = file_path.stat() @@ -252,25 +252,27 @@ def list_objects( object_name=object_name, etag=metadata.get("etag"), size=stats.st_size, - last_modified=stats.st_mtime, + last_modified=datetime.fromtimestamp(stats.st_mtime), metadata=metadata, content_type=metadata.get("content_type", "application/octet-stream"), version_id=metadata.get("version_id") if include_version else None, ) - yield obj + result.append(obj) + + return result def get_minio_client() -> Optional[Union[Minio, LocalFileStorage]]: if None in [settings.s3_endpoint, settings.s3_access_key, settings.s3_secret_key]: # Use local file system storage if S3 settings are not provided - local_storage_path = os.path.join(settings.home_path, "storage") + local_storage_path = os.path.join(settings.home_path, "storage") # type: ignore _LOGGER.info(f"Using local file storage at: {local_storage_path}") return LocalFileStorage(local_storage_path) try: parsed_url = urlparse(settings.s3_endpoint) - hostname = parsed_url.hostname + hostname: str = str(parsed_url.hostname) port = parsed_url.port if hostname is None: @@ -318,10 +320,12 @@ def list_objects( recursive=True, start_after: Optional[str] = None, ) -> ListObjectsResponse: - objects = client.list_objects( + objects: List[ObjectMetadata | Object] = client.list_objects( # type: ignore bucket, prefix=prefix, recursive=recursive, include_version=include_version, start_after=start_after ) - objects = [ObjectMetadata.from_minio_object(obj) for obj in objects] + objects: List[ObjectMetadata] = [ + obj if isinstance(obj, ObjectMetadata) else ObjectMetadata.from_minio_object(obj) for obj in objects + ] return ListObjectsResponse(objects=objects) @@ -368,9 +372,9 @@ def put_object( bucket: str, object: str, data: Union[BinaryIO, bytes, str], - content_type: str = None, - size: int = None, - metadata: Dict[str, Any] = None, + size: int, + content_type: str = "application/octet-stream", + metadata: Optional[Dict[str, Any]] = None, part_size: int = 100 * 1024 * 1024, ) -> ObjectMetadata: if isinstance(data, bytes): @@ -391,7 +395,7 @@ def put_object( content_type=content_type, length=size, part_size=part_size, - metadata=metadata, + metadata=metadata or {}, ) return ObjectMetadata.from_minio_write_response(response) @@ -512,7 +516,8 @@ def delete_bucket(client: Union[Minio, LocalFileStorage], workspace_name: str): obj_list = list(objects) for obj in obj_list: try: - client.remove_object(workspace_name, obj.object_name, version_id=obj.version_id) + if obj.object_name is not None: + client.remove_object(workspace_name, obj.object_name, version_id=obj.version_id) except S3Error as remove_err: _LOGGER.warning( f"Error removing object {obj.object_name} (version: {obj.version_id}) during bucket delete: {remove_err}" From 8ca8056d11081af53f1f2d458b80c977ffe92ab6 Mon Sep 17 00:00:00 2001 From: JonnyTran Date: Tue, 12 Aug 2025 11:11:35 -0700 Subject: [PATCH 2/8] Fix metadata handling in get_object function to ensure proper ObjectMetadata usage --- extralit-server/src/extralit_server/contexts/files.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extralit-server/src/extralit_server/contexts/files.py b/extralit-server/src/extralit_server/contexts/files.py index 463e03303..226b62061 100644 --- a/extralit-server/src/extralit_server/contexts/files.py +++ b/extralit-server/src/extralit_server/contexts/files.py @@ -357,7 +357,11 @@ def get_object( else: versions = None - return FileObjectResponse(response=obj, metadata=stat, versions=versions) + return FileObjectResponse( + response=obj, + metadata=stat if isinstance(stat, ObjectMetadata) else ObjectMetadata.from_minio_object(stat), + versions=versions, + ) except S3Error as se: _LOGGER.error(f"Error getting object {object} from bucket {bucket}: {se}") From 611eebf26b965df2c315c3dd7e62ce3032337da6 Mon Sep 17 00:00:00 2001 From: JonnyTran Date: Tue, 12 Aug 2025 14:03:48 -0700 Subject: [PATCH 3/8] Refactor DocumentsAPI to support multi-criteria document retrieval - Updated the `get` method to accept a dictionary of parameters for searching documents by workspace ID, ID, PMID, DOI, or reference. - Enhanced error handling for cases with no documents found or multiple documents returned. - Refactored related document model and repository methods to align with the new API structure. - Updated frontend components to utilize the new document fetching logic, ensuring compatibility with the revised API. --- .../base/base-pdf-viewer/PDFViewer.vue | 6 +- .../container/mode/useDocumentViewModel.ts | 56 +++++---- .../v1/domain/entities/document/Document.ts | 15 +-- .../usecases/get-document-by-id-use-case.ts | 24 ++-- .../repositories/DocumentRepository.ts | 30 ++--- .../api/handlers/v1/documents.py | 84 +++++++------- .../unit/api/handlers/v1/test_documents.py | 80 ++++++++++++- extralit/src/extralit/_api/_documents.py | 51 +++----- extralit/src/extralit/_models/_documents.py | 6 +- extralit/src/extralit/documents/_resource.py | 109 +++++++----------- 10 files changed, 254 insertions(+), 207 deletions(-) diff --git a/extralit-frontend/components/base/base-pdf-viewer/PDFViewer.vue b/extralit-frontend/components/base/base-pdf-viewer/PDFViewer.vue index 1e2da70f2..84abea2f8 100644 --- a/extralit-frontend/components/base/base-pdf-viewer/PDFViewer.vue +++ b/extralit-frontend/components/base/base-pdf-viewer/PDFViewer.vue @@ -19,7 +19,7 @@ -