Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions ait/commons/util/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,8 @@ def parse_args(args):
# parser_clear.add_argument('-a', action='store_true', help='clear all - selection and known dirs')

parser_list = cmd_parser.add_parser('list', help='list contents of the area')
parser_list.add_argument('-b', action='store_true', help='list all areas in the S3 bucket (authorised users only)')
parser_list.add_argument('-processing', action='store_true', help='access the processed data (authorised users '
'only)')

# parser_upload = cmd_parser.add_parser('upload', help='upload files to the area')
# group_upload = parser_upload.add_mutually_exclusive_group(required=True)
Expand Down Expand Up @@ -143,7 +144,8 @@ def parse_args(args):
group_delete.add_argument('-d', action='store_true', help='delete upload area and contents (authorised users only)')

parser_sync = cmd_parser.add_parser('sync',
help='copy data from selected upload area to ingest upload area (authorised users only)')
help='copy data from selected upload area to ingest upload area (authorised '
'users only)')
parser_sync.add_argument('INGEST_UPLOAD_AREA', help='Ingest upload area', type=valid_ingest_upload_area)

ps = [parser]
Expand Down
96 changes: 87 additions & 9 deletions ait/commons/util/command/list.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import hashlib
import csv

from ait.commons.util.common import format_err
from ait.commons.util.local_state import get_selected_area
from ait.commons.util.user_profile import get_profile
from urllib.parse import urlparse


def print_area(k, area):
Expand All @@ -20,6 +25,34 @@ def print_area(k, area):
print()


def get_s3_path():
    """Prompt on stdin until the user supplies a syntactically valid S3 URL.

    A value is accepted when, after stripping surrounding whitespace, it
    parses with the ``s3`` scheme and a non-empty bucket (netloc) part.

    Returns:
        str: the stripped S3 path exactly as the user typed it.
    """
    prompt = "Enter the S3 path (e.g., s3://bucket-name/folder/): "

    while True:
        candidate = input(prompt).strip()
        parts = urlparse(candidate)

        # Reject anything that is not s3://<bucket>[/...] and ask again.
        if parts.scheme != 's3' or not parts.netloc:
            print("Invalid S3 path. Please enter a valid S3 path starting with 's3://'.")
            continue

        return candidate


def calculate_md5(s3_client, bucket_name, key, chunk_size=8192):
    """Download an S3 object as a stream and return its MD5 hex digest.

    The object is read in ``chunk_size`` pieces so it is never held in memory
    as a whole. Failures are reported on stdout and signalled by ``None``
    instead of raising, so one unreadable object does not abort a listing.

    Args:
        s3_client: client object exposing ``get_object(Bucket=..., Key=...)``
            whose response carries a streaming ``'Body'``.
        bucket_name: name of the bucket holding the object.
        key: key of the object inside the bucket.
        chunk_size: number of bytes requested per read (default 8192).

    Returns:
        str | None: the hexadecimal MD5 digest, or ``None`` if the object
        could not be fetched or read.
    """
    md5_hash = hashlib.md5()

    try:
        response = s3_client.get_object(Bucket=bucket_name, Key=key)
        body = response['Body']

        try:
            # Stream the object in chunks
            for chunk in body.iter_chunks(chunk_size=chunk_size):
                md5_hash.update(chunk)
        finally:
            # Always release the underlying HTTP connection, including when
            # the read fails part-way through.
            body.close()

        return md5_hash.hexdigest()
    except Exception as e:
        print(f"Failed to compute MD5 for {key}: {e}")

    return None


class CmdList:
"""
admin and user
Expand All @@ -29,22 +62,67 @@ class CmdList:
def __init__(self, aws, args):
    """Store the command context and create the S3 client used for listing.

    Args:
        aws: object exposing ``common_session``, from which the S3 client
            is created.
        args: parsed command-line arguments for the ``list`` command.
    """
    self.aws = aws
    self.args = args

    profile = get_profile('morphic-util')
    self.user = profile.username

    # None when the parsed arguments carry no `processing` attribute.
    self.processing = getattr(args, 'processing', None)

    self.s3_cli = aws.common_session.client('s3')

def run(self):
    """Execute the ``list`` command.

    With ``-processing`` set, the admin user is prompted for an S3 path and
    the files under it are listed together with their MD5 hashes. Otherwise
    the contents of the currently selected area are listed.

    Returns:
        tuple: ``(True, None)`` on success, or ``(False, message)`` where
        ``message`` explains why the command could not run.
    """
    if self.processing:
        # Processed data is only available to the admin profile.
        if self.user != 'morphic-admin':
            return False, "Admin function only"

        print("Access granted")

        try:
            self.list_s3_files(get_s3_path())
        except Exception as e:
            # Report failures the same way as the regular listing branch.
            return False, format_err(e, 'list')

        return True, None

    # Only the regular listing needs the locally selected area.
    selected_area = get_selected_area()  # selected area is an S3 bucket

    if not selected_area:
        return False, 'No area selected'

    try:
        self.list_bucket_contents(selected_area)
        return True, None
    except Exception as e:
        return False, format_err(e, 'list')

def list_s3_files(self, s3_path):
    """List every file under an S3 path with its MD5 and save the result.

    Each non-folder key found under ``s3_path`` is hashed, printed, and
    appended as a ``File Name`` / ``MD5 Hash`` row to ``s3_file_md5s.tsv``
    in the current working directory. Keys whose hash could not be computed
    are left out of the file. Errors raised while listing are printed, not
    propagated.

    Args:
        s3_path: URL of the form ``s3://bucket/prefix``; the bucket is taken
            from the netloc and the prefix from the path.
    """
    parsed_url = urlparse(s3_path)
    bucket_name = parsed_url.netloc
    prefix = parsed_url.path.lstrip('/')
    output_file = 's3_file_md5s.tsv'
    found_any = False

    # Explicit UTF-8 so keys with non-ASCII characters can always be written.
    with open(output_file, 'w', newline='', encoding='utf-8') as tsvfile:
        # Tab-delimited, matching the .tsv file name.
        tsv_writer = csv.writer(tsvfile, delimiter='\t')
        tsv_writer.writerow(['File Name', 'MD5 Hash'])  # Write header row

        try:
            # A single list_objects_v2 call returns at most 1000 keys, so
            # walk every page of the listing.
            paginator = self.s3_cli.get_paginator('list_objects_v2')

            for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
                contents = page.get('Contents')
                if not contents:
                    continue

                if not found_any:
                    print(f"\nFiles in '{s3_path}'")
                    found_any = True

                for obj in contents:
                    file_key = obj['Key']
                    if file_key.endswith('/'):  # Skip folders
                        continue

                    md5_hash = calculate_md5(self.s3_cli, bucket_name, file_key)
                    if md5_hash:
                        print(f"{file_key} - MD5: {md5_hash}")
                        tsv_writer.writerow([file_key, md5_hash])  # Write to file

            if not found_any:
                print("\nNo files found.")
        except Exception as e:
            print(f"\nError: {e}")

    print(f"\nResults saved to {output_file}")

def list_bucket_contents(self, selected_area, prefix=''):
result = self.s3_cli.list_objects_v2(Bucket=selected_area, Delimiter='/', Prefix=prefix)
Expand Down
8 changes: 4 additions & 4 deletions ait/commons/util/command/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from ait.commons.util.spreadsheet_util import SubmissionError
from ait.commons.util.user_profile import get_profile
from ait.commons.util.provider_api_util import APIProvider
from ait.commons.util.provider_api_util import ProviderApi


def matching_expression_alteration_and_cell_line(cell_line, expression_alteration):
Expand Down Expand Up @@ -206,7 +206,7 @@ def __init__(self, args):
self.access_token = get_profile('morphic-util').access_token
self.type = getattr(self.args, 'type', None)
self.file = getattr(self.args, 'file', None)
self.provider_api = APIProvider(self.BASE_URL)
self.provider_api = ProviderApi(self.BASE_URL)

def run(self):
"""
Expand Down Expand Up @@ -1282,5 +1282,5 @@ def delete_dataset(self, dataset, access_token):
print(f"Deleting {data_file}")
self.provider_api.delete(f"{self.BASE_URL}/files/{data_file}", access_token)

print(f"\nDeleting the dataset: {dataset}")
self.provider_api.delete(f"{self.BASE_URL}/datasets/{dataset}", access_token)
# print(f"\nDeleting the dataset: {dataset}")
# self.provider_api.delete(f"{self.BASE_URL}/datasets/{dataset}", access_token)
4 changes: 2 additions & 2 deletions ait/commons/util/command/submit_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ait.commons.util.command.submit import CmdSubmit, get_entity_id_from_hal_link, create_new_submission_envelope
from ait.commons.util.command.upload import CmdUpload
from ait.commons.util.user_profile import get_profile
from ait.commons.util.provider_api_util import APIProvider
from ait.commons.util.provider_api_util import ProviderApi
from ait.commons.util.spreadsheet_util import SpreadsheetSubmitter, ValidationError, \
merge_library_preparation_sequencing_file, merge_cell_line_and_differentiated_cell_line, \
merge_differentiated_cell_line_and_library_preparation, SubmissionError
Expand Down Expand Up @@ -88,7 +88,7 @@ def __init__(self, args):
self.user_profile = get_profile('morphic-util')
self.access_token = self.user_profile.access_token
self.aws = Aws(self.user_profile)
self.provider_api = APIProvider(self.BASE_URL)
self.provider_api = ProviderApi(self.BASE_URL)
self.validation_errors = []
self.submission_errors = []
self.submission_envelope_id = None
Expand Down
4 changes: 2 additions & 2 deletions ait/commons/util/command/view.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ait.commons.util.aws_client import Aws
from ait.commons.util.provider_api_util import APIProvider
from ait.commons.util.provider_api_util import ProviderApi
from ait.commons.util.user_profile import get_profile


Expand All @@ -10,7 +10,7 @@ def __init__(self, args):
self.args = args
self.access_token = get_profile('morphic-util').access_token
self.user_profile = get_profile('morphic-util')
self.provider_api = APIProvider(self.base_url)
self.provider_api = ProviderApi(self.base_url)

if hasattr(self.args, 'dataset') and self.args.dataset is not None:
self.dataset = self.args.dataset
Expand Down
2 changes: 1 addition & 1 deletion ait/commons/util/provider_api_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import requests


class APIProvider:
class ProviderApi:
def __init__(self, base_url):
self.base_url = base_url

Expand Down