From 68dd9cc322c2770d227619bbfa62a24374b558ab Mon Sep 17 00:00:00 2001 From: Anthony Sarkis <18080164+anthony-sarkis@users.noreply.github.com> Date: Thu, 16 Jun 2022 18:58:11 -0700 Subject: [PATCH 1/7] Upgrade to use class object instead of json -> Before we were using the json which introduced some potential confusion now it uses the directory object -> Move functions inside class -> Move default setting inside here... the API returning default in that way still not best but working in existing pattern --- sdk/diffgram/core/directory.py | 144 +++++++++++++++------------------ 1 file changed, 66 insertions(+), 78 deletions(-) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index ee42ba3..fef781c 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -4,86 +4,81 @@ from diffgram.core.diffgram_dataset_iterator import DiffgramDatasetIterator from multiprocessing.pool import ThreadPool as Pool -def get_directory_list(self): - """ - Get a list of available directories for a project - Arguments - self - - Expects - self.project_string_id - - Returns - directory_list, array of dicts - - """ +class Directory(DiffgramDatasetIterator): - if self.project_string_id is None: - raise Exception("No project string." 
+ \ - "Set a project string using .auth()") + def __init__(self, + client, + file_id_list_sliced = None, + init_file_ids = True, + validate_ids = True): - if type(self.project_string_id) != str: - raise Exception("project_string_id must be of type String") + self.client = client + self.id = None + self.file_list_metadata = {} + self.nickname = None + if file_id_list_sliced is None and init_file_ids: + self.file_id_list = self.all_file_ids() + elif not init_file_ids: + self.file_id_list = [] + elif file_id_list_sliced is not None: + self.file_id_list = file_id_list_sliced + super(Directory, self).__init__(self.client, self.file_id_list, validate_ids) - endpoint = "/api/v1/project/" + self.project_string_id + \ - "/directory/list" - response = self.session.get(self.host + endpoint) + def get_directory_list(self): + """ + Get a list of available directories for a project + """ - self.handle_errors(response) + if self.client.project_string_id is None: + raise Exception("No project string." + \ + "Set a project string using .auth()") - directory_list = response.json() + if type(self.client.project_string_id) != str: + raise Exception("project_string_id must be of type String") - return directory_list + endpoint = "/api/v1/project/" + self.client.project_string_id + \ + "/directory/list" + response = self.client.session.get(self.client.host + endpoint) -def set_directory_by_name(self, name): - """ + self.client.handle_errors(response) - Arguments - self - name, string + data = response.json() + + directory_list_json = data.get('directory_list') + default_directory_json = data.get('default_directory') - """ + if default_directory_json: + self.client.directory_id = default_directory_json.get('id') - if name is None: - raise Exception("No name provided.") + directory_list = self.convert_json_to_sdk_object(directory_list_json) - # Don't refresh by default, just set from existing + return directory_list + - names_attempted = [] - did_set = False + def 
convert_json_to_sdk_object(self, directory_list_json): - for directory in self.directory_list: + directory_list = [] - nickname = directory.get("nickname") - if nickname == name: - self.set_default_directory(directory.get("id")) - did_set = True - break - else: - names_attempted.append(nickname) + for directory_json in directory_list_json: + new_directory = Directory( + client = self.client, + init_file_ids = False, + validate_ids = False + ) + refresh_from_dict(new_directory, directory_json) + directory_list.append(new_directory) - if did_set is False: - raise Exception(name, " does not exist. Valid names are: " + - str(names_attempted)) + return directory_list -class Directory(DiffgramDatasetIterator): + def set_default(): - def __init__(self, client, file_id_list_sliced = None, init_file_ids = True, validate_ids = True): + if not self.client.directory_list: + self.client.directory_list = self.get_directory_list() - self.client = client - self.id = None - self.file_list_metadata = {} - if file_id_list_sliced is None and init_file_ids: - self.file_id_list = self.all_file_ids() - elif not init_file_ids: - self.file_id_list = [] - elif file_id_list_sliced is not None: - self.file_id_list = file_id_list_sliced - super(Directory, self).__init__(self.client, self.file_id_list, validate_ids) def all_files(self): """ @@ -190,7 +185,7 @@ def new(self, name: str): # generator expression returns True if the directory # is not found. this is a bit awkward. 
if next((dir for dir in self.client.directory_list - if dir['nickname'] == name), True) is not True: + if dir.nickname == name), True) is not True: raise Exception(name, "Already exists") packet = {'nickname': name} @@ -208,18 +203,15 @@ def new(self, name: str): project = data.get('project') if project: - directory_list = project.get('directory_list') - # TODO upgrade directory_list here to be 1st class objects instead of JSON - if directory_list: - self.client.directory_list = directory_list + directory_list_json = project.get('directory_list') + if directory_list_json: + self.client.directory_list = self.convert_json_to_sdk_object(directory_list_json) new_directory = None # TODO the route about should return the newly created dataset directly - for directory_json in self.client.directory_list: - nickname = directory_json.get("nickname") - if nickname == name: - new_directory = Directory(client = self.client) - refresh_from_dict(new_directory, directory_json) + for directory in self.client.directory_list: + if directory.nickname == name: + new_directory = directory return new_directory @@ -319,20 +311,16 @@ def get(self, names_attempted = [] did_set = False - for directory_json in self.client.directory_list: - - nickname = directory_json.get("nickname") - if nickname == name: - # TODO change the general directory_list - # to use object approach (over dict) + if not self.client.directory_list: + self.client.directory_list = self.get_directory_list() - new_directory = Directory(client = self.client) - refresh_from_dict(new_directory, directory_json) + for directory in self.client.directory_list: - return new_directory + if directory.nickname == name: + return directory else: - names_attempted.append(nickname) + names_attempted.append(directory.nickname) if did_set is False: raise Exception(name, " does not exist. 
Valid names are: " + From e38117cf2957715f8eb6e29a461042726701db7a Mon Sep 17 00:00:00 2001 From: Anthony Sarkis <18080164+anthony-sarkis@users.noreply.github.com> Date: Thu, 16 Jun 2022 19:01:22 -0700 Subject: [PATCH 2/7] Separate concerns and improve startup -> move default dir and label setting to optional and more cleanly define at init instead of inside auth -> use new directory class -> use new get_directory_list() -> get dir now checks if none -> Added print statements for now since still so much changing but those can be optional log statements in future of course --- sdk/diffgram/core/core.py | 110 +++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 42 deletions(-) diff --git a/sdk/diffgram/core/core.py b/sdk/diffgram/core/core.py index db6fea4..70f65b6 100644 --- a/sdk/diffgram/core/core.py +++ b/sdk/diffgram/core/core.py @@ -3,8 +3,6 @@ from diffgram import __version__ from diffgram.file.view import get_label_file_dict -from diffgram.core.directory import get_directory_list -from diffgram.core.directory import set_directory_by_name from diffgram.convert.convert import convert_label from diffgram.label.label_new import label_new @@ -29,7 +27,10 @@ def __init__( client_secret = None, debug = False, staging = False, - host = None + host = None, + set_default_directory = True, + refresh_local_label_dict = True + ): self.session = requests.Session() @@ -50,24 +51,41 @@ def __init__( self.host = "https://diffgram.com" else: self.host = host - self.directory_id = None - self.name_to_file_id = None + self.auth( project_string_id = project_string_id, client_id = client_id, client_secret = client_secret) - self.client_id = client_id - self.client_secret = client_secret self.file = FileConstructor(self) - self.train = Train(self) + #self.train = Train(self) self.job = Job(self) self.guide = Guide(self) - self.directory = Directory(self, validate_ids = False) + self.directory = Directory(self, + init_file_ids = False, + validate_ids = False) 
self.export = Export(self) self.task = Task(client = self) + + self.directory_id = None + self.name_to_file_id = None + + + if set_default_directory is True: + self.set_default_directory() + print("Default directory set:", self.directory_id) + + if refresh_local_label_dict is True: + self.get_label_file_dict() + + self.client_id = client_id + self.client_secret = client_secret + self.label_schema_list = self.get_label_schema_list() + self.directory_list = [] + + def get_member_list(self): url = '/api/project/{}/view'.format(self.project_string_id) response = self.session.get(url = self.host + url) @@ -216,9 +234,7 @@ def handle_errors(self, def auth(self, project_string_id, client_id = None, - client_secret = None, - set_default_directory = True, - refresh_local_label_dict = True + client_secret = None ): """ Define authorization configuration @@ -242,47 +258,59 @@ def auth(self, if client_id and client_secret: self.session.auth = (client_id, client_secret) - if set_default_directory is True: - self.set_default_directory() - if refresh_local_label_dict is True: - # Refresh local labels from Diffgram project - self.get_label_file_dict() + def set_directory_by_name(self, name): + """ + + Arguments + self + name, string + + """ + + if name is None: + raise Exception("No name provided.") + + # Don't refresh by default, just set from existing + + names_attempted = [] + did_set = False + + if not self.directory_list: + self.directory_list = self.directory.get_directory_list() + + for directory in self.directory_list: + + if directory.nickname == name: + self.set_default_directory(directory = directory) + did_set = True + break + else: + names_attempted.append(directory.nickname) + + if did_set is False: + raise Exception(name, " does not exist. 
Valid names are: " + + str(names_attempted)) + def set_default_directory(self, - directory_id = None): + directory_id = None, + directory = None): """ -> If no id is provided fetch directory list for project and set first directory to default. -> Sets the headers of self.session - Arguments - directory_id, int, defaults to None - - Returns - None - - Future - TODO return error if invalid directory? - """ if directory_id: - # TODO check if valid? - # data = {} - # data["directory_id"] = directory_id self.directory_id = directory_id - else: - - data = self.get_directory_list() - - self.default_directory = data['default_directory'] - - # Hold over till refactoring (would prefer to - # just call self.directory_default.id - self.directory_id = self.default_directory['id'] + if directory: + self.directory_id = directory.id + self.default_directory = directory + + self.directory_list = self.directory.get_directory_list() - self.directory_list = data["directory_list"] self.session.headers.update( {'directory_id': str(self.directory_id)}) @@ -290,7 +318,5 @@ def set_default_directory(self, # TODO review not using this pattern anymore setattr(Project, "get_label_file_dict", get_label_file_dict) -setattr(Project, "get_directory_list", get_directory_list) setattr(Project, "convert_label", convert_label) setattr(Project, "label_new", label_new) -setattr(Project, "set_directory_by_name", set_directory_by_name) From 4ac6e86b7b066e54dafdaee6bc9279e5d1ea358f Mon Sep 17 00:00:00 2001 From: Anthony Sarkis <18080164+anthony-sarkis@users.noreply.github.com> Date: Thu, 16 Jun 2022 19:01:48 -0700 Subject: [PATCH 3/7] Align with schema direction only schema is needed not directory id (changing API side too) --- sdk/diffgram/file/view.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sdk/diffgram/file/view.py b/sdk/diffgram/file/view.py index 1cdb1c8..ca9f14d 100644 --- a/sdk/diffgram/file/view.py +++ b/sdk/diffgram/file/view.py @@ -42,14 +42,14 @@ def 
get_label_file_dict(self, schema_id = None, use_session = True): endpoint = "/api/v1/project/" + self.project_string_id + \ "/labels/view/name_to_file_id" - params = {'schema_id': schema_id, 'directory_id': self.directory_id} + params = {'schema_id': schema_id} if use_session: response = self.session.get(self.host + endpoint, params = params) else: # Add Auth response = requests.get(self.host + endpoint, params = params, - headers = {'directory_id': str(self.directory_id)}, + headers = {'schema_id': str(schema_id)}, auth = self.get_http_auth()) self.handle_errors(response) @@ -57,5 +57,6 @@ def get_label_file_dict(self, schema_id = None, use_session = True): data = response.json() if data["log"]["success"] == True: self.name_to_file_id = data["name_to_file_id"] + print("Loaded schema") else: raise Exception(data["log"]["errors"]) From a72a041dc44c44b3a8d8da28b08e5f6eb11d39b3 Mon Sep 17 00:00:00 2001 From: Anthony Sarkis <18080164+anthony-sarkis@users.noreply.github.com> Date: Thu, 16 Jun 2022 19:02:03 -0700 Subject: [PATCH 4/7] Align with new pattern --- sdk/diffgram/file/file_3d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/diffgram/file/file_3d.py b/sdk/diffgram/file/file_3d.py index 37f7b5b..09024f5 100644 --- a/sdk/diffgram/file/file_3d.py +++ b/sdk/diffgram/file/file_3d.py @@ -64,7 +64,7 @@ def upload(self, dataset_name = None, chunk_size = 5000000): self.client.project_string_id ) chunk_size = 5000000 # 5 MB chunks - dataset_id = self.client.default_directory['id'] + dataset_id = self.client.default_directory.id if dataset_name is not None: dataset_id = self.client.directory.get(dataset_name).id From 2b4dbdd435e5745a2201706d69b6674a1f74e9b3 Mon Sep 17 00:00:00 2001 From: Anthony Sarkis <18080164+anthony-sarkis@users.noreply.github.com> Date: Thu, 16 Jun 2022 19:08:08 -0700 Subject: [PATCH 5/7] opencv should be optional only needed in some cases, heavy requirement to put here --- sdk/diffgram/brain/brain.py | 6 +++++- sdk/setup.py | 
8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/sdk/diffgram/brain/brain.py b/sdk/diffgram/brain/brain.py index 9665408..c8fe7ae 100644 --- a/sdk/diffgram/brain/brain.py +++ b/sdk/diffgram/brain/brain.py @@ -4,7 +4,11 @@ import tempfile # TODO import these only if local prediction is needed -import cv2 + +try: + import cv2 +except: + print("Could not import cv2") try: import tensorflow as tf diff --git a/sdk/setup.py b/sdk/setup.py index e7df02a..423dc37 100644 --- a/sdk/setup.py +++ b/sdk/setup.py @@ -1,8 +1,11 @@ import setuptools import diffgram.__init__ as init -with open(".././README.md", "r") as fh: - long_description = fh.read() +try: + with open(".././README.md", "r") as fh: + long_description = fh.read() +except: + long_description = "None" setuptools.setup( name = init.__name__, @@ -22,7 +25,6 @@ ], install_requires=[ 'requests>=2.20.1', - 'opencv-python>=4.0.0.21', 'scipy>=1.1.0', 'six>=1.9.0', 'pillow>=6.1.0', From 3f5ccd74b92a5bd7d2786a0fb6c8c6d4b67666e5 Mon Sep 17 00:00:00 2001 From: Anthony Sarkis <18080164+anthony-sarkis@users.noreply.github.com> Date: Thu, 16 Jun 2022 19:39:49 -0700 Subject: [PATCH 6/7] Fix init bug - this was preventing to_tensorflow() from working init_file_ids assumes the id was already set but for now refresh_from_dict pattern means the id is set after init so refactor `init_files()` and call it after... 
We will need to work on this more Luckily at least now it's refactored to `convert_json_to_sdk_object()` so we have it only in one place --- sdk/diffgram/core/core.py | 12 ++++----- sdk/diffgram/core/directory.py | 45 +++++++++++++++------------------- 2 files changed, 26 insertions(+), 31 deletions(-) diff --git a/sdk/diffgram/core/core.py b/sdk/diffgram/core/core.py index 70f65b6..f1a1ad5 100644 --- a/sdk/diffgram/core/core.py +++ b/sdk/diffgram/core/core.py @@ -28,7 +28,7 @@ def __init__( debug = False, staging = False, host = None, - set_default_directory = True, + init_default_directory = True, refresh_local_label_dict = True ): @@ -71,7 +71,7 @@ def __init__( self.name_to_file_id = None - if set_default_directory is True: + if init_default_directory is True: self.set_default_directory() print("Default directory set:", self.directory_id) @@ -83,7 +83,7 @@ def __init__( self.label_schema_list = self.get_label_schema_list() - self.directory_list = [] + self.directory_list = None def get_member_list(self): @@ -302,14 +302,14 @@ def set_default_directory(self, -> Sets the headers of self.session """ - if directory_id: self.directory_id = directory_id if directory: self.directory_id = directory.id self.default_directory = directory - - self.directory_list = self.directory.get_directory_list() + + if not self.directory_list: + self.directory_list = self.directory.get_directory_list() self.session.headers.update( {'directory_id': str(self.directory_id)}) diff --git a/sdk/diffgram/core/directory.py b/sdk/diffgram/core/directory.py index fef781c..f3dc269 100644 --- a/sdk/diffgram/core/directory.py +++ b/sdk/diffgram/core/directory.py @@ -18,7 +18,7 @@ def __init__(self, self.file_list_metadata = {} self.nickname = None if file_id_list_sliced is None and init_file_ids: - self.file_id_list = self.all_file_ids() + self.init_files() elif not init_file_ids: self.file_id_list = [] elif file_id_list_sliced is not None: @@ -26,6 +26,10 @@ def __init__(self, super(Directory, 
self).__init__(self.client, self.file_id_list, validate_ids) + + def init_files(self): + self.file_id_list = self.all_file_ids() + def get_directory_list(self): """ Get a list of available directories for a project @@ -68,16 +72,15 @@ def convert_json_to_sdk_object(self, directory_list_json): init_file_ids = False, validate_ids = False ) - refresh_from_dict(new_directory, directory_json) - directory_list.append(new_directory) + refresh_from_dict(new_directory, directory_json) - return directory_list + # note timing issue, this needs to happen after id is refreshed + new_directory.init_files() + directory_list.append(new_directory) - def set_default(): + return directory_list - if not self.client.directory_list: - self.client.directory_list = self.get_directory_list() def all_files(self): @@ -89,7 +92,10 @@ def all_files(self): page_num = 1 result = [] while page_num is not None: - diffgram_files = self.list_files(limit = 1000, page_num = page_num, file_view_mode = 'base') + diffgram_files = self.list_files( + limit = 1000, + page_num = page_num, + file_view_mode = 'base') page_num = self.file_list_metadata['next_page'] result = result + diffgram_files return result @@ -98,7 +104,12 @@ def all_file_ids(self, query = None): page_num = 1 result = [] - diffgram_ids = self.list_files(limit = 5000, page_num = page_num, file_view_mode = 'ids_only', query = query) + diffgram_ids = self.list_files( + limit = 5000, + page_num = page_num, + file_view_mode = 'ids_only', + query = query) + if diffgram_ids is False: raise Exception('Error Fetching Files: Please check you are providing a valid query.') result = result + diffgram_ids @@ -223,22 +234,6 @@ def list_files( file_view_mode: str = 'annotation', query: str = None): """ - Get a list of files in directory (from Diffgram service). - - Assumes we are using the default directory. 
- this can be changed ie by: project.set_directory_by_name(dir_name) - - We don't have a strong Directory concept in the SDK yet - So for now assume that we need to - call project.set_directory_by_name(dir_name) first - if we want to change the directory - - - WIP Feb 3, 2020 - A lot of "hard coded" options here. - Want to think a bit more about what we want to - expose options here and what good contexts are. - """ if self.id: logging.info("Using Dataset ID " + str(self.id)) From 24356c47cb92a79190d7879734e27fc482325610 Mon Sep 17 00:00:00 2001 From: Pablo Date: Fri, 17 Jun 2022 09:39:21 -0600 Subject: [PATCH 7/7] fix: dir initialization --- sdk/diffgram/core/core.py | 4 ++-- sdk/diffgram/file/view.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sdk/diffgram/core/core.py b/sdk/diffgram/core/core.py index f1a1ad5..57cbec1 100644 --- a/sdk/diffgram/core/core.py +++ b/sdk/diffgram/core/core.py @@ -72,7 +72,7 @@ def __init__( if init_default_directory is True: - self.set_default_directory() + self.set_default_directory(directory = self.directory) print("Default directory set:", self.directory_id) if refresh_local_label_dict is True: @@ -308,7 +308,7 @@ def set_default_directory(self, self.directory_id = directory.id self.default_directory = directory - if not self.directory_list: + if not hasattr(self, 'directory_list'): self.directory_list = self.directory.get_directory_list() self.session.headers.update( diff --git a/sdk/diffgram/file/view.py b/sdk/diffgram/file/view.py index ca9f14d..460ead7 100644 --- a/sdk/diffgram/file/view.py +++ b/sdk/diffgram/file/view.py @@ -49,7 +49,6 @@ def get_label_file_dict(self, schema_id = None, use_session = True): # Add Auth response = requests.get(self.host + endpoint, params = params, - headers = {'schema_id': str(schema_id)}, auth = self.get_http_auth()) self.handle_errors(response)