From d361b3fa978bd2d6efaa199ff3b54836700cfb5d Mon Sep 17 00:00:00 2001 From: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> Date: Mon, 7 Dec 2020 08:58:41 -0600 Subject: [PATCH 01/45] Create development.yaml --- .github/workflows/development.yaml | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/development.yaml diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml new file mode 100644 index 000000000..6a2b91b3c --- /dev/null +++ b/.github/workflows/development.yaml @@ -0,0 +1,36 @@ +# This is a basic workflow to help you get started with Actions + +name: CI + +# Controls when the action will run. +on: + # Triggers the workflow on push or pull request events but only for the master branch + push: + branches: [ master ] + pull_request: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "build" + build: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + + # Runs a single command using the runners shell + - name: Run a one-line script + run: echo Hello, world! + + # Runs a set of commands using the runners shell + - name: Run a multi-line script + run: | + echo Add other actions to build, + echo test, and deploy your project. From 34062707cabf121ada6e123429ee0c403e41717d Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Mon, 7 Dec 2020 09:23:35 -0600 Subject: [PATCH 02/45] Replace TravisCI with GHA. --- .github/workflows/development.yaml | 69 ++++++++++++++++++------------ .travis.yml | 59 ------------------------- 2 files changed, 41 insertions(+), 87 deletions(-) delete mode 100644 .travis.yml diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index 6a2b91b3c..ad2035531 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -1,36 +1,49 @@ -# This is a basic workflow to help you get started with Actions - -name: CI - -# Controls when the action will run. +name: Development on: - # Triggers the workflow on push or pull request events but only for the master branch push: - branches: [ master ] + branches: + - '**' # every branch + - '!stage*' # exclude branches beginning with stage pull_request: - branches: [ master ] - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel + branches: + - '**' # every branch + - '!stage*' # exclude branches beginning with stage jobs: - # This workflow contains a single job called "build" - build: - # The type of runner that the job will run on + test: + if: github.event_name == 'push' || github.event_name == 'pull_request' runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job + strategy: + matrix: + py_ver: ["3.8"] + mysql_ver: ["8.0", "5.7", "5.6"] + include: + - py_ver: "3.7" + mysql_ver: "5.7" + - py_ver: "3.6" + mysql_ver: "5.7" + - py_ver: "3.5" + mysql_ver: "5.7" steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - - # Runs a single command using the runners shell - - name: Run a one-line script - run: echo Hello, world! - - # Runs a set of commands using the runners shell - - name: Run a multi-line script + - name: Install dependencies run: | - echo Add other actions to build, - echo test, and deploy your project. + python -m pip install --upgrade pip + pip install flake8 + - name: Run syntax tests + run: flake8 datajoint --count --select=E9,F63,F7,F82 --show-source --statistics + - name: Run primary tests + env: + UID: "1001" + GID: "116" + PY_VER: ${{matrix.py_ver}} + MYSQL_VER: ${{matrix.mysql_ver}} + ALPINE_VER: "3.10" + MINIO_VER: RELEASE.2019-09-26T19-42-35Z + COMPOSE_HTTP_TIMEOUT: "120" + COVERALLS_SERVICE_NAME: travis-ci + COVERALLS_REPO_TOKEN: fd0BoXG46TPReEem0uMy7BJO5j0w1MQiY + run: docker-compose -f LNX-docker-compose.yml up --build --exit-code-from app + - name: Run style tests + run: | + flake8 --ignore=E121,E123,E126,E226,E24,E704,W503,W504,E722,F401,W605 datajoint \ + --count --max-complexity=62 --max-line-length=127 --statistics \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 72c11490b..000000000 --- a/.travis.yml +++ /dev/null @@ -1,59 +0,0 @@ -sudo: required -env: - global: - - MINIO_VER="RELEASE.2019-09-26T19-42-35Z" - - ALPINE_VER="3.10" - - COMPOSE_HTTP_TIMEOUT="300" - - UID="2000" - - GID="2000" - - COVERALLS_SERVICE_NAME="travis-ci" - - COVERALLS_REPO_TOKEN="fd0BoXG46TPReEem0uMy7BJO5j0w1MQiY" -services: -- docker -main: &main - stage: "Tests & Coverage: Alpine" - os: linux - dist: xenial # precise, trusty, xenial, bionic - language: shell - script: - - docker-compose -f LNX-docker-compose.yml up --build --exit-code-from app -jobs: - include: - - stage: "Lint: Syntax" - language: python - install: - - pip install flake8 - script: - - flake8 datajoint --count --select=E9,F63,F7,F82 --show-source --statistics - - <<: *main - env: - - PY_VER: "3.8" - - MYSQL_VER: "5.7" - - <<: *main - env: - - PY_VER: "3.7" - - MYSQL_VER: "5.7" - - <<: *main - env: - - PY_VER: "3.6" - - MYSQL_VER: "5.7" - - <<: *main - env: - - PY_VER: "3.5" - - MYSQL_VER: "5.7" - - <<: *main - env: - - PY_VER: "3.8" - - MYSQL_VER: "8.0" - - <<: *main - env: - - PY_VER: "3.8" - - MYSQL_VER: "5.6" - - stage: "Lint: Style" - language: python - install: - - pip install flake8 - script: - - | - flake8 --ignore=E121,E123,E126,E226,E24,E704,W503,W504,E722,F401,W605 datajoint \ - --count --max-complexity=62 --max-line-length=127 --statistics \ No newline at end of file From e7d60c7f2ff2cf399ad8baf69c76385d9b5c3027 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Mon, 7 Dec 2020 09:26:53 -0600 Subject: [PATCH 03/45] Add python setup hook. --- .github/workflows/development.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index ad2035531..55ced7eb9 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -25,6 +25,10 @@ jobs: mysql_ver: "5.7" steps: - uses: actions/checkout@v2 + - name: Set up Python ${{matrix.py_ver}} + uses: actions/setup-python@v2 + with: + python-version: ${{matrix.py_ver}} - name: Install dependencies run: | python -m pip install --upgrade pip From b3e7570aefed191953aa9f84c14744aa67b970ca Mon Sep 17 00:00:00 2001 From: synicix Date: Tue, 8 Dec 2020 19:29:37 -0600 Subject: [PATCH 04/45] Added basic list_tables function for schema class --- datajoint/schemas.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/datajoint/schemas.py b/datajoint/schemas.py index a708db3ce..fa67a53cf 100644 --- a/datajoint/schemas.py +++ b/datajoint/schemas.py @@ -288,6 +288,15 @@ def repl(s): with open(python_filename, 'wt') as f: f.write(python_code) + def list_tables(self): + """ + Return a list of all tables in the schema except tables with ~ in first character such as ~logs + :return: A list of table names in their raw datajoint naming convection form + """ + + query_result = self.connection.query('SELECT table_name FROM information_schema.tables WHERE table_schema = \'' + self.database + '\'').fetchall() + return [table_name_tuple[0] for table_name_tuple in query_result if '~' != table_name_tuple[0][0]] + class VirtualModule(types.ModuleType): """ From 32b2874bc50485a671fb88dfdc78ba7d6ae7a2f6 Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 9 Dec 2020 17:11:51 -0600 Subject: [PATCH 05/45] Update list_tables to more efficient solution --- datajoint/schemas.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datajoint/schemas.py b/datajoint/schemas.py index fa67a53cf..27b768ed2 100644 --- a/datajoint/schemas.py +++ b/datajoint/schemas.py @@ -294,8 +294,9 @@ def list_tables(self): :return: A list of table names in their raw datajoint naming convection form """ - query_result = self.connection.query('SELECT table_name FROM information_schema.tables WHERE table_schema = \'' + self.database + '\'').fetchall() - return [table_name_tuple[0] for table_name_tuple in query_result if '~' != table_name_tuple[0][0]] + return [table_name for (table_name,) in self.connection.query(""" + SELECT table_name FROM information_schema.tables + WHERE table_schema = %s and table_name NOT LIKE '~%%'""", args=(self.database))] class VirtualModule(types.ModuleType): From 804ba11dc1d8f2790d626911bc2c99bcfb16d37b Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 9 Dec 2020 17:14:05 -0600 Subject: [PATCH 06/45] Update comments to fit in 95 characters convention --- datajoint/schemas.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datajoint/schemas.py b/datajoint/schemas.py index 27b768ed2..5b03e2033 100644 --- a/datajoint/schemas.py +++ b/datajoint/schemas.py @@ -290,7 +290,8 @@ def repl(s): def list_tables(self): """ - Return a list of all tables in the schema except tables with ~ in first character such as ~logs + Return a list of all tables in the schema except tables with ~ in first character such + as ~logs and ~job :return: A list of table names in their raw datajoint naming convection form """ From 828807319fe350cb0b5378f68ad36c55685440e8 Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 9 Dec 2020 18:12:14 -0600 Subject: [PATCH 07/45] Added test for schema.list_tables --- tests/test_schema.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_schema.py b/tests/test_schema.py index 98ec3e7f5..c180e922d 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -4,6 +4,7 @@ from . import schema from . import schema_empty from . import PREFIX, CONN_INFO +from .schema_simple import schema as schema_simple def relation_selector(attr): @@ -105,6 +106,9 @@ class Unit(dj.Part): test_schema.drop() +def test_list_tables(): + assert(['#a', '#argmax_test', '#data_a', '#data_b', '#i_j', '#j_i', '#l', '#t_test_update', '__b', '__b__c', '__d', '__e', '__e__f', 'f', 'reserved_word'] == schema_simple.list_tables()) + def test_schema_save(): assert_true("class Experiment(dj.Imported)" in schema.schema.code) From a5ea9869f4042238a70d66ffc93910f74c92dd33 Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 9 Dec 2020 22:20:44 -0600 Subject: [PATCH 08/45] Update exception to more general form. Need to find out exactly what error it is to turn it back to specifics --- datajoint/s3.py | 7 ++++--- tests/__init__.py | 12 ++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/datajoint/s3.py b/datajoint/s3.py index 72e0f4f06..f16e5f0e1 100644 --- a/datajoint/s3.py +++ b/datajoint/s3.py @@ -37,7 +37,7 @@ def get(self, name): logger.debug('get: {}:{}'.format(self.bucket, name)) try: return self.client.get_object(self.bucket, str(name)).data - except minio.error.NoSuchKey: + except Exception as e: raise errors.MissingExternalFile('Missing s3 key %s' % name) from None def fget(self, name, local_filepath): @@ -59,7 +59,8 @@ def exists(self, name): logger.debug('exists: {}:{}'.format(self.bucket, name)) try: self.client.stat_object(self.bucket, str(name)) - except minio.error.NoSuchKey: + except Exception as e: + print(e) return False return True @@ -67,7 +68,7 @@ def get_size(self, name): logger.debug('get_size: {}:{}'.format(self.bucket, name)) try: return self.client.stat_object(self.bucket, str(name)).size - except minio.error.NoSuchKey: + except Exception as e: raise errors.MissingExternalFile from None def remove_object(self, name): diff --git a/tests/__init__.py b/tests/__init__.py index bb0b814c4..7ff8adaec 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -136,7 +136,9 @@ def setup_package(): region = "us-east-1" try: minioClient.make_bucket(S3_MIGRATE_BUCKET, location=region) - except minio.error.BucketAlreadyOwnedByYou: + except Exception as e: + print(e) + print('Minio make_bucket failed') pass pathlist = Path(source).glob('**/*') @@ -149,7 +151,9 @@ def setup_package(): # Add S3 try: minioClient.make_bucket(S3_CONN_INFO['bucket'], location=region) - except minio.error.BucketAlreadyOwnedByYou: + except Exception as e: + print(e) + print('Minio make_bucket failed') pass # Add old File Content @@ -179,14 +183,14 @@ def teardown_package(): remove("dj_local_conf.json") # Remove old S3 - objs = list(minioClient.list_objects_v2( + objs = list(minioClient.list_objects( S3_MIGRATE_BUCKET, recursive=True)) objs = [minioClient.remove_object(S3_MIGRATE_BUCKET, o.object_name.encode('utf-8')) for o in objs] minioClient.remove_bucket(S3_MIGRATE_BUCKET) # Remove S3 - objs = list(minioClient.list_objects_v2(S3_CONN_INFO['bucket'], recursive=True)) + objs = list(minioClient.list_objects(S3_CONN_INFO['bucket'], recursive=True)) objs = [minioClient.remove_object(S3_CONN_INFO['bucket'], o.object_name.encode('utf-8')) for o in objs] minioClient.remove_bucket(S3_CONN_INFO['bucket']) From 780d57bc14819ea3710c1276ab43c2be9fae54a1 Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 9 Dec 2020 22:29:24 -0600 Subject: [PATCH 09/45] Update print(e) statements to avoid github action errors --- datajoint/s3.py | 2 ++ tests/__init__.py | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datajoint/s3.py b/datajoint/s3.py index f16e5f0e1..19f792eb2 100644 --- a/datajoint/s3.py +++ b/datajoint/s3.py @@ -38,6 +38,7 @@ def get(self, name): try: return self.client.get_object(self.bucket, str(name)).data except Exception as e: + print(e) raise errors.MissingExternalFile('Missing s3 key %s' % name) from None def fget(self, name, local_filepath): @@ -69,6 +70,7 @@ def get_size(self, name): try: return self.client.stat_object(self.bucket, str(name)).size except Exception as e: + print(e) raise errors.MissingExternalFile from None def remove_object(self, name): diff --git a/tests/__init__.py b/tests/__init__.py index 7ff8adaec..807e90930 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -138,7 +138,6 @@ def setup_package(): minioClient.make_bucket(S3_MIGRATE_BUCKET, location=region) except Exception as e: print(e) - print('Minio make_bucket failed') pass pathlist = Path(source).glob('**/*') @@ -153,7 +152,6 @@ def setup_package(): minioClient.make_bucket(S3_CONN_INFO['bucket'], location=region) except Exception as e: print(e) - print('Minio make_bucket failed') pass # Add old File Content From d489b474a9747f29e3879e419de7b2f458113a7e Mon Sep 17 00:00:00 2001 From: synicix Date: Wed, 9 Dec 2020 23:05:07 -0600 Subject: [PATCH 10/45] Added more specifc cases for error --- tests/__init__.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 807e90930..a2ecc65e8 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -136,8 +136,7 @@ def setup_package(): region = "us-east-1" try: minioClient.make_bucket(S3_MIGRATE_BUCKET, location=region) - except Exception as e: - print(e) + except minio.error.S3Error: pass pathlist = Path(source).glob('**/*') @@ -150,8 +149,7 @@ def setup_package(): # Add S3 try: minioClient.make_bucket(S3_CONN_INFO['bucket'], location=region) - except Exception as e: - print(e) + except minio.error.S3Error: pass # Add old File Content From 78c208af6da7e9217a020f4056acbb9628123990 Mon Sep 17 00:00:00 2001 From: synicix Date: Thu, 10 Dec 2020 00:26:43 -0600 Subject: [PATCH 11/45] Change error handling to be more specific to minio.error.S3Error --- datajoint/s3.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/datajoint/s3.py b/datajoint/s3.py index 19f792eb2..92f044eb8 100644 --- a/datajoint/s3.py +++ b/datajoint/s3.py @@ -37,8 +37,7 @@ def get(self, name): logger.debug('get: {}:{}'.format(self.bucket, name)) try: return self.client.get_object(self.bucket, str(name)).data - except Exception as e: - print(e) + except minio.error.S3Error: raise errors.MissingExternalFile('Missing s3 key %s' % name) from None def fget(self, name, local_filepath): @@ -60,8 +59,7 @@ def exists(self, name): logger.debug('exists: {}:{}'.format(self.bucket, name)) try: self.client.stat_object(self.bucket, str(name)) - except Exception as e: - print(e) + except minio.error.S3Error: return False return True @@ -69,8 +67,7 @@ def get_size(self, name): logger.debug('get_size: {}:{}'.format(self.bucket, name)) try: return self.client.stat_object(self.bucket, str(name)).size - except Exception as e: - print(e) + except minio.error.S3Error: raise errors.MissingExternalFile from None def remove_object(self, name): From 912a31a0735232bd7062fedf8f70670d43e7222e Mon Sep 17 00:00:00 2001 From: synicix Date: Thu, 10 Dec 2020 14:44:03 -0600 Subject: [PATCH 12/45] Change error handling to be a bit more specific simlar to the old version --- datajoint/s3.py | 23 ++++++++++++++++------- tests/__init__.py | 10 ++++++---- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/datajoint/s3.py b/datajoint/s3.py index 92f044eb8..99f12a6c1 100644 --- a/datajoint/s3.py +++ b/datajoint/s3.py @@ -37,8 +37,11 @@ def get(self, name): logger.debug('get: {}:{}'.format(self.bucket, name)) try: return self.client.get_object(self.bucket, str(name)).data - except minio.error.S3Error: - raise errors.MissingExternalFile('Missing s3 key %s' % name) from None + except minio.error.S3Error as e: + if e.code == 'NoSuchKey': + raise errors.MissingExternalFile('Missing s3 key %s' % name) from None + else: + raise e def fget(self, name, local_filepath): """get file from object name to local filepath""" @@ -59,17 +62,23 @@ def exists(self, name): logger.debug('exists: {}:{}'.format(self.bucket, name)) try: self.client.stat_object(self.bucket, str(name)) - except minio.error.S3Error: - return False + except minio.error.S3Error as e: + if e.code == 'NoSuchKey': + return False + else: + raise e return True def get_size(self, name): logger.debug('get_size: {}:{}'.format(self.bucket, name)) try: return self.client.stat_object(self.bucket, str(name)).size - except minio.error.S3Error: - raise errors.MissingExternalFile from None - + except minio.error.S3Error as e: + if e.code == 'NoSuchKey': + raise errors.MissingExternalFile from None + else: + raise e + def remove_object(self, name): logger.debug('remove_object: {}:{}'.format(self.bucket, name)) try: diff --git a/tests/__init__.py b/tests/__init__.py index a2ecc65e8..6d578f95d 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -136,8 +136,9 @@ def setup_package(): region = "us-east-1" try: minioClient.make_bucket(S3_MIGRATE_BUCKET, location=region) - except minio.error.S3Error: - pass + except minio.error.S3Error as e: + if e.code != 'BucketAlreadyOwnedByYou': + raise e pathlist = Path(source).glob('**/*') for path in pathlist: @@ -149,8 +150,9 @@ def setup_package(): # Add S3 try: minioClient.make_bucket(S3_CONN_INFO['bucket'], location=region) - except minio.error.S3Error: - pass + except minio.error.S3Error as e: + if e.code != 'BucketAlreadyOwnedByYou': + raise e # Add old File Content try: From a11dd92173d71400931fde91172f986db4845985 Mon Sep 17 00:00:00 2001 From: synicix Date: Thu, 10 Dec 2020 14:45:40 -0600 Subject: [PATCH 13/45] Update requirement to set minio>=7.0.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5c84e822c..fa3882448 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,5 @@ pandas tqdm networkx pydot -minio +minio>=7.0.0 matplotlib From 0fb4e52504dbf259676742b9f74dfa2c625d9d34 Mon Sep 17 00:00:00 2001 From: synicix Date: Thu, 10 Dec 2020 15:06:08 -0600 Subject: [PATCH 14/45] Remove white space --- datajoint/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/s3.py b/datajoint/s3.py index 99f12a6c1..d991bcd77 100644 --- a/datajoint/s3.py +++ b/datajoint/s3.py @@ -78,7 +78,7 @@ def get_size(self, name): raise errors.MissingExternalFile from None else: raise e - + def remove_object(self, name): logger.debug('remove_object: {}:{}'.format(self.bucket, name)) try: From b541887789ca9820d752435cc93258ac849acbf0 Mon Sep 17 00:00:00 2001 From: synicix Date: Thu, 10 Dec 2020 15:44:27 -0600 Subject: [PATCH 15/45] Remove additonal white space --- datajoint/s3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/s3.py b/datajoint/s3.py index d991bcd77..0e3efb2dd 100644 --- a/datajoint/s3.py +++ b/datajoint/s3.py @@ -78,7 +78,7 @@ def get_size(self, name): raise errors.MissingExternalFile from None else: raise e - + def remove_object(self, name): logger.debug('remove_object: {}:{}'.format(self.bucket, name)) try: From 82d23a7465d109abde77b34449a2210f1b9d3890 Mon Sep 17 00:00:00 2001 From: synicix Date: Thu, 10 Dec 2020 16:21:27 -0600 Subject: [PATCH 16/45] Format line to fit is 95 char requirements... --- tests/test_schema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_schema.py b/tests/test_schema.py index c180e922d..2d868b60d 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -107,7 +107,9 @@ class Unit(dj.Part): test_schema.drop() def test_list_tables(): - assert(['#a', '#argmax_test', '#data_a', '#data_b', '#i_j', '#j_i', '#l', '#t_test_update', '__b', '__b__c', '__d', '__e', '__e__f', 'f', 'reserved_word'] == schema_simple.list_tables()) + assert(['#a', '#argmax_test', '#data_a', '#data_b', '#i_j', '#j_i', '#l',\ + '#t_test_update', '__b', '__b__c', '__d', '__e', '__e__f', 'f',\ + 'reserved_word'] == schema_simple.list_tables()) def test_schema_save(): From 9021ff9831a5bac11999ee19fc27752044cc656a Mon Sep 17 00:00:00 2001 From: synicix Date: Thu, 10 Dec 2020 16:27:33 -0600 Subject: [PATCH 17/45] Remove white space --- datajoint/schemas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/schemas.py b/datajoint/schemas.py index 5b03e2033..6dc3777ed 100644 --- a/datajoint/schemas.py +++ b/datajoint/schemas.py @@ -298,7 +298,7 @@ def list_tables(self): return [table_name for (table_name,) in self.connection.query(""" SELECT table_name FROM information_schema.tables WHERE table_schema = %s and table_name NOT LIKE '~%%'""", args=(self.database))] - + class VirtualModule(types.ModuleType): """ From 6f2f15b58f53affe25f3479da5d3ee993622ed85 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 10 Dec 2020 21:14:49 -0600 Subject: [PATCH 18/45] fix topological sorting for dependencies --- datajoint/dependencies.py | 30 ++++++++++++++++++++++++++---- datajoint/diagram.py | 26 +++----------------------- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/datajoint/dependencies.py b/datajoint/dependencies.py index 170eeac80..1142da768 100644 --- a/datajoint/dependencies.py +++ b/datajoint/dependencies.py @@ -1,9 +1,31 @@ import networkx as nx import itertools +import re from collections import defaultdict, OrderedDict from .errors import DataJointError +def unite_master_parts(lst): + """ + reorder a list of tables names so that part tables immediately follow their master tables without breaking + the topological order. + Without this correction, a simple topological sort may insert other descendants between master and parts + :example: + _unite(['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']) + -> ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`'] + """ + if len(lst) <= 2: + return lst + el = lst.pop() + lst = unite_master_parts(lst) + match = re.match(r'(?P`\w+`.`\w+)__\w+`', el) + if match: + master = match.group('master') + if not lst[-1].startswith(master): + return unite_master_parts(lst[:-1] + [el, lst[-1]]) + return lst + [el] + + class Dependencies(nx.DiGraph): """ The graph of dependencies (foreign keys) between loaded tables. @@ -118,8 +140,8 @@ def descendants(self, full_table_name): self.load(force=False) nodes = self.subgraph( nx.algorithms.dag.descendants(self, full_table_name)) - return [full_table_name] + list( - nx.algorithms.dag.topological_sort(nodes)) + return unite_master_parts([full_table_name] + list( + nx.algorithms.dag.topological_sort(nodes))) def ancestors(self, full_table_name): """ @@ -129,5 +151,5 @@ def ancestors(self, full_table_name): self.load(force=False) nodes = self.subgraph( nx.algorithms.dag.ancestors(self, full_table_name)) - return [full_table_name] + list(reversed(list( - nx.algorithms.dag.topological_sort(nodes)))) + return unite_master_parts(list(reversed(list( + nx.algorithms.dag.topological_sort(nodes)))) + [full_table_name]) diff --git a/datajoint/diagram.py b/datajoint/diagram.py index 45fe95ea1..41ead0712 100644 --- a/datajoint/diagram.py +++ b/datajoint/diagram.py @@ -5,6 +5,7 @@ import warnings import inspect from .table import Table +from .dependencies import unite_master_parts try: from matplotlib import pyplot as plt @@ -155,29 +156,8 @@ def is_part(part, master): return self def topological_sort(self): - """ - :return: list of nodes in topological order - """ - - def _unite(lst): - """ - reorder list so that parts immediately follow their masters without breaking the topological order. - Without this correction, simple topological sort may insert other descendants between master and parts - :example: - _unite(['a', 'a__q', 'b', 'c', 'c__q', 'b__q', 'd', 'a__r']) - -> ['a', 'a__q', 'a__r', 'b', 'b__q', 'c', 'c__q', 'd'] - """ - if len(lst) <= 2: - return lst - el = lst.pop() - lst = _unite(lst) - if '__' in el: - master = el.split('__')[0] - if not lst[-1].startswith(master): - return _unite(lst[:-1] + [el, lst[-1]]) - return lst + [el] - - return _unite(list(nx.algorithms.dag.topological_sort( + """ :return: list of nodes in topological order """ + return unite_master_parts(list(nx.algorithms.dag.topological_sort( nx.DiGraph(self).subgraph(self.nodes_to_show)))) def __add__(self, arg): From 9021677142e04271c2219f0e13339a8944f50d79 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 10 Dec 2020 21:53:02 -0600 Subject: [PATCH 19/45] fix #821: the display of part tables in schema.save() --- datajoint/schemas.py | 2 +- datajoint/table.py | 1 - datajoint/user_tables.py | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datajoint/schemas.py b/datajoint/schemas.py index a708db3ce..ed436f294 100644 --- a/datajoint/schemas.py +++ b/datajoint/schemas.py @@ -258,7 +258,7 @@ def make_class_definition(table): class_name = table.split('.')[1].strip('`') indent = '' if tier == 'Part': - class_name = class_name.split('__')[1] + class_name = class_name.split('__')[-1] indent += ' ' class_name = to_camel_case(class_name) diff --git a/datajoint/table.py b/datajoint/table.py index 037d914ef..475c92f02 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -266,7 +266,6 @@ def make_row_to_insert(row): :param row: A tuple to insert :return: a dict with fields 'names', 'placeholders', 'values' """ - def make_placeholder(name, value): """ For a given attribute `name` with `value`, return its processed value or value placeholder diff --git a/datajoint/user_tables.py b/datajoint/user_tables.py index 3942264b5..c86926a90 100644 --- a/datajoint/user_tables.py +++ b/datajoint/user_tables.py @@ -14,6 +14,7 @@ supported_class_attrs = { 'key_source', 'describe', 'alter', 'heading', 'populate', 'progress', 'primary_key', 'proj', 'aggr', 'fetch', 'fetch1', 'head', 'tail', + 'descendants', 'ancestors', 'parents', 'children', 'insert', 'insert1', 'drop', 'drop_quick', 'delete', 'delete_quick'} From 53251234e001d33a1f9444f3267426c8a254b1b3 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 10 Dec 2020 22:09:34 -0600 Subject: [PATCH 20/45] add a test for unite_master_parts --- tests/test_dependencies.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py index ed986b94c..6e32d4817 100644 --- a/tests/test_dependencies.py +++ b/tests/test_dependencies.py @@ -1,10 +1,16 @@ -from nose.tools import assert_true, assert_false, assert_equal, assert_list_equal, raises +from nose.tools import assert_true, raises, assert_equal from .schema import * +from datajoint.dependencies import unite_master_parts + + +def test_unite_master_parts(): + assert_equal(unite_master_parts( + ['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']), + ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`']) def test_nullable_dependency(): """test nullable unique foreign key""" - # Thing C has a nullable dependency on B whose primary key is composite a = ThingA() b = ThingB() From 20e7284a43fb45ade011f9b0179d5f1c43b5c704 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 10 Dec 2020 22:23:37 -0600 Subject: [PATCH 21/45] update CHANGELOG --- CHANGELOG.md | 5 ++++- docs-parts/intro/Releases_lang1.rst | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8781a7911..f01887533 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,10 @@ ## Release notes -### 0.12.8 -- Nov 30, 2020 +### 0.12.8 -- Dec 14, 2020 * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 +* Fix display of part tables in `schema.save` (#821). PR #833 +* Add `schema.list_tables` ( +* Fix minio new version regression. PR #847 ### 0.12.7 -- Oct 27, 2020 * Fix case sensitivity issues to adapt to MySQL 8+. PR #819 diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index fb42486d2..193745614 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -1,6 +1,9 @@ -0.12.8 -- Nov 30, 2020 +0.12.8 -- Dec 14, 2020 --------------------- * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 +* Fix display of part tables in `schema.save` (#821). PR #833 +* Add `schema.list_tables`. PR #844 +* Fix minio new version regression. PR #847 0.12.7 -- Oct 27, 2020 ---------------------- From 2f989647e9956bc62dcdec4538c0c7a225934bfa Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 10 Dec 2020 22:30:25 -0600 Subject: [PATCH 22/45] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f01887533..7044ea403 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ### 0.12.8 -- Dec 14, 2020 * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 * Fix display of part tables in `schema.save` (#821). PR #833 -* Add `schema.list_tables` ( +* Add `schema.list_tables`. PR #844 * Fix minio new version regression. PR #847 ### 0.12.7 -- Oct 27, 2020 From 7f3ed71ee3638cbc4621e408501b590a7eabcf2c Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 10 Dec 2020 23:59:45 -0600 Subject: [PATCH 23/45] fix the printout of foreign keys to part tables schema.save() --- datajoint/schemas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/datajoint/schemas.py b/datajoint/schemas.py index 238b241a4..046f20767 100644 --- a/datajoint/schemas.py +++ b/datajoint/schemas.py @@ -247,7 +247,6 @@ def save(self, python_filename=None): This method is in preparation for a future release and is not officially supported. :return: a string containing the body of a complete Python module defining this schema. """ - module_count = itertools.count() # add virtual modules for referenced modules with names vmod0, vmod1, ... module_lookup = collections.defaultdict(lambda: 'vmod' + str(next(module_count))) @@ -263,10 +262,11 @@ def make_class_definition(table): class_name = to_camel_case(class_name) def repl(s): - d, tab = s.group(1), s.group(2) - return ('' if d == db else (module_lookup[d]+'.')) + to_camel_case(tab) + d, tabs = s.group(1), s.group(2) + return ('' if d == db else (module_lookup[d]+'.')) + '.'.join( + to_camel_case(tab) for tab in tabs.lstrip('__').split('__')) - return ('' if tier == 'Part' else '@schema\n') + \ + return ('' if tier == 'Part' else '\n@schema\n') + \ '{indent}class {class_name}(dj.{tier}):\n{indent} definition = """\n{indent} {defi}"""'.format( class_name=class_name, indent=indent, @@ -276,8 +276,8 @@ def repl(s): FreeTable(self.connection, table).describe(printout=False).replace('\n', '\n ' + indent))) diagram = Diagram(self) - body = '\n\n\n'.join(make_class_definition(table) for table in diagram.topological_sort()) - python_code = '\n\n\n'.join(( + body = '\n\n'.join(make_class_definition(table) for table in diagram.topological_sort()) + python_code = '\n\n'.join(( '"""This module was auto-generated by datajoint from an existing schema"""', "import datajoint as dj\n\nschema = dj.Schema('{db}')".format(db=db), '\n'.join("{module} = dj.VirtualModule('{module}', '{schema_name}')".format(module=v, schema_name=k) From 25981a16fc1de4a931a15b4160ba420a6ea7d680 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 14 Dec 2020 15:01:14 -0600 Subject: [PATCH 24/45] bugfix in topological sorting of table names for schema.save() --- datajoint/dependencies.py | 33 +++++++++++++++++++-------------- tests/test_dependencies.py | 20 ++++++++++++++++++-- 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/datajoint/dependencies.py b/datajoint/dependencies.py index 1142da768..16dfc8c56 100644 --- a/datajoint/dependencies.py +++ b/datajoint/dependencies.py @@ -11,19 +11,24 @@ def unite_master_parts(lst): the topological order. Without this correction, a simple topological sort may insert other descendants between master and parts :example: - _unite(['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']) - -> ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`'] + unit_master_parts( + ['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']) -> + ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`'] """ - if len(lst) <= 2: - return lst - el = lst.pop() - lst = unite_master_parts(lst) - match = re.match(r'(?P`\w+`.`\w+)__\w+`', el) - if match: - master = match.group('master') - if not lst[-1].startswith(master): - return unite_master_parts(lst[:-1] + [el, lst[-1]]) - return lst + [el] + for i in range(2, len(lst)): + name = lst[i] + match = re.match(r'(?P`\w+`.`\w+)__\w+`', name) + if match: # name is a part table + master = match.group('master') + for j in range(i-1, -1, -1): + if lst[j] == master + '`' or lst[j].startswith(master + '__'): + # move from the ith position to the (j+1)th position + del lst[i] + lst = lst[:j+1] + [name] + lst[j+1:] + break + else: + raise DataJointError("Found a part table {name} without its master table.".format(name=name)) + return lst class Dependencies(nx.DiGraph): @@ -151,5 +156,5 @@ def ancestors(self, full_table_name): self.load(force=False) nodes = self.subgraph( nx.algorithms.dag.ancestors(self, full_table_name)) - return unite_master_parts(list(reversed(list( - nx.algorithms.dag.topological_sort(nodes)))) + [full_table_name]) + return list(reversed(unite_master_parts(list( + nx.algorithms.dag.topological_sort(nodes)) + [full_table_name]))) diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py index 6e32d4817..2b7687bb5 100644 --- a/tests/test_dependencies.py +++ b/tests/test_dependencies.py @@ -1,12 +1,28 @@ -from nose.tools import assert_true, raises, assert_equal +from nose.tools import assert_true, raises, assert_list_equal from .schema import * from datajoint.dependencies import unite_master_parts def test_unite_master_parts(): - assert_equal(unite_master_parts( + assert_list_equal(unite_master_parts( ['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']), ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`']) + assert_list_equal(unite_master_parts( + [ + '`cells`.`cell_analysis_method`', + '`cells`.`cell_analysis_method_task_type`', + '`cells`.`cell_analysis_method_users`', + '`cells`.`favorite_selection`', + '`cells`.`cell_analysis_method__cell_selection_params`', + '`cells`.`cell_analysis_method__field_detect_params`']), + [ + '`cells`.`cell_analysis_method`', + '`cells`.`cell_analysis_method__cell_selection_params`', + '`cells`.`cell_analysis_method__field_detect_params`', + '`cells`.`cell_analysis_method_task_type`', + '`cells`.`cell_analysis_method_users`', + '`cells`.`favorite_selection`' + ]) def test_nullable_dependency(): From fb4e71ee922c0f95603b8295eeb1006176f804be Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 14 Dec 2020 15:25:44 -0600 Subject: [PATCH 25/45] improve docstring in dependencies.unite_master_parts --- datajoint/dependencies.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/datajoint/dependencies.py b/datajoint/dependencies.py index 16dfc8c56..51d997cfb 100644 --- a/datajoint/dependencies.py +++ b/datajoint/dependencies.py @@ -7,11 +7,12 @@ def unite_master_parts(lst): """ - reorder a list of tables names so that part tables immediately follow their master tables without breaking + re-order a list of table names so that part tables immediately follow their master tables without breaking the topological order. - Without this correction, a simple topological sort may insert other descendants between master and parts + Without this correction, a simple topological sort may insert other descendants between master and parts. + The input list must be topologically sorted. :example: - unit_master_parts( + unite_master_parts( ['`s`.`a`', '`s`.`a__q`', '`s`.`b`', '`s`.`c`', '`s`.`c__q`', '`s`.`b__q`', '`s`.`d`', '`s`.`a__r`']) -> ['`s`.`a`', '`s`.`a__q`', '`s`.`a__r`', '`s`.`b`', '`s`.`b__q`', '`s`.`c`', '`s`.`c__q`', '`s`.`d`'] """ From 45c1540b19a923d55f3c74508c7b786decb79e20 Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 15 Dec 2020 15:44:14 -0600 Subject: [PATCH 26/45] Add template for bug reports and new feature requests. --- .github/ISSUE_TEMPLATE/bug_report.md | 37 ++++++++++++++++++ .github/ISSUE_TEMPLATE/feature_request.md | 46 +++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..d62a4a6ec --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,37 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: 'bug, awaiting-triage' +assignees: '' + +--- + +## Bug Report + +### Description +A clear and concise description of what is the overall operation that is intended to be performed that resulted in an error. + +### Reproducibility +Include: +- OS (WIN | MACOS | Linux) +- Python Version OR MATLAB Version +- MySQL Version +- MySQL Deployment Strategy (local-native | local-docker | remote) +- DataJoint Version +- Minimum number of steps to reliably reproduce the issue +- Complete error stack as a result of evaluating the above steps + +### Expected Behavior +A clear and concise description of what you expected to happen. + +### Screenshots (Optional) +If applicable, add screenshots to help explain your problem. + +### Additional Research and Context (Optional) +Add any additional research or context that was conducted in creating this report. + +For example: +- Related GitHub issues and PR's either within this repository or in other relevant repositories. +- Specific links to specific lines or a focus within source code. +- Relevant summary of Maintainers development meetings, milestones, projects, etc. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 000000000..49b7fa295 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,46 @@ +--- +name: Feature request +about: Suggest an idea for a new feature +title: '' +labels: 'enhancement, awaiting-triage' +assignees: '' + +--- + +## Feature Request + +### Problem +A clear and concise description how this idea has manifested and the context. Elaborate on the need for this feature and/or what could be improved. Ex. I'm always frustrated when [...] + +### Requirements +A clear and concise description of the requirements to satisfy the new feature. Detail what you expect from a successful implementation of the feature. Ex. When using this feature, it should [...] + +### Justification +Provide the key benefits in making this a supported feature. Ex. Adding support for this feature would ensure [...] + +### Alternative Considerations +Do you currently have a work-around for this? Provide any alternative solutions or features you've considered. + +### Related Errors (Optional) +Add any errors as a direct result of not exposing this feature. + +Please include steps to reproduce provided errors as follows: +- OS (WIN | MACOS | Linux) +- Python Version OR MATLAB Version +- MySQL Version +- MySQL Deployment Strategy (local-native | local-docker | remote) +- DataJoint Version +- Minimum number of steps to reliably reproduce the issue +- Complete error stack as a result of evaluating the above steps + +### Screenshots (Optional) +If applicable, add screenshots to help explain your feature. + +### Additional Research and Context (Optional) +Add any additional research or context that was conducted in creating this feature request. + +For example: +- Related GitHub issues and PR's either within this repository or in other relevant repositories. +- Specific links to specific line or focus within source code. +- Relevant summary of Maintainers development meetings, milestones, projects, etc. +- Any additional supplemental web references or links that would further justify this feature request. From f36dc05894012f7968c7e9ec6f7c00fcd265966b Mon Sep 17 00:00:00 2001 From: guzman-raphael Date: Tue, 15 Dec 2020 18:25:14 -0600 Subject: [PATCH 27/45] Remove optional reference since there is no means to enforce issue structure; it is simply a guide. --- .github/ISSUE_TEMPLATE/bug_report.md | 4 ++-- .github/ISSUE_TEMPLATE/feature_request.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index d62a4a6ec..d386d4d4d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -25,10 +25,10 @@ Include: ### Expected Behavior A clear and concise description of what you expected to happen. -### Screenshots (Optional) +### Screenshots If applicable, add screenshots to help explain your problem. -### Additional Research and Context (Optional) +### Additional Research and Context Add any additional research or context that was conducted in creating this report. For example: diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 49b7fa295..4d4eeffd9 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -21,7 +21,7 @@ Provide the key benefits in making this a supported feature. Ex. Adding support ### Alternative Considerations Do you currently have a work-around for this? Provide any alternative solutions or features you've considered. -### Related Errors (Optional) +### Related Errors Add any errors as a direct result of not exposing this feature. Please include steps to reproduce provided errors as follows: @@ -33,10 +33,10 @@ Please include steps to reproduce provided errors as follows: - Minimum number of steps to reliably reproduce the issue - Complete error stack as a result of evaluating the above steps -### Screenshots (Optional) +### Screenshots If applicable, add screenshots to help explain your feature. -### Additional Research and Context (Optional) +### Additional Research and Context Add any additional research or context that was conducted in creating this feature request. For example: From 362d89a0acb15839c0540c9f780fc3763cf8c223 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 18 Dec 2020 09:31:16 -0600 Subject: [PATCH 28/45] Update CHANGELOG.md Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7044ea403..051a9e868 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ### 0.12.8 -- Dec 14, 2020 * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 * Fix display of part tables in `schema.save` (#821). PR #833 -* Add `schema.list_tables`. PR #844 +* Add `schema.list_tables`. (#838) PR #844 * Fix minio new version regression. PR #847 ### 0.12.7 -- Oct 27, 2020 From fbce1a44d4f5de047e8f18ce2e931480f7c43d93 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 18 Dec 2020 09:31:30 -0600 Subject: [PATCH 29/45] Update CHANGELOG.md Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 051a9e868..15b2e8650 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,8 @@ ### 0.12.8 -- Dec 14, 2020 * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 -* Fix display of part tables in `schema.save` (#821). PR #833 +* Load dependencies before querying dependencies. (#179) PR #833 +* Fix display of part tables in `schema.save`. (#821) PR #833 * Add `schema.list_tables`. (#838) PR #844 * Fix minio new version regression. PR #847 From 8ac9fffa243dd2e7f054dd712f2fa89b2540b17b Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 18 Dec 2020 09:33:37 -0600 Subject: [PATCH 30/45] Update CHANGELOG.md Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15b2e8650..d572aece7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## Release notes -### 0.12.8 -- Dec 14, 2020 +### 0.12.8 -- Dec 21, 2020 * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 * Load dependencies before querying dependencies. (#179) PR #833 * Fix display of part tables in `schema.save`. (#821) PR #833 From e956ae3861e79204c64d38a8bcd35ed92e07a3b7 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 18 Dec 2020 09:35:20 -0600 Subject: [PATCH 31/45] Update datajoint/dependencies.py Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- datajoint/dependencies.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datajoint/dependencies.py b/datajoint/dependencies.py index 51d997cfb..a4e1afb0c 100644 --- a/datajoint/dependencies.py +++ b/datajoint/dependencies.py @@ -24,8 +24,7 @@ def unite_master_parts(lst): for j in range(i-1, -1, -1): if lst[j] == master + '`' or lst[j].startswith(master + '__'): # move from the ith position to the (j+1)th position - del lst[i] - lst = lst[:j+1] + [name] + lst[j+1:] + lst[j+1:i+1] = [name] + lst[j+1:i] break else: raise DataJointError("Found a part table {name} without its master table.".format(name=name)) From f2c8a535ab23fa4184a1503363bb6d049e90bd65 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 18 Dec 2020 09:36:07 -0600 Subject: [PATCH 32/45] Include `parts` in the list of class properties in UserTable Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- datajoint/user_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/user_tables.py b/datajoint/user_tables.py index c86926a90..cdf969902 100644 --- a/datajoint/user_tables.py +++ b/datajoint/user_tables.py @@ -14,7 +14,7 @@ supported_class_attrs = { 'key_source', 'describe', 'alter', 'heading', 'populate', 'progress', 'primary_key', 'proj', 'aggr', 'fetch', 'fetch1', 'head', 'tail', - 'descendants', 'ancestors', 'parents', 'children', + 'descendants', 'parts', 'ancestors', 'parents', 'children', 'insert', 'insert1', 'drop', 'drop_quick', 'delete', 'delete_quick'} From 2730055de2d4dfe9db2f63986a4c1fbccc719209 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 18 Dec 2020 09:37:17 -0600 Subject: [PATCH 33/45] Update CHANGELOG.md Co-authored-by: Raphael Guzman <38401847+guzman-raphael@users.noreply.github.com> --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d572aece7..72cb360bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ * Fix display of part tables in `schema.save`. (#821) PR #833 * Add `schema.list_tables`. (#838) PR #844 * Fix minio new version regression. PR #847 + * Add more S3 logging for debugging. (#831) PR #832 + * Convert testing framework from TravisCI to GitHub Actions (#841) PR #840 ### 0.12.7 -- Oct 27, 2020 * Fix case sensitivity issues to adapt to MySQL 8+. PR #819 From 4ff94663afbbb4775f90e99447b915b448654213 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 21 Dec 2020 10:16:28 -0600 Subject: [PATCH 34/45] minor style --- datajoint/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datajoint/utils.py b/datajoint/utils.py index 8ad6f4b48..a20f26059 100644 --- a/datajoint/utils.py +++ b/datajoint/utils.py @@ -45,13 +45,13 @@ def to_camel_case(s): :param s: string in under_score notation :returns: string in CamelCase notation Example: - >>> to_camel_case("table_name") # yields "TableName" + >>> to_camel_case("table_name") # returns "TableName" """ def to_upper(match): return match.group(0)[-1].upper() - return re.sub('(^|[_\W])+[a-zA-Z]', to_upper, s) + return re.sub(r'(^|[_\W])+[a-zA-Z]', to_upper, s) def from_camel_case(s): From ae767b7b92aac05508c177614b7e18de5e58a917 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Mon, 21 Dec 2020 10:37:30 -0600 Subject: [PATCH 35/45] update release notes in documentation --- CHANGELOG.md | 6 +++--- datajoint/table.py | 6 ++---- docs-parts/intro/Releases_lang1.rst | 9 ++++++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72cb360bf..c0e096efb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,13 @@ ## Release notes -### 0.12.8 -- Dec 21, 2020 +### 0.12.8 -- Dec 22, 2020 * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 * Load dependencies before querying dependencies. (#179) PR #833 * Fix display of part tables in `schema.save`. (#821) PR #833 * Add `schema.list_tables`. (#838) PR #844 * Fix minio new version regression. PR #847 - * Add more S3 logging for debugging. (#831) PR #832 - * Convert testing framework from TravisCI to GitHub Actions (#841) PR #840 +* Add more S3 logging for debugging. (#831) PR #832 +* Convert testing framework from TravisCI to GitHub Actions (#841) PR #840 ### 0.12.7 -- Oct 27, 2020 * Fix case sensitivity issues to adapt to MySQL 8+. PR #819 diff --git a/datajoint/table.py b/datajoint/table.py index 475c92f02..7eca61113 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -161,10 +161,8 @@ def parts(self, as_objects=False): return nodes def ancestors(self, as_objects=False): - nodes = [node for node in self.connection.dependencies.ancestors(self.full_table_name) if not node.isdigit()] - if as_objects: - nodes = [FreeTable(self.connection, c) for c in nodes] - return nodes + return [FreeTable(self.connection, node) if as_objects else node + for node in self.connection.dependencies.ancestors(self.full_table_name) if not node.isdigit()] @property def is_declared(self): diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 193745614..1db08b524 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -1,9 +1,12 @@ -0.12.8 -- Dec 14, 2020 +0.12.8 -- Dec 22, 2020 --------------------- * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 -* Fix display of part tables in `schema.save` (#821). PR #833 -* Add `schema.list_tables`. PR #844 +* Load dependencies before querying dependencies. (#179) PR #833 +* Fix display of part tables in `schema.save`. (#821) PR #833 +* Add `schema.list_tables`. (#838) PR #844 * Fix minio new version regression. PR #847 +* Add more S3 logging for debugging. (#831) PR #832 +* Convert testing framework from TravisCI to GitHub Actions (#841) PR #840 0.12.7 -- Oct 27, 2020 ---------------------- From 137de92d10b392d4157ad3bf564fcfeefc85cc58 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 24 Dec 2020 08:33:19 -0600 Subject: [PATCH 36/45] improve table.parents and table.children with the option to include foreign key information --- datajoint/autopopulate.py | 28 +++++------- datajoint/table.py | 94 ++++++++++++++++++++++---------------- tests/test_fetch.py | 2 +- tests/test_foreign_keys.py | 2 +- 4 files changed, 67 insertions(+), 59 deletions(-) diff --git a/datajoint/autopopulate.py b/datajoint/autopopulate.py index 22f945bd7..d851fe9f0 100644 --- a/datajoint/autopopulate.py +++ b/datajoint/autopopulate.py @@ -32,25 +32,19 @@ def key_source(self): The default value is the join of the parent relations. Users may override to change the granularity or the scope of populate() calls. """ - def parent_gen(self): - if self.target.full_table_name not in self.connection.dependencies: - self.connection.dependencies.load() - for parent_name, fk_props in self.target.parents(primary=True).items(): - if not parent_name.isdigit(): # simple foreign key - yield FreeTable(self.connection, parent_name).proj() - else: - grandparent = list(self.connection.dependencies.in_edges(parent_name))[0][0] - yield FreeTable(self.connection, grandparent).proj(**{ - attr: ref for attr, ref in fk_props['attr_map'].items() if ref != attr}) + def _rename_attributes(table, props): + return (table.proj( + **{attr: ref for attr, ref in props['attr_map'].items() if attr != ref}) + if props['aliased'] else table) if self._key_source is None: - parents = parent_gen(self) - try: - self._key_source = next(parents) - except StopIteration: - raise DataJointError('A relation must have primary dependencies for auto-populate to work') from None - for q in parents: - self._key_source *= q + parents = self.target.parents(primary=True, as_objects=True, foreign_key_info=True) + if not parents: + raise DataJointError( + 'A relation must have primary dependencies for auto-populate to work') from None + self._key_source = _rename_attributes(*parents[0]) + for q in parents[1:]: + self._key_source *= _rename_attributes(*q) return self._key_source def make(self, key): diff --git a/datajoint/table.py b/datajoint/table.py index 7eca61113..72c47a655 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -11,7 +11,7 @@ from .declare import declare, alter from .expression import QueryExpression from . import blob -from .utils import user_choice +from .utils import user_choice, OrderedDict from .heading import Heading from .errors import DuplicateError, AccessError, DataJointError, UnknownAttributeError from .version import __version__ as version @@ -40,8 +40,7 @@ class Table(QueryExpression): @property def heading(self): """ - Returns the table heading. If the table is not declared, attempts to declare it and return heading. - :return: table heading + :return: table heading. If the table is not declared, attempts to declare it first. """ if self._heading is None: self._heading = Heading() # instance-level heading @@ -53,7 +52,8 @@ def heading(self): def declare(self, context=None): """ Declare the table in the schema based on self.definition. - :param context: the context for foreign key resolution. If None, foreign keys are not allowed. + :param context: the context for foreign key resolution. If None, foreign keys are + not allowed. """ if self.connection.in_transaction: raise DataJointError('Cannot declare new tables inside a transaction, ' @@ -116,38 +116,59 @@ def get_select_fields(self, select_fields=None): """ return '*' if select_fields is None else self.heading.project(select_fields).as_sql - def parents(self, primary=None, as_objects=False): + def parents(self, primary=None, as_objects=False, foreign_key_info=False): """ :param primary: if None, then all parents are returned. If True, then only foreign keys composed of - primary key attributes are considered. If False, the only foreign keys including at least one non-primary - attribute are considered. - :param as_objects: if False (default), the output is a dict describing the foreign keys. If True, return table objects. - :return: dict of tables referenced with self's foreign keys or list of table objects if as_objects=True - """ - parents = self.connection.dependencies.parents(self.full_table_name, primary) + primary key attributes are considered. If False, return foreign keys including at least one + secondary attribute. + :param as_objects: if False, return table names. If True, return table objects. + :param foreign_key_info: if True, each element in result also includes foreign key info. + :return: list of parents as table names or table objects + with (optional) foreign key information. + """ + get_edge = self.connection.dependencies.parents + nodes = [next(iter(get_edge(name).items())) if name.isdigit() else (name, props) + for name, props in get_edge(self.full_table_name, primary).items()] if as_objects: - parents = [FreeTable(self.connection, c) for c in parents] - return parents + nodes = [(FreeTable(self.connection, name), props) for name, props in nodes] + if not foreign_key_info: + nodes = [name for name, props in nodes] + return nodes - def children(self, primary=None, as_objects=False): + def children(self, primary=None, as_objects=False, foreign_key_info=False): """ :param primary: if None, then all children are returned. If True, then only foreign keys composed of - primary key attributes are considered. If False, the only foreign keys including at least one non-primary - attribute are considered. - :param as_objects: if False (default), the output is a dict describing the foreign keys. If True, return table objects. - :return: dict of tables with foreign keys referencing self or list of table objects if as_objects=True - """ - nodes = dict((next(iter(self.connection.dependencies.children(k).items())) if k.isdigit() else (k, v)) - for k, v in self.connection.dependencies.children(self.full_table_name, primary).items()) + primary key attributes are considered. If False, return foreign keys including at least one + secondary attribute. + :param as_objects: if False, return table names. If True, return table objects. + :param foreign_key_info: if True, each element in result also includes foreign key info. + :return: list of children as table names or table objects + with (optional) foreign key information. + """ + get_edge = self.connection.dependencies.children + nodes = [next(iter(get_edge(name).items())) if name.isdigit() else (name, props) + for name, props in get_edge(self.full_table_name, primary).items()] if as_objects: - nodes = [FreeTable(self.connection, c) for c in nodes] + nodes = [(FreeTable(self.connection, name), props) for name, props in nodes] + if not foreign_key_info: + nodes = [name for name, props in nodes] return nodes def descendants(self, as_objects=False): - nodes = [node for node in self.connection.dependencies.descendants(self.full_table_name) if not node.isdigit()] - if as_objects: - nodes = [FreeTable(self.connection, c) for c in nodes] - return nodes + """ + :param as_objects: False - a list of table names; True - a list of table objects. + :return: list of tables descendants in topological order. + """ + return [FreeTable(self.connection, node) if as_objects else node + for node in self.connection.dependencies.descendants(self.full_table_name) if not node.isdigit()] + + def ancestors(self, as_objects=False): + """ + :param as_objects: False - a list of table names; True - a list of table objects. + :return: list of tables ancestors in topological order. + """ + return [FreeTable(self.connection, node) if as_objects else node + for node in self.connection.dependencies.ancestors(self.full_table_name) if not node.isdigit()] def parts(self, as_objects=False): """ @@ -156,13 +177,7 @@ def parts(self, as_objects=False): """ nodes = [node for node in self.connection.dependencies.nodes if not node.isdigit() and node.startswith(self.full_table_name[:-1] + '__')] - if as_objects: - nodes = [FreeTable(self.connection, c) for c in nodes] - return nodes - - def ancestors(self, as_objects=False): - return [FreeTable(self.connection, node) if as_objects else node - for node in self.connection.dependencies.ancestors(self.full_table_name) if not node.isdigit()] + return [FreeTable(self.connection, c) for c in nodes] if as_objects else nodes @property def is_declared(self): @@ -525,7 +540,7 @@ def describe(self, context=None, printout=True): del frame if self.full_table_name not in self.connection.dependencies: self.connection.dependencies.load() - parents = self.parents() + parents = self.parents(foreign_key_info=True) in_key = True definition = ('# ' + self.heading.table_info['comment'] + '\n' if self.heading.table_info['comment'] else '') @@ -538,11 +553,10 @@ def describe(self, context=None, printout=True): in_key = False attributes_thus_far.add(attr.name) do_include = True - for parent_name, fk_props in list(parents.items()): # need list() to force a copy + for parent_name, fk_props in parents: if attr.name in fk_props['attr_map']: do_include = False if attributes_thus_far.issuperset(fk_props['attr_map']): - parents.pop(parent_name) # foreign key properties try: index_props = indexes.pop(tuple(fk_props['attr_map'])) @@ -552,19 +566,19 @@ def describe(self, context=None, printout=True): index_props = [k for k, v in index_props.items() if v] index_props = ' [{}]'.format(', '.join(index_props)) if index_props else '' - if not parent_name.isdigit(): + if not fk_props['aliased']: # simple foreign key definition += '->{props} {class_name}\n'.format( props=index_props, class_name=lookup_class_name(parent_name, context) or parent_name) else: # projected foreign key - parent_name = list(self.connection.dependencies.in_edges(parent_name))[0][0] - lst = [(attr, ref) for attr, ref in fk_props['attr_map'].items() if ref != attr] definition += '->{props} {class_name}.proj({proj_list})\n'.format( props=index_props, class_name=lookup_class_name(parent_name, context) or parent_name, - proj_list=','.join('{}="{}"'.format(a, b) for a, b in lst)) + proj_list=','.join( + '{}="{}"'.format(attr, ref) + for attr, ref in fk_props['attr_map'].items() if ref != attr)) attributes_declared.update(fk_props['attr_map']) if do_include: attributes_declared.add(attr.name) diff --git a/tests/test_fetch.py b/tests/test_fetch.py index 8c8d50549..c72c3c896 100644 --- a/tests/test_fetch.py +++ b/tests/test_fetch.py @@ -201,7 +201,7 @@ def test_fetch1_step3(self): self.lang.fetch1('name') def test_decimal(self): - """Tests that decimal fields are correctly fetched and used in restrictions, see issue #334""" + """ Tests that decimal fields are correctly fetched and used in restrictions, see issue #334""" rel = schema.DecimalPrimaryKey() rel.insert1([decimal.Decimal('3.1415926')]) keys = rel.fetch() diff --git a/tests/test_foreign_keys.py b/tests/test_foreign_keys.py index 719ae7129..fbf023bb0 100644 --- a/tests/test_foreign_keys.py +++ b/tests/test_foreign_keys.py @@ -1,4 +1,4 @@ -from nose.tools import assert_equal, assert_false, assert_true, raises +from nose.tools import assert_equal, assert_false, assert_true from datajoint.declare import declare from . import schema_advanced From 2523cc6072fdda951857970e95eeae2534241417 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 24 Dec 2020 09:42:51 -0600 Subject: [PATCH 37/45] fix the requirements file --- requirements.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8f758659e..628e14e6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,10 +6,5 @@ pandas tqdm networkx pydot -<<<<<<< HEAD -minio -matplotlib -======= minio>=7.0.0 -matplotlib ->>>>>>> master +matplotlib \ No newline at end of file From 2c9071ec27a9f2543ae6dad79ba61ebe5989fb1d Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 24 Dec 2020 17:36:06 -0600 Subject: [PATCH 38/45] minor PEP8 styling --- datajoint/schemas.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/datajoint/schemas.py b/datajoint/schemas.py index 106c9c21b..04fee44a1 100644 --- a/datajoint/schemas.py +++ b/datajoint/schemas.py @@ -196,9 +196,9 @@ def _decorate_table(self, table_class, context, assert_declared=False): contents = list(instance.contents) if len(contents) > len(instance): if instance.heading.has_autoincrement: - warnings.warn( - 'Contents has changed but cannot be inserted because {table} has autoincrement.'.format( - table=instance.__class__.__name__)) + warnings.warn(('Contents has changed but cannot be inserted because ' + '{table} has autoincrement.').format( + table=instance.__class__.__name__)) else: instance.insert(contents, skip_duplicates=True) @@ -338,15 +338,16 @@ def replace(s): return ('' if d == db else (module_lookup[d]+'.')) + '.'.join( to_camel_case(tab) for tab in tabs.lstrip('__').split('__')) - return ('' if tier == 'Part' else '\n@schema\n') + \ - '{indent}class {class_name}(dj.{tier}):\n{indent} definition = """\n{indent} {defi}"""'.format( + return ('' if tier == 'Part' else '\n@schema\n') + ( + '{indent}class {class_name}(dj.{tier}):\n' + '{indent} definition = """\n' + '{indent} {defi}"""').format( class_name=class_name, indent=indent, tier=tier, - defi=re.sub( - r'`([^`]+)`.`([^`]+)`', replace, - FreeTable(self.connection, table).describe(printout=False).replace( - '\n', '\n ' + indent))) + defi=re.sub(r'`([^`]+)`.`([^`]+)`', replace, + FreeTable(self.connection, table).describe(printout=False) + ).replace('\n', '\n ' + indent)) diagram = Diagram(self) body = '\n\n'.join(make_class_definition(table) for table in diagram.topological_sort()) From 69a52104fb7cfd538a194a63425b6d7f65ed1c2a Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 24 Dec 2020 18:09:43 -0600 Subject: [PATCH 39/45] fix release notes --- CHANGELOG.md | 4 ---- docs-parts/intro/Releases_lang1.rst | 21 +-------------------- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ceedef8f..50d2676b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,6 @@ * Python datatypes are now enabled by default in blobs (#761). PR #785 * Added permissive join and restriction operators `@` and `^` (#785) PR #754 -### 0.12.8 -- Nov 30, 2020 -* table.children, .parents, .descendents, and ancestors optionally return queryable objects. PR #833 -======= ### 0.13.0 -- January 11, 2020 * Reimplement query parsing, fixing issues (#386, #449, #450, #484). PR #754 * Add table method `.update1` to update a row in the table with new values @@ -25,7 +22,6 @@ * Fix minio new version regression. PR #847 * Add more S3 logging for debugging. (#831) PR #832 * Convert testing framework from TravisCI to GitHub Actions (#841) PR #840 ->>>>>>> aggr_fix ### 0.12.7 -- Oct 27, 2020 * Fix case sensitivity issues to adapt to MySQL 8+. PR #819 diff --git a/docs-parts/intro/Releases_lang1.rst b/docs-parts/intro/Releases_lang1.rst index 0aed2993a..d741434dd 100644 --- a/docs-parts/intro/Releases_lang1.rst +++ b/docs-parts/intro/Releases_lang1.rst @@ -1,5 +1,4 @@ -<<<<<<< HEAD -0.13.0 -- Dec 15, 2020 +0.13.0 -- Jan 11, 2020 ---------------------- * Re-implement query transpilation into SQL, fixing issues (#386, #449, #450, #484). PR #754 * Re-implement cascading deletes for better performance. PR #839. @@ -7,23 +6,6 @@ * Python datatypes are now enabled by default in blobs (#761). PR #785 * Added permissive join and restriction operators `@` and `^` (#785) PR #754 -0.12.8 -- Nov 30, 2020 ----------------------- -* table.children, .parents, .descendents, and ancestors optionally return queryable objects. PR #833 - -0.12.7 -- Oct 27, 2020 ----------------------- -* Fix case sensitivity issues to adapt to MySQL 8+. PR #819 -* Fix pymysql regression bug (#814) PR #816 -* Adapted attribute types now have dtype=object in all recarray results. PR #811 -======= -0.13.0 -- Jan 11, 2021 ---------------------- -* Reimplement query parsing, fixing issues (#386, #449, #450, #484). PR #754 -* Add table method `.update1` to update a row in the table with new values -* Python datatypes are now enabled by default in blobs (#761). PR #785 -* Added permissive join and restriction operators `@` and `^` (#785) PR #754 - 0.12.8 -- Dec 22, 2020 ---------------------- * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 @@ -33,7 +15,6 @@ * Fix minio new version regression. PR #847 * Add more S3 logging for debugging. (#831) PR #832 * Convert testing framework from TravisCI to GitHub Actions (#841) PR #840 ->>>>>>> aggr_fix 0.12.7 -- Oct 27, 2020 ---------------------- From 4e3d4d7b781162b33143019313fcb6114b01d7a6 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 24 Dec 2020 18:16:18 -0600 Subject: [PATCH 40/45] fix CHANGELOG --- CHANGELOG.md | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50d2676b1..4b276ed07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,19 +1,12 @@ ## Release notes -<<<<<<< HEAD -### 0.13.0 -- Dec 15, 2020 +### 0.13.0 -- Jan 11, 2020 * Re-implement query transpilation into SQL, fixing issues (#386, #449, #450, #484). PR #754 * Re-implement cascading deletes for better performance. PR #839. * Add table method `.update1` to update an existing row in its table. * Python datatypes are now enabled by default in blobs (#761). PR #785 * Added permissive join and restriction operators `@` and `^` (#785) PR #754 -### 0.13.0 -- January 11, 2020 -* Reimplement query parsing, fixing issues (#386, #449, #450, #484). PR #754 -* Add table method `.update1` to update a row in the table with new values -* Python datatypes are now enabled by default in blobs (#761). PR #785 -* Added permissive join and restriction operators `@` and `^` (#785) PR #754 - ### 0.12.8 -- Dec 22, 2020 * table.children, .parents, .descendents, and ancestors can return queryable objects. PR #833 * Load dependencies before querying dependencies. (#179) PR #833 From 403c364f38bb5d5e88a09cc80e2dff8cfb94b148 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 1 Jan 2021 14:01:53 -0600 Subject: [PATCH 41/45] fix relational restrictions applied before group by --- datajoint/expression.py | 102 ++++++++++++++++++++++++--------------- datajoint/heading.py | 9 ++-- datajoint/table.py | 4 +- tests/test_university.py | 11 ++++- 4 files changed, 81 insertions(+), 45 deletions(-) diff --git a/datajoint/expression.py b/datajoint/expression.py index 13caefea3..1b73827b2 100644 --- a/datajoint/expression.py +++ b/datajoint/expression.py @@ -93,9 +93,9 @@ def from_clause(self): using="" if not a else " USING (%s)" % ",".join('`%s`' % _ for _ in a)) return clause - @property def where_clause(self): - return '' if not self.restriction else ' WHERE(%s)' % ')AND('.join(str(s) for s in self.restriction) + return '' if not self.restriction else ' WHERE(%s)' % ')AND('.join( + str(s) for s in self.restriction) def make_sql(self, fields=None): """ @@ -106,7 +106,7 @@ def make_sql(self, fields=None): return 'SELECT {distinct}{fields} FROM {from_}{where}'.format( distinct="DISTINCT " if distinct else "", fields=self.heading.as_sql(fields or self.heading.names), - from_=self.from_clause(), where=self.where_clause) + from_=self.from_clause(), where=self.where_clause()) # --------- query operators ----------- def make_subquery(self): @@ -180,7 +180,7 @@ def restrict(self, restriction): result = self.make_subquery() else: result = copy.copy(self) - result._restriction = AndList(self.restriction) # make a copy to protect the original + result._restriction = AndList(self.restriction) # copy to preserve the original result.restriction.append(new_condition) result.restriction_attributes.update(attributes) return result @@ -421,25 +421,27 @@ def tail(self, limit=25, **fetch_kwargs): def __len__(self): """ :return: number of elements in the result set """ - what = '*' if set(self.heading.names) != set(self.primary_key) else 'DISTINCT %s' % ','.join( - (self.heading[k].attribute_expression or '`%s`' % k for k in self.primary_key)) return self.connection.query( - 'SELECT count({what}) FROM {from_}{where}'.format( - what=what, + 'SELECT count(DISTINCT {fields}) FROM {from_}{where}'.format( + fields=self.heading.as_sql(self.primary_key, include_aliases=False), from_=self.from_clause(), - where=self.where_clause)).fetchone()[0] + where=self.where_clause())).fetchone()[0] def __bool__(self): """ - :return: True if the result is not empty. Equivalent to len(rel)>0 but may be more efficient. + :return: True if the result is not empty. Equivalent to len(self) > 0 but often faster. """ - return len(self) > 0 + return bool(self.connection.query( + 'SELECT EXISTS(SELECT 1 FROM {from_}{where})'.format( + from_=self.from_clause(), + where=self.where_clause())).fetchone()[0]) def __contains__(self, item): """ returns True if item is found in the . :param item: any restriction - (item in query_expression) is equivalent to bool(query_expression & item) but may be executed more efficiently. + (item in query_expression) is equivalent to bool(query_expression & item) but may be + executed more efficiently. """ return bool(self & item) # May be optimized e.g. using an EXISTS query @@ -453,7 +455,8 @@ def __next__(self): key = self._iter_keys.pop(0) except AttributeError: # self._iter_keys is missing because __iter__ has not been called. - raise TypeError("'QueryExpression' object is not an iterator. Use iter(obj) to create an iterator.") + raise TypeError("A QueryExpression object is not an iterator. " + "Use iter(obj) to create an iterator.") except IndexError: raise StopIteration else: @@ -463,7 +466,8 @@ def __next__(self): try: return (self & key).fetch1() except DataJointError: - # The data may have been deleted since the moment the keys were fetched -- move on to next entry. + # The data may have been deleted since the moment the keys were fetched + # -- move on to next entry. return next(self) def cursor(self, offset=0, limit=None, order_by=None, as_dict=False): @@ -495,8 +499,10 @@ def _repr_html_(self): class Aggregation(QueryExpression): """ - Aggregation(rel, comp1='expr1', ..., compn='exprn') yields an entity set with the primary key specified by rel.heading. - The computed arguments comp1, ..., compn use aggregation operators on the attributes of rel. + Aggregation.create(arg, group, comp1='calc1', ..., compn='calcn') yields an entity set + with primary key from arg. + The computed arguments comp1, ..., compn use aggregation calculations on the attributes of + group or simple projections and calculations on the attributes of arg. Aggregation is used QueryExpression.aggr and U.aggr. Aggregation is a private class in DataJoint, not exposed to users. """ @@ -517,12 +523,15 @@ def create(cls, arg, group, keep_all_rows=False): result._support = join.support result._join_attributes = join._join_attributes result._left = join._left - result.initial_restriction = join.restriction # WHERE clause applied before GROUP BY + result._left_restrict = join.restriction # WHERE clause applied before GROUP BY result._grouping_attributes = result.primary_key return result + def where_clause(self): + return '' if not self._left_restrict else ' WHERE (%s)' % ')AND('.join( + str(s) for s in self._left_restrict) + def make_sql(self, fields=None): - where = '' if not self._left_restrict else ' WHERE (%s)' % ')AND('.join(self._left_restrict) fields = self.heading.as_sql(fields or self.heading.names) assert self._grouping_attributes or not self.restriction distinct = set(self.heading.names) == set(self.primary_key) @@ -530,18 +539,18 @@ def make_sql(self, fields=None): distinct="DISTINCT " if distinct else "", fields=fields, from_=self.from_clause(), - where=where, + where=self.where_clause(), group_by="" if not self.primary_key else ( " GROUP BY `%s`" % '`,`'.join(self._grouping_attributes) + ("" if not self.restriction else ' HAVING (%s)' % ')AND('.join(self.restriction)))) def __len__(self): - what = '*' if set(self.heading.names) != set(self.primary_key) else 'DISTINCT `%s`' % '`,`'.join(self.primary_key) return self.connection.query( - 'SELECT count({what}) FROM ({subquery}) as `_r{alias:x}`'.format( - what=what, - subquery=self.make_sql(), - alias=next(self.__subquery_alias_count))).fetchone()[0] + 'SELECT count(1) FROM ({sql}) `$sub`'.format(sql=self.make_sql())).fetchone()[0] + + def __bool__(self): + return bool(self.connection.query( + 'SELECT EXISTS({sql})'.format(sql=self.make_sql()))) class Union(QueryExpression): @@ -553,36 +562,52 @@ def create(cls, arg1, arg2): if inspect.isclass(arg2) and issubclass(arg2, QueryExpression): arg2 = arg2() # instantiate if a class if not isinstance(arg2, QueryExpression): - raise DataJointError('A QueryExpression can only be unioned with another QueryExpression') + raise DataJointError( + "A QueryExpression can only be unioned with another QueryExpression") if arg1.connection != arg2.connection: - raise DataJointError("Cannot operate on QueryExpressions originating from different connections.") + raise DataJointError( + "Cannot operate on QueryExpressions originating from different connections.") if set(arg1.primary_key) != set(arg2.primary_key): raise DataJointError("The operands of a union must share the same primary key.") if set(arg1.heading.secondary_attributes) & set(arg2.heading.secondary_attributes): - raise DataJointError("The operands of a union must not share any secondary attributes.") + raise DataJointError( + "The operands of a union must not share any secondary attributes.") result = cls() result._connection = arg1.connection result._heading = arg1.heading.join(arg2.heading) result._support = [arg1, arg2] return result - def make_sql(self, select_fields=None): + def make_sql(self): arg1, arg2 = self._support - if not arg1.heading.secondary_attributes and not arg2.heading.secondary_attributes: # use UNION DISTINCT - fields = select_fields or arg1.primary_key - return "({sql1}) UNION ({sql2})".format(sql1=arg1.make_sql(fields), sql2=arg2.make_sql(fields)) - fields = select_fields or self.heading.names + if not arg1.heading.secondary_attributes and not arg2.heading.secondary_attributes: + # no secondary attributes: use UNION DISTINCT + fields = arg1.primary_key + return "({sql1}) UNION ({sql2})".format( + sql1=arg1.make_sql(fields), + sql2=arg2.make_sql(fields)) + # with secondary attributes, use union of left join with antijoin + fields = self.heading.names sql1 = arg1.join(arg2, left=True).make_sql(fields) - sql2 = (arg2 - arg1).proj(..., **{k: 'NULL' for k in arg1.heading.secondary_attributes}).make_sql(fields) + sql2 = (arg2 - arg1).proj( + ..., **{k: 'NULL' for k in arg1.heading.secondary_attributes}).make_sql(fields) return "({sql1}) UNION ({sql2})".format(sql1=sql1, sql2=sql2) def from_clause(self): - """In Union, the select clause can be used as the WHERE clause and make_sql() does not call from_clause""" - return self.make_sql() + """ The union does not use a FROM clause """ + assert False + + def where_clause(self): + """ The union does not use a WHERE clause """ + assert False def __len__(self): - return self.connection.query( - 'SELECT count(*) FROM ({sql}) `$sub`'.format(sql=self.make_sql())).fetchone()[0] + return self.connection.query('SELECT count(1) FROM ({sql}) as `$sub`'.format( + sql=self.make_sql())).fetchone()[0] + + def __bool__(self): + return bool(self.connection.query( + 'SELECT EXISTS({sql})'.format(sql=self.make_sql()))) class U: @@ -688,7 +713,8 @@ def aggr(self, group, **named_attributes): :return: The derived query expression """ if named_attributes.get('keep_all_rows', False): - raise DataJointError('Cannot set keep_all_rows=True when aggregating on a universal set.') + raise DataJointError( + 'Cannot set keep_all_rows=True when aggregating on a universal set.') return Aggregation.create(self, group=group, keep_all_rows=False).proj(**named_attributes) aggregate = aggr # alias for aggr diff --git a/datajoint/heading.py b/datajoint/heading.py index 6e4db0134..6a929b337 100644 --- a/datajoint/heading.py +++ b/datajoint/heading.py @@ -148,13 +148,14 @@ def as_dtype(self): names=self.names, formats=[v.dtype for v in self.attributes.values()])) - def as_sql(self, fields): + def as_sql(self, fields, include_aliases=True): """ represent heading as the SQL SELECT clause. """ - return ','.join('`%s`' % name if self.attributes[name].attribute_expression is None - else '%s as `%s`' % (self.attributes[name].attribute_expression, name) - for name in fields) + return ','.join( + '`%s`' % name if self.attributes[name].attribute_expression is None + else self.attributes[name].attribute_expression + (' as `%s`' % name if include_aliases else '') + for name in fields) def __iter__(self): return iter(self.attributes) diff --git a/datajoint/table.py b/datajoint/table.py index e98e73a5e..2baa036af 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -320,7 +320,7 @@ def delete_quick(self, get_count=False): Deletes the table without cascading and without user prompt. If this table has populated dependent tables, this will fail. """ - query = 'DELETE FROM ' + self.full_table_name + self.where_clause + query = 'DELETE FROM ' + self.full_table_name + self.where_clause() self.connection.query(query) count = self.connection.query("SELECT ROW_COUNT()").fetchone()[0] if get_count else None self._log(query[:255]) @@ -564,7 +564,7 @@ def _update(self, attrname, value=None): full_table_name=self.from_clause(), attrname=attrname, placeholder=placeholder, - where_clause=self.where_clause) + where_clause=self.where_clause()) self.connection.query(command, args=(value, ) if value is not None else ()) # --- private helper functions ---- diff --git a/tests/test_university.py b/tests/test_university.py index 3dd9ce1ba..e3ec82c57 100644 --- a/tests/test_university.py +++ b/tests/test_university.py @@ -83,7 +83,16 @@ def test_aggr(): assert_true(len(avg_grade_per_course) == 45) # GPA - student_gpa = Student.aggr(Course * Grade * LetterGrade, gpa='round(sum(points*credits)/sum(credits), 2)') + student_gpa = Student.aggr( + Course * Grade * LetterGrade, + gpa='round(sum(points*credits)/sum(credits), 2)') gpa = student_gpa.fetch('gpa') assert_true(len(gpa) == 261) assert_true(2 < gpa.mean() < 3) + + # Sections in biology department with zero students in them + section = (Section & {"dept": "BIOL"}).aggr( + Enroll, n='count(student_id)', keep_all_rows=True) & 'n=0' + assert_true(len(set(section.fetch('dept'))) == 1) + assert_true(len(section) == 17) + assert_true(bool(section)) From 0807f1df2112d25f2225429354165840e464c13c Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 1 Jan 2021 16:14:28 -0600 Subject: [PATCH 42/45] use consistent subquery alias names --- datajoint/expression.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/datajoint/expression.py b/datajoint/expression.py index 1b73827b2..90ba623bf 100644 --- a/datajoint/expression.py +++ b/datajoint/expression.py @@ -80,11 +80,11 @@ def restriction_attributes(self): def primary_key(self): return self.heading.primary_key - __subquery_alias_count = count() # count for alias names used in from_clause + _subquery_alias_count = count() # count for alias names used in from_clause def from_clause(self): support = ('(' + src.make_sql() + ') as `_s%x`' % next( - self.__subquery_alias_count) if isinstance(src, QueryExpression) else src for src in self.support) + self._subquery_alias_count) if isinstance(src, QueryExpression) else src for src in self.support) clause = next(support) for s, a, left in zip(support, self._join_attributes, self._left): clause += '{left} JOIN {clause}{using}'.format( @@ -507,7 +507,7 @@ class Aggregation(QueryExpression): Aggregation is a private class in DataJoint, not exposed to users. """ _left_restrict = None # the pre-GROUP BY conditions for the WHERE clause - __subquery_alias_count = count() + _subquery_alias_count = count() @classmethod def create(cls, arg, group, keep_all_rows=False): @@ -546,7 +546,9 @@ def make_sql(self, fields=None): def __len__(self): return self.connection.query( - 'SELECT count(1) FROM ({sql}) `$sub`'.format(sql=self.make_sql())).fetchone()[0] + 'SELECT count(1) FROM ({subquery}) `${alias:x}`'.format( + subquery=self.make_sql(), + alias=next(self._subquery_alias_count))).fetchone()[0] def __bool__(self): return bool(self.connection.query( @@ -602,8 +604,10 @@ def where_clause(self): assert False def __len__(self): - return self.connection.query('SELECT count(1) FROM ({sql}) as `$sub`'.format( - sql=self.make_sql())).fetchone()[0] + return self.connection.query( + 'SELECT count(1) FROM ({subquery}) `${alias:x}`'.format( + subquery=self.make_sql(), + alias=next(QueryExpression._subquery_alias_count))).fetchone()[0] def __bool__(self): return bool(self.connection.query( From e7c44dbf1c341407770e91f92df377bce74e9a63 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 1 Jan 2021 17:59:36 -0600 Subject: [PATCH 43/45] Ellipsis in aggr no longer includes attributes from the right operand --- datajoint/expression.py | 7 ++++++- tests/test_university.py | 10 +++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/datajoint/expression.py b/datajoint/expression.py index 90ba623bf..7ea62996f 100644 --- a/datajoint/expression.py +++ b/datajoint/expression.py @@ -34,7 +34,6 @@ class QueryExpression: 2. A projection is applied remapping remapped attributes 3. Subclasses: Join, Aggregation, and Union have additional specific rules. """ - _restriction = None _restriction_attributes = None _left = [] # True for left joins, False for inner joins @@ -385,6 +384,11 @@ def aggr(self, group, *attributes, keep_all_rows=False, **named_attributes): :param named_attributes: computations of the form new_attribute="sql expression on attributes of group" :return: The derived query expression """ + if Ellipsis in attributes: + # expand ellipsis to include only attributes from the left table + attributes = set(attributes) + attributes.discard(Ellipsis) + attributes.update(self.heading.secondary_attributes) return Aggregation.create( self, group=group, keep_all_rows=keep_all_rows).proj(*attributes, **named_attributes) @@ -525,6 +529,7 @@ def create(cls, arg, group, keep_all_rows=False): result._left = join._left result._left_restrict = join.restriction # WHERE clause applied before GROUP BY result._grouping_attributes = result.primary_key + return result def where_clause(self): diff --git a/tests/test_university.py b/tests/test_university.py index e3ec82c57..467a43f5c 100644 --- a/tests/test_university.py +++ b/tests/test_university.py @@ -1,4 +1,4 @@ -from nose.tools import assert_true, assert_list_equal, assert_set_equal, assert_equal +from nose.tools import assert_true, assert_list_equal, assert_false from .schema_university import * import hashlib @@ -96,3 +96,11 @@ def test_aggr(): assert_true(len(set(section.fetch('dept'))) == 1) assert_true(len(section) == 17) assert_true(bool(section)) + + # Test correct use of ellipses in a similar query + section = (Section & {"dept": "BIOL"}).aggr( + Grade, ..., n='count(student_id)', keep_all_rows=True) + assert_false('grade' in section.heading.names) + assert_true(len(set(section.fetch('dept'))) == 1) + assert_true(len(section) == 17) + assert_true(bool(section)) From 2cf8e302cd2c0acfa0a288451e7b3219b80820ba Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 1 Jan 2021 18:09:40 -0600 Subject: [PATCH 44/45] add test for aggregation with Ellipsis --- tests/test_university.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_university.py b/tests/test_university.py index 467a43f5c..323889e34 100644 --- a/tests/test_university.py +++ b/tests/test_university.py @@ -99,8 +99,9 @@ def test_aggr(): # Test correct use of ellipses in a similar query section = (Section & {"dept": "BIOL"}).aggr( - Grade, ..., n='count(student_id)', keep_all_rows=True) - assert_false('grade' in section.heading.names) + Grade, ..., n='count(student_id)', keep_all_rows=True) & 'n>1' + assert_false( + any(name in section.heading.names for name in Grade.heading.secondary_attributes)) assert_true(len(set(section.fetch('dept'))) == 1) - assert_true(len(section) == 17) + assert_true(len(section) == 168) assert_true(bool(section)) From 8ca859454fb63584e578652a6d13e3ed066bf728 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 5 Jan 2021 07:58:06 -0600 Subject: [PATCH 45/45] Update to version 0.13.dev2 --- datajoint/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datajoint/version.py b/datajoint/version.py index 8b00db41c..fca233bf7 100644 --- a/datajoint/version.py +++ b/datajoint/version.py @@ -1,3 +1,3 @@ -__version__ = "0.13.dev1" +__version__ = "0.13.dev2" assert len(__version__) <= 10 # The log table limits version to the 10 characters