diff --git a/.coveragerc b/.coveragerc index d593f5b..d83c23f 100644 --- a/.coveragerc +++ b/.coveragerc @@ -11,3 +11,5 @@ exclude_lines = ignore_errors = True omit = tests/* + sqlquerygraph.py + loader.py diff --git a/.gitignore b/.gitignore index 304ff16..c79a3f1 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,10 @@ __pycache__/ # neo4j data/databases/* data/transactions/* +data/dbms/* +neo4j/databases/* +neo4j/transactions/* +neo4j/dbms/* logs/* # tests / coverage reports diff --git a/README.md b/README.md index eec54af..6fa94aa 100644 --- a/README.md +++ b/README.md @@ -10,13 +10,20 @@ Parse your SQL queries and represent their structure as a graph. Currently, we implement the ability of representing how each of the tables in a set of SQL query scripts depend on each other. +```cypher +MATCH p=(r:Reporting)-[:HAS_TABLE_DEPENDENCY]->()-[:HAS_TABLE_DEPENDENCY]->() +WHERE r.table_name='user_activity' +RETURN p +``` +![](./guide/img/table_dependency.png) + ## Requirements To run the code in here, ensure your system meets the following requirements: - Unix-like operating system (macOS, Linux, ...) - though it might work on Windows; - Python 3.8 or above; and - [Poetry](https://python-poetry.org/docs/) installed. - [`direnv`](https://direnv.net/) installed, including shell hooks; -- [`.envrc`](https://github.com/avisionh/sqlquerygraph/blob/main/.envrc) allowed/trusted by `direnv` to use the environment variables - see [below](#allowingtrusting-envrc); +- [`.envrc`](https://github.com/avisionh/sqlquerygraph/blob/main/.envrc) allowed/trusted by `direnv` to use the environment variables - see [below](#set-up); @@ -41,34 +48,59 @@ python sqlquerygraph.py -sd 'sql' -ed 'neo4j' -rd 'github_repos' 'analytics' 're ### Run neo4j graph database We use [neo4j](https://neo4j.com/) for this project to visualise the dependencies between tables. To install neo4j locally using Docker Compose, follow the below instructions: -1. Install Docker +1. 
Install and open Docker + For macOS, install Docker and Docker Compose together [here](https://docs.docker.com/docker-for-mac/install/). + For Linux, install Docker [here](https://docs.docker.com/engine/install/) and then follow these [instructions](https://docs.docker.com/compose/install/) to install docker-compose. + For Windows, install Docker and Docker Compose together [here](https://docs.docker.com/docker-for-windows/install/). 1. Create a new file, `.secrets`, in the directory where this `README.md` file sits, and store the following in there. This allows you to set the password for your local neo4j instance without exposing it. - ```shell script + ``` export NEO4J_AUTH=neo4j/ + export NEO4J_USERNAME=neo4j + export NEO4J_PASSWORD= ``` -1. Within this directory that has the `docker-compose.yml` file, run the below in your shell/terminal: +1. Update your `.env` file to take in the new `.secrets` file you created by entering the below in your shell/terminal: + ```shell script + direnv allow + ``` +1. Download the neo4j image. Within this directory that has the `docker-compose.yml` file, run the below in your shell/terminal: ```shell script - docker-compose up -d + docker-compose up ``` -1. If it's the first time you have downloaded the neo4j docker image, wait awhile (maybe an hour, depends on your machine specs). If you have downloaded the neo4j docker image before (such as going through these instructions), then wait a few minutes. Then launch neo4j locally via opening your web browser and entering the following web address: - - http://localhost:7474/browser/ +1. If it's the first time you have downloaded the neo4j docker image, wait awhile (maybe an hour, depending on your machine specs). If you have downloaded the neo4j docker image before (such as going through these instructions), then wait a few minutes. You will know it's ready when you get the following message in your terminal: + ``` + ...
+ neo4j | 2021-05-26 06:40:15.270+0000 INFO Bolt enabled on 0.0.0.0:7687. + neo4j | 2021-05-26 06:40:16.412+0000 INFO Remote interface available at http://localhost:7474/ + neo4j | 2021-05-26 06:40:16.414+0000 INFO Started. + ``` + Then launch neo4j locally by opening your web browser and entering the following web address: + - http://localhost:7474/ 1. The username and password will be: ``` username: neo4j password: ``` +1. Load the data into the database by entering the following in a separate terminal: + ``` + docker exec -it neo4j bash + # move .csv files into neo4j's import/ directory + mv data/*csv import/ + ``` +1. In your local terminal: + ```shell script + python loader.py --file 'neo4j/<your_query_file>.cypher' ``` *** ## Acknowledgements This builds on the excellent [moz-sql-parser](https://github.com/mozilla/moz-sql-parser) package. + +Thanks also to the [Google Cloud Public Dataset Program](https://cloud.google.com/solutions/datasets); the SQL queries in this repo are based on the program's [GitHub repos](https://console.cloud.google.com/marketplace/product/github/github-repos) dataset.
diff --git a/data/loader.sh b/data/loader.sh deleted file mode 100644 index 37f39e0..0000000 --- a/data/loader.sh +++ /dev/null @@ -1,16 +0,0 @@ -# enter docker neo4j instance -docker exec -it neo4j bash - -# move .csv files into import/ folder -mv data/*csv import/ - -# access cypher shell -cypher-shell --username $NEO4J_USERNAME --pasword $NEO4J_PASSWORD - -# use default neo4j database -:use neo4j - -# run cypher code - -# exit cypher shell -:exit diff --git a/docker-compose.yml b/docker-compose.yml index 0986daa..8a9a306 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,5 @@ # https://thibaut-deveraux.medium.com/how-to-install-neo4j-with-docker-compose-36e3ba939af0 -version: '3.9' +version: '3.8' services: neo4j: @@ -9,15 +9,15 @@ services: # pass .env file to container env_file: .env ports: - - 7474:7474 - - 7687:7687 + - 7474:7474 # web client + - 7687:7687 # db default port volumes: - # cannot move files to import/ folder in neo4j because it's read-only - # https://neo4j.com/docs/operations-manual/current/configuration/file-locations/ - # but can move from docker neo4j bash terminal - ./neo4j:/data + - ./loader.py:/loader.py environment: - NEO4j_dbms.security.auth_enabled='true' + # listen to incoming connections + - NEO4J_dbms.connector.bolt.listen_address=0.0.0.0:7687 # Raise memory limits - NEO4J_dbms_memory_pagecache_size=2G - NEO4J_dbms.memory.heap.initial_size=2G diff --git a/guide/img/table_dependency.png b/guide/img/table_dependency.png new file mode 100644 index 0000000..5f80f05 Binary files /dev/null and b/guide/img/table_dependency.png differ
diff --git a/loader.py b/loader.py
new file mode 100644
index 0000000..ad19ca0
--- /dev/null
+++ b/loader.py
@@ -0,0 +1,41 @@
+"""Execute the ';'-separated Cypher statements of a file against neo4j."""
+import os
+import argparse
+
+from py2neo import Graph
+
+# Credentials are read from the environment; either value is None when unset.
+NEO4J_AUTH = (os.getenv(key="NEO4J_USERNAME"), os.getenv(key="NEO4J_PASSWORD"))
+
+# Bolt connection; 7687 is the port published for neo4j in docker-compose.yml.
+g = Graph(auth=NEO4J_AUTH, host="localhost", port=7687, scheme="bolt")
+
+
+if __name__ == "__main__":
+    argp = argparse.ArgumentParser()
+    argp.add_argument(
+        "-f",
+        "--file",
+        type=str,
+        required=True,  # without a path there is nothing to open below
+        help="Path for where Cypher query is.",
+    )
+    args = argp.parse_args()
+
+    print(f"Reading {args.file}\n")
+    print("*******************************************\n")
+    with open(file=args.file, mode="r", encoding="utf-8") as f:
+        queries = f.read()
+
+    print(f"Formatting {args.file} for importing into neo4j\n")
+    print("*******************************************\n")
+    # NOTE: naive split - a ';' inside a string literal or comment also splits.
+    queries = queries.split(sep=";")
+    # Drop empty/whitespace-only chunks (e.g. whatever follows the final ';')
+    # so that no empty statement is sent to the server.
+    queries = [txt.strip() for txt in queries if txt.strip()]
+
+    print(f"Executing {args.file} in neo4j\n")
+    print("*******************************************\n")
+    for query in queries:
+        g.evaluate(cypher=query)
diff --git a/neo4j/analytics_analytics_dependency.csv b/neo4j/analytics_analytics_dependency.csv new file mode 100644 index 0000000..fa0f231 --- /dev/null +++ b/neo4j/analytics_analytics_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/analytics_github_repos_dependency.csv b/neo4j/analytics_github_repos_dependency.csv new file mode 100644 index 0000000..fa0f231 --- /dev/null +++ b/neo4j/analytics_github_repos_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/analytics_tables.csv b/neo4j/analytics_tables.csv new file mode 100644 index 0000000..0c6c848 --- /dev/null +++ b/neo4j/analytics_tables.csv @@ -0,0 +1 @@ +table_dataset,table_name diff --git a/neo4j/example_import.cypher b/neo4j/example_import.cypher new file mode 100644 index 0000000..66da787 --- /dev/null +++ b/neo4j/example_import.cypher @@ -0,0 +1,50 @@ +// Create constraints on table_name property to ensure each label has unique table_name +CREATE CONSTRAINT table_name_ConstraintReporting ON (r:Reporting) +ASSERT r.table_name IS UNIQUE; +CREATE CONSTRAINT table_name_ConstraintAnalytics ON (a:Analytics) +ASSERT a.table_name IS UNIQUE; +CREATE CONSTRAINT table_name_ConstraintGithub_Repos ON (g:Github_Repos) +ASSERT g.table_name IS UNIQUE; +
+// Create table nodes to join later +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_tables.csv" AS csvLine +CREATE (:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); + +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_tables.csv" AS csvLine +CREATE (:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); + +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///github_repos_tables.csv" AS csvLine +CREATE (:Github_Repos {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); + +// Load table dependency data +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_analytics_dependency.csv" AS csvLine +MERGE (r:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (a:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (r)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(a); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_github_repos_dependency.csv" AS csvLine +MERGE (r:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (g:Github_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (r)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(g); + +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_analytics_dependency.csv" AS csvLine +MERGE (a1:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (a2:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE
(a1)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(a2); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_github_repos_dependency.csv" AS csvLine +MERGE (a:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (g:Github_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(g); + +// Teardown: delete all relationships together with their start nodes +MATCH (a)-[r]->() +DELETE a, r; + +// Teardown: delete all remaining nodes +MATCH (a) +DELETE a; + +// Teardown: list constraints, then drop them and correspondingly, their indexes +CALL db.constraints; +DROP CONSTRAINT table_name_ConstraintReporting; +DROP CONSTRAINT table_name_ConstraintAnalytics; +DROP CONSTRAINT table_name_ConstraintGithub_Repos; diff --git a/neo4j/github_repos_tables.csv b/neo4j/github_repos_tables.csv new file mode 100644 index 0000000..0c6c848 --- /dev/null +++ b/neo4j/github_repos_tables.csv @@ -0,0 +1 @@ +table_dataset,table_name diff --git a/neo4j/reporting_analytics_dependency.csv b/neo4j/reporting_analytics_dependency.csv new file mode 100644 index 0000000..fa0f231 --- /dev/null +++ b/neo4j/reporting_analytics_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/reporting_github_repos_dependency.csv b/neo4j/reporting_github_repos_dependency.csv new file mode 100644 index 0000000..fa0f231 --- /dev/null +++ b/neo4j/reporting_github_repos_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/reporting_tables.csv b/neo4j/reporting_tables.csv new file mode 100644 index 0000000..0c6c848 --- /dev/null +++ b/neo4j/reporting_tables.csv @@ -0,0 +1 @@ +table_dataset,table_name diff --git a/poetry.lock b/poetry.lock index d01fee2..f5354ce 100644 --- a/poetry.lock +++ b/poetry.lock @@ -15,6 +15,7 @@ optional = false python-versions = ">=2.7, !=3.0.*,
!=3.1.*, !=3.2.*, !=3.3.*" [[package]] + name = "attrs" version = "21.2.0" description = "Classes Without Boilerplate" @@ -36,6 +37,17 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "cffi" +version = "1.14.5" +description = "Foreign Function Interface for Python calling C code." +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +pycparser = "*" + [[package]] name = "cfgv" version = "3.2.0" @@ -74,6 +86,25 @@ toml = {version = "*", optional = true, markers = "extra == \"toml\""} [package.extras] toml = ["toml"] +[[package]] +name = "cryptography" +version = "3.4.7" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=1.6.5,!=1.8.0,!=3.1.0,!=3.1.1)", "sphinx-rtd-theme"] +docstest = ["doc8", "pyenchant (>=1.6.11)", "twine (>=1.12.0)", "sphinxcontrib-spelling (>=4.0.1)"] +pep8test = ["black", "flake8", "flake8-import-order", "pep8-naming"] +sdist = ["setuptools-rust (>=0.11.4)"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["pytest (>=6.0)", "pytest-cov", "pytest-subtests", "pytest-xdist", "pretend", "iso8601", "pytz", "hypothesis (>=1.11.4,!=3.79.2)"] + [[package]] name = "detect-secrets" version = "1.1.0" @@ -109,6 +140,34 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "docker" +version = "5.0.0" +description = "A Python library for the Docker Engine API." 
+category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +pywin32 = {version = "227", markers = "sys_platform == \"win32\""} +requests = ">=2.14.2,<2.18.0 || >2.18.0" +websocket-client = ">=0.32.0" + +[package.extras] +ssh = ["paramiko (>=2.4.2)"] +tls = ["pyOpenSSL (>=17.5.0)", "cryptography (>=3.4.7)", "idna (>=2.0.0)"] + +[[package]] +name = "english" +version = "2020.7.0" +description = "English language utility library for Python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + [[package]] name = "env-file" version = "2020.12.3" @@ -233,6 +292,14 @@ mo-future = "3.147.20327" mo-imports = "3.149.20327" mo-kwargs = "4.22.21108" +[[package]] +name = "monotonic" +version = "1.6" +description = "An implementation of time.monotonic() for Python 2 & < 3.3" +category = "main" +optional = false +python-versions = "*" + [[package]] name = "moz-sql-parser" version = "4.40.21126" @@ -246,6 +313,18 @@ mo-dots = "4.22.21108" mo-future = "3.147.20327" mo-logs = "4.23.21108" +[[package]] +name = "neotime" +version = "1.7.4" +description = "Nanosecond resolution temporal types" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +pytz = "*" +six = "*" + [[package]] name = "nodeenv" version = "1.6.0" @@ -289,6 +368,17 @@ pytz = ">=2017.3" [package.extras] test = ["pytest (>=5.0.1)", "pytest-xdist", "hypothesis (>=3.58)"] +[[package]] +name = "pansi" +version = "2020.7.3" +description = "ANSI escape code library for Python" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +six = "*" + [[package]] name = "pluggy" version = "0.13.1" @@ -316,6 +406,17 @@ pyyaml = ">=5.1" toml = "*" virtualenv = ">=20.0.8" +[[package]] +name = "prompt-toolkit" +version = "3.0.18" +description = "Library for building powerful interactive command lines in Python" +category = "main" +optional = false +python-versions = ">=3.6.1" + 
+[package.dependencies] +wcwidth = "*" + [[package]] name = "py" version = "1.10.0" @@ -324,6 +425,29 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +[[package]] +name = "py2neo" +version = "2021.1.3" +description = "Python client library and toolkit for Neo4j" +category = "main" +optional = false +python-versions = "*" + +[package.dependencies] +certifi = "*" +cryptography = "*" +docker = "*" +english = "*" +monotonic = "*" +neotime = ">=1.7.4,<1.8.0" +packaging = "*" +pansi = ">=2020.7.3" +prompt-toolkit = {version = ">=2.0.7", markers = "python_version >= \"3.6\""} +pygments = ">=2.0.0" +pytz = "*" +six = ">=1.15.0" +urllib3 = "*" + [[package]] name = "pycodestyle" version = "2.7.0" @@ -332,6 +456,14 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +[[package]] +name = "pycparser" +version = "2.20" +description = "C parser in Python" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + [[package]] name = "pyflakes" version = "2.3.1" @@ -340,6 +472,14 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +[[package]] +name = "pygments" +version = "2.9.0" +description = "Pygments is a syntax highlighting package written in Python." 
+category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "pyparsing" version = "2.4.7" @@ -395,6 +535,17 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "0.17.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" +optional = false +python-versions = "*" + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2021.1" @@ -403,6 +554,14 @@ category = "main" optional = false python-versions = "*" +[[package]] +name = "pywin32" +version = "227" +description = "Python for Window Extensions" +category = "main" +optional = false +python-versions = "*" + [[package]] name = "pyyaml" version = "5.4.1" @@ -497,10 +656,26 @@ six = ">=1.9.0,<2" docs = ["proselint (>=0.10.2)", "sphinx (>=3)", "sphinx-argparse (>=0.2.5)", "sphinx-rtd-theme (>=0.4.3)", "towncrier (>=19.9.0rc1)"] testing = ["coverage (>=4)", "coverage-enable-subprocess (>=1)", "flaky (>=3)", "pytest (>=4)", "pytest-env (>=0.6.2)", "pytest-freezegun (>=0.4.1)", "pytest-mock (>=2)", "pytest-randomly (>=1)", "pytest-timeout (>=1)", "packaging (>=20.0)", "xonsh (>=0.9.16)"] +[[package]] +name = "wcwidth" +version = "0.2.5" +description = "Measures the displayed width of unicode strings in a terminal" +category = "main" +optional = false +python-versions = "*" + +[[package]] +name = "websocket-client" +version = "1.0.1" +description = "WebSocket client for Python with low level API options" +category = "main" +optional = false +python-versions = ">=3.6" + [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "ea5d17f878a09644379c9d5f63d65bd25e7d0ef87af420755e3c04d82d5c34c8" +content-hash = "478b152e227629bd4611beba39245317f09b3fd7646433f8ca466cf630f43a17" [metadata.files] appdirs = [ @@ -519,6 +694,45 @@ certifi = [ {file = "certifi-2020.12.5-py2.py3-none-any.whl", hash = 
"sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"}, {file = "certifi-2020.12.5.tar.gz", hash = "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c"}, ] +cffi = [ + {file = "cffi-1.14.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:bb89f306e5da99f4d922728ddcd6f7fcebb3241fc40edebcb7284d7514741991"}, + {file = "cffi-1.14.5-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:34eff4b97f3d982fb93e2831e6750127d1355a923ebaeeb565407b3d2f8d41a1"}, + {file = "cffi-1.14.5-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:99cd03ae7988a93dd00bcd9d0b75e1f6c426063d6f03d2f90b89e29b25b82dfa"}, + {file = "cffi-1.14.5-cp27-cp27m-win32.whl", hash = "sha256:65fa59693c62cf06e45ddbb822165394a288edce9e276647f0046e1ec26920f3"}, + {file = "cffi-1.14.5-cp27-cp27m-win_amd64.whl", hash = "sha256:51182f8927c5af975fece87b1b369f722c570fe169f9880764b1ee3bca8347b5"}, + {file = "cffi-1.14.5-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:43e0b9d9e2c9e5d152946b9c5fe062c151614b262fda2e7b201204de0b99e482"}, + {file = "cffi-1.14.5-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:cbde590d4faaa07c72bf979734738f328d239913ba3e043b1e98fe9a39f8b2b6"}, + {file = "cffi-1.14.5-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:5de7970188bb46b7bf9858eb6890aad302577a5f6f75091fd7cdd3ef13ef3045"}, + {file = "cffi-1.14.5-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:a465da611f6fa124963b91bf432d960a555563efe4ed1cc403ba5077b15370aa"}, + {file = "cffi-1.14.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:d42b11d692e11b6634f7613ad8df5d6d5f8875f5d48939520d351007b3c13406"}, + {file = "cffi-1.14.5-cp35-cp35m-win32.whl", hash = "sha256:72d8d3ef52c208ee1c7b2e341f7d71c6fd3157138abf1a95166e6165dd5d4369"}, + {file = "cffi-1.14.5-cp35-cp35m-win_amd64.whl", hash = "sha256:29314480e958fd8aab22e4a58b355b629c59bf5f2ac2492b61e3dc06d8c7a315"}, + {file = "cffi-1.14.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:3d3dd4c9e559eb172ecf00a2a7517e97d1e96de2a5e610bd9b68cea3925b4892"}, 
+ {file = "cffi-1.14.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:48e1c69bbacfc3d932221851b39d49e81567a4d4aac3b21258d9c24578280058"}, + {file = "cffi-1.14.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:69e395c24fc60aad6bb4fa7e583698ea6cc684648e1ffb7fe85e3c1ca131a7d5"}, + {file = "cffi-1.14.5-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:9e93e79c2551ff263400e1e4be085a1210e12073a31c2011dbbda14bda0c6132"}, + {file = "cffi-1.14.5-cp36-cp36m-win32.whl", hash = "sha256:58e3f59d583d413809d60779492342801d6e82fefb89c86a38e040c16883be53"}, + {file = "cffi-1.14.5-cp36-cp36m-win_amd64.whl", hash = "sha256:005a36f41773e148deac64b08f233873a4d0c18b053d37da83f6af4d9087b813"}, + {file = "cffi-1.14.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2894f2df484ff56d717bead0a5c2abb6b9d2bf26d6960c4604d5c48bbc30ee73"}, + {file = "cffi-1.14.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:0857f0ae312d855239a55c81ef453ee8fd24136eaba8e87a2eceba644c0d4c06"}, + {file = "cffi-1.14.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:cd2868886d547469123fadc46eac7ea5253ea7fcb139f12e1dfc2bbd406427d1"}, + {file = "cffi-1.14.5-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:35f27e6eb43380fa080dccf676dece30bef72e4a67617ffda586641cd4508d49"}, + {file = "cffi-1.14.5-cp37-cp37m-win32.whl", hash = "sha256:9ff227395193126d82e60319a673a037d5de84633f11279e336f9c0f189ecc62"}, + {file = "cffi-1.14.5-cp37-cp37m-win_amd64.whl", hash = "sha256:9cf8022fb8d07a97c178b02327b284521c7708d7c71a9c9c355c178ac4bbd3d4"}, + {file = "cffi-1.14.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8b198cec6c72df5289c05b05b8b0969819783f9418e0409865dac47288d2a053"}, + {file = "cffi-1.14.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:ad17025d226ee5beec591b52800c11680fca3df50b8b29fe51d882576e039ee0"}, + {file = "cffi-1.14.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:6c97d7350133666fbb5cf4abdc1178c812cb205dc6f41d174a7b0f18fb93337e"}, + {file = "cffi-1.14.5-cp38-cp38-manylinux2014_aarch64.whl", 
hash = "sha256:8ae6299f6c68de06f136f1f9e69458eae58f1dacf10af5c17353eae03aa0d827"}, + {file = "cffi-1.14.5-cp38-cp38-win32.whl", hash = "sha256:b85eb46a81787c50650f2392b9b4ef23e1f126313b9e0e9013b35c15e4288e2e"}, + {file = "cffi-1.14.5-cp38-cp38-win_amd64.whl", hash = "sha256:1f436816fc868b098b0d63b8920de7d208c90a67212546d02f84fe78a9c26396"}, + {file = "cffi-1.14.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1071534bbbf8cbb31b498d5d9db0f274f2f7a865adca4ae429e147ba40f73dea"}, + {file = "cffi-1.14.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:9de2e279153a443c656f2defd67769e6d1e4163952b3c622dcea5b08a6405322"}, + {file = "cffi-1.14.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:6e4714cc64f474e4d6e37cfff31a814b509a35cb17de4fb1999907575684479c"}, + {file = "cffi-1.14.5-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:158d0d15119b4b7ff6b926536763dc0714313aa59e320ddf787502c70c4d4bee"}, + {file = "cffi-1.14.5-cp39-cp39-win32.whl", hash = "sha256:afb29c1ba2e5a3736f1c301d9d0abe3ec8b86957d04ddfa9d7a6a42b9367e396"}, + {file = "cffi-1.14.5-cp39-cp39-win_amd64.whl", hash = "sha256:f2d45f97ab6bb54753eab54fffe75aaf3de4ff2341c9daee1987ee1837636f1d"}, + {file = "cffi-1.14.5.tar.gz", hash = "sha256:fd78e5fee591709f32ef6edb9a015b4aa1a5022598e36227500c8f4e02328d9c"}, +] cfgv = [ {file = "cfgv-3.2.0-py2.py3-none-any.whl", hash = "sha256:32e43d604bbe7896fe7c248a9c2276447dbef840feb28fe20494f62af110211d"}, {file = "cfgv-3.2.0.tar.gz", hash = "sha256:cf22deb93d4bcf92f345a5c3cd39d3d41d6340adc60c78bbbd6588c384fda6a1"}, @@ -585,6 +799,20 @@ coverage = [ {file = "coverage-5.5-pp37-none-any.whl", hash = "sha256:2a3859cb82dcbda1cfd3e6f71c27081d18aa251d20a17d87d26d4cd216fb0af4"}, {file = "coverage-5.5.tar.gz", hash = "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c"}, ] +cryptography = [ + {file = "cryptography-3.4.7-cp36-abi3-macosx_10_10_x86_64.whl", hash = "sha256:3d8427734c781ea5f1b41d6589c293089704d4759e34597dce91014ac125aad1"}, + {file = 
"cryptography-3.4.7-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:8e56e16617872b0957d1c9742a3f94b43533447fd78321514abbe7db216aa250"}, + {file = "cryptography-3.4.7-cp36-abi3-manylinux2010_x86_64.whl", hash = "sha256:37340614f8a5d2fb9aeea67fd159bfe4f5f4ed535b1090ce8ec428b2f15a11f2"}, + {file = "cryptography-3.4.7-cp36-abi3-manylinux2014_aarch64.whl", hash = "sha256:240f5c21aef0b73f40bb9f78d2caff73186700bf1bc6b94285699aff98cc16c6"}, + {file = "cryptography-3.4.7-cp36-abi3-manylinux2014_x86_64.whl", hash = "sha256:1e056c28420c072c5e3cb36e2b23ee55e260cb04eee08f702e0edfec3fb51959"}, + {file = "cryptography-3.4.7-cp36-abi3-win32.whl", hash = "sha256:0f1212a66329c80d68aeeb39b8a16d54ef57071bf22ff4e521657b27372e327d"}, + {file = "cryptography-3.4.7-cp36-abi3-win_amd64.whl", hash = "sha256:de4e5f7f68220d92b7637fc99847475b59154b7a1b3868fb7385337af54ac9ca"}, + {file = "cryptography-3.4.7-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:26965837447f9c82f1855e0bc8bc4fb910240b6e0d16a664bb722df3b5b06873"}, + {file = "cryptography-3.4.7-pp36-pypy36_pp73-manylinux2014_x86_64.whl", hash = "sha256:eb8cc2afe8b05acbd84a43905832ec78e7b3873fb124ca190f574dca7389a87d"}, + {file = "cryptography-3.4.7-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:7ec5d3b029f5fa2b179325908b9cd93db28ab7b85bb6c1db56b10e0b54235177"}, + {file = "cryptography-3.4.7-pp37-pypy37_pp73-manylinux2014_x86_64.whl", hash = "sha256:ee77aa129f481be46f8d92a1a7db57269a2f23052d5f2433b4621bb457081cc9"}, + {file = "cryptography-3.4.7.tar.gz", hash = "sha256:3d10de8116d25649631977cb37da6cbdd2d6fa0e0281d014a5b7d337255ca713"}, +] detect-secrets = [ {file = "detect_secrets-1.1.0-py2.py3-none-any.whl", hash = "sha256:be8cca3dc65f6fd637f5dec9f583f1cf4a680dc1a580b3d2e65a5ac7a277456a"}, {file = "detect_secrets-1.1.0.tar.gz", hash = "sha256:68250b31bc108f665f05f0ecfb34f92423280e48e65adbb887fdf721ed909627"}, @@ -596,6 +824,14 @@ distlib = [ {file = "distlib-0.3.1-py2.py3-none-any.whl", hash = 
"sha256:8c09de2c67b3e7deef7184574fc060ab8a793e7adbb183d942c389c8b13c52fb"}, {file = "distlib-0.3.1.zip", hash = "sha256:edf6116872c863e1aa9d5bb7cb5e05a022c519a4594dc703843343a9ddd9bff1"}, ] +docker = [ + {file = "docker-5.0.0-py2.py3-none-any.whl", hash = "sha256:fc961d622160e8021c10d1bcabc388c57d55fb1f917175afbe24af442e6879bd"}, + {file = "docker-5.0.0.tar.gz", hash = "sha256:3e8bc47534e0ca9331d72c32f2881bb13b93ded0bcdeab3c833fb7cf61c0a9a5"}, +] +english = [ + {file = "english-2020.7.0-py2.py3-none-any.whl", hash = "sha256:aeeaea58698bf703336cf63279d6709482909e2fc1d5da4540abae878ab1e292"}, + {file = "english-2020.7.0.tar.gz", hash = "sha256:7105ed1e9d22b0bd9c1841e7275d3e6e83a34cee475a7e291f70a05721732080"}, +] env-file = [ {file = "env-file-2020.12.3.tar.gz", hash = "sha256:34cbe53b99afaa81209953ee16febcd87121034aa7bf64e229802f51b9c38d66"}, ] @@ -638,9 +874,15 @@ mo-kwargs = [ mo-logs = [ {file = "mo-logs-4.23.21108.tar.gz", hash = "sha256:de4136a7ce215ecbfd7a368588be0a3f1fd8a6521dc2d4aae57cc1c3ba299aab"}, ] +monotonic = [ + {file = "monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c"}, +] moz-sql-parser = [ {file = "moz-sql-parser-4.40.21126.tar.gz", hash = "sha256:b3d37cc8ff118d86009aa12646791549537ec0ae8ac312efd4641289c8eee080"}, ] +neotime = [ + {file = "neotime-1.7.4.tar.gz", hash = "sha256:4e0477ba0f24e004de2fa79a3236de2bd941f20de0b5db8d976c52a86d7363eb"}, +] nodeenv = [ {file = "nodeenv-1.6.0-py2.py3-none-any.whl", hash = "sha256:621e6b7076565ddcacd2db0294c0381e01fd28945ab36bcf00f41c5daf63bef7"}, {file = "nodeenv-1.6.0.tar.gz", hash = "sha256:3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b"}, @@ -693,6 +935,10 @@ pandas = [ {file = "pandas-1.2.4-cp39-cp39-win_amd64.whl", hash = "sha256:2b063d41803b6a19703b845609c0b700913593de067b552a8b24dd8eeb8c9895"}, {file = "pandas-1.2.4.tar.gz", hash = "sha256:649ecab692fade3cbfcf967ff936496b0cfba0af00a55dfaacd82bdda5cb2279"}, ] +pansi = 
[ + {file = "pansi-2020.7.3-py2.py3-none-any.whl", hash = "sha256:ce2b8acaf06dc59dcc711f61efbe53c836877f127d73f11fdd898b994e5c4234"}, + {file = "pansi-2020.7.3.tar.gz", hash = "sha256:bd182d504528f870601acb0282aded411ad00a0148427b0e53a12162f4e74dcf"}, +] pluggy = [ {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, @@ -701,18 +947,34 @@ pre-commit = [ {file = "pre_commit-2.12.1-py2.py3-none-any.whl", hash = "sha256:70c5ec1f30406250b706eda35e868b87e3e4ba099af8787e3e8b4b01e84f4712"}, {file = "pre_commit-2.12.1.tar.gz", hash = "sha256:900d3c7e1bf4cf0374bb2893c24c23304952181405b4d88c9c40b72bda1bb8a9"}, ] +prompt-toolkit = [ + {file = "prompt_toolkit-3.0.18-py3-none-any.whl", hash = "sha256:bf00f22079f5fadc949f42ae8ff7f05702826a97059ffcc6281036ad40ac6f04"}, + {file = "prompt_toolkit-3.0.18.tar.gz", hash = "sha256:e1b4f11b9336a28fa11810bc623c357420f69dfdb6d2dac41ca2c21a55c033bc"}, +] py = [ {file = "py-1.10.0-py2.py3-none-any.whl", hash = "sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a"}, {file = "py-1.10.0.tar.gz", hash = "sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3"}, ] +py2neo = [ + {file = "py2neo-2021.1.3-py2.py3-none-any.whl", hash = "sha256:5766710590457e9489a2dc2a802a9dfd431cc3d08323f7e3e64113db9ec6b1dc"}, + {file = "py2neo-2021.1.3.tar.gz", hash = "sha256:4a2aa4e8df9a5dee46a83e05aa1b8877385cff094486ebd9f49a22701fd4c5d6"}, +] pycodestyle = [ {file = "pycodestyle-2.7.0-py2.py3-none-any.whl", hash = "sha256:514f76d918fcc0b55c6680472f0a37970994e07bbb80725808c17089be302068"}, {file = "pycodestyle-2.7.0.tar.gz", hash = "sha256:c389c1d06bf7904078ca03399a4816f974a1d590090fecea0c63ec26ebaf1cef"}, ] +pycparser = [ + {file = "pycparser-2.20-py2.py3-none-any.whl", hash = 
"sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705"}, + {file = "pycparser-2.20.tar.gz", hash = "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0"}, +] pyflakes = [ {file = "pyflakes-2.3.1-py2.py3-none-any.whl", hash = "sha256:7893783d01b8a89811dd72d7dfd4d84ff098e5eed95cfa8905b22bbffe52efc3"}, {file = "pyflakes-2.3.1.tar.gz", hash = "sha256:f5bc8ecabc05bb9d291eb5203d6810b49040f6ff446a756326104746cc00c1db"}, ] +pygments = [ + {file = "Pygments-2.9.0-py3-none-any.whl", hash = "sha256:d66e804411278594d764fc69ec36ec13d9ae9147193a1740cd34d272ca383b8e"}, + {file = "Pygments-2.9.0.tar.gz", hash = "sha256:a18f47b506a429f6f4b9df81bb02beab9ca21d0a5fee38ed15aef65f0545519f"}, +] pyparsing = [ {file = "pyparsing-2.4.7-py2.py3-none-any.whl", hash = "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b"}, {file = "pyparsing-2.4.7.tar.gz", hash = "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1"}, @@ -729,10 +991,28 @@ python-dateutil = [ {file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"}, {file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"}, ] +python-dotenv = [ + {file = "python-dotenv-0.17.1.tar.gz", hash = "sha256:b1ae5e9643d5ed987fc57cc2583021e38db531946518130777734f9589b3141f"}, + {file = "python_dotenv-0.17.1-py2.py3-none-any.whl", hash = "sha256:00aa34e92d992e9f8383730816359647f358f4a3be1ba45e5a5cefd27ee91544"}, +] pytz = [ {file = "pytz-2021.1-py2.py3-none-any.whl", hash = "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"}, {file = "pytz-2021.1.tar.gz", hash = "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da"}, ] +pywin32 = [ + {file = "pywin32-227-cp27-cp27m-win32.whl", hash = "sha256:371fcc39416d736401f0274dd64c2302728c9e034808e37381b5e1b22be4a6b0"}, + {file = 
"pywin32-227-cp27-cp27m-win_amd64.whl", hash = "sha256:4cdad3e84191194ea6d0dd1b1b9bdda574ff563177d2adf2b4efec2a244fa116"}, + {file = "pywin32-227-cp35-cp35m-win32.whl", hash = "sha256:f4c5be1a293bae0076d93c88f37ee8da68136744588bc5e2be2f299a34ceb7aa"}, + {file = "pywin32-227-cp35-cp35m-win_amd64.whl", hash = "sha256:a929a4af626e530383a579431b70e512e736e9588106715215bf685a3ea508d4"}, + {file = "pywin32-227-cp36-cp36m-win32.whl", hash = "sha256:300a2db938e98c3e7e2093e4491439e62287d0d493fe07cce110db070b54c0be"}, + {file = "pywin32-227-cp36-cp36m-win_amd64.whl", hash = "sha256:9b31e009564fb95db160f154e2aa195ed66bcc4c058ed72850d047141b36f3a2"}, + {file = "pywin32-227-cp37-cp37m-win32.whl", hash = "sha256:47a3c7551376a865dd8d095a98deba954a98f326c6fe3c72d8726ca6e6b15507"}, + {file = "pywin32-227-cp37-cp37m-win_amd64.whl", hash = "sha256:31f88a89139cb2adc40f8f0e65ee56a8c585f629974f9e07622ba80199057511"}, + {file = "pywin32-227-cp38-cp38-win32.whl", hash = "sha256:7f18199fbf29ca99dff10e1f09451582ae9e372a892ff03a28528a24d55875bc"}, + {file = "pywin32-227-cp38-cp38-win_amd64.whl", hash = "sha256:7c1ae32c489dc012930787f06244426f8356e129184a02c25aef163917ce158e"}, + {file = "pywin32-227-cp39-cp39-win32.whl", hash = "sha256:c054c52ba46e7eb6b7d7dfae4dbd987a1bb48ee86debe3f245a2884ece46e295"}, + {file = "pywin32-227-cp39-cp39-win_amd64.whl", hash = "sha256:f27cec5e7f588c3d1051651830ecc00294f90728d19c3bf6916e6dba93ea357c"}, +] pyyaml = [ {file = "PyYAML-5.4.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922"}, {file = "PyYAML-5.4.1-cp27-cp27m-win32.whl", hash = "sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393"}, @@ -791,3 +1071,11 @@ virtualenv = [ {file = "virtualenv-20.4.6-py2.py3-none-any.whl", hash = "sha256:307a555cf21e1550885c82120eccaf5acedf42978fd362d32ba8410f9593f543"}, {file = "virtualenv-20.4.6.tar.gz", hash = 
"sha256:72cf267afc04bf9c86ec932329b7e94db6a0331ae9847576daaa7ca3c86b29a4"}, ] +wcwidth = [ + {file = "wcwidth-0.2.5-py2.py3-none-any.whl", hash = "sha256:beb4802a9cebb9144e99086eff703a642a13d6a0052920003a230f3294bbe784"}, + {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, +] +websocket-client = [ + {file = "websocket-client-1.0.1.tar.gz", hash = "sha256:3e2bf58191d4619b161389a95bdce84ce9e0b24eb8107e7e590db682c2d0ca81"}, + {file = "websocket_client-1.0.1-py2.py3-none-any.whl", hash = "sha256:abf306dc6351dcef07f4d40453037e51cc5d9da2ef60d0fc5d0fe3bcda255372"}, +] diff --git a/pyproject.toml b/pyproject.toml index 4266e61..4fdbbc2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,8 @@ pytest = "^6.2.4" pandas = "^1.2.4" numpy = "^1.20.3" pytest-cov = "^2.12.0" +py2neo = "^2021.1.3" +python-dotenv = "^0.17.1" [tool.poetry.dev-dependencies] diff --git a/sqlquerygraph.py b/sqlquerygraph.py index 7755888..7a2996a 100644 --- a/sqlquerygraph.py +++ b/sqlquerygraph.py @@ -3,6 +3,7 @@ from extractor import Extractor import exporter +import writer import numpy as np import pandas as pd @@ -92,3 +93,10 @@ print("Exporting table dependencies for relationships\n") print("*******************************************\n") exporter.export_table_dependency(data=df, path_or_buf=args.export_dir) + + print("Creating Cypher queries for neo4j database\n") + print("*******************************************\n") + datasets = [txt.title() for txt in args.reference_datasets] + writer.create_query_constraint(datasets=datasets, dir_file=args.export_dir) + writer.create_query_node_import(datasets=datasets, dir_file=args.export_dir) + writer.create_query_relationship(datasets=datasets, dir_file=args.export_dir) diff --git a/tests/conftest.py b/tests/conftest.py index 47113dc..b2c4059 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ pytest_plugins = [ "tests.fixtures.fixture_extractor", 
"tests.fixtures.fixture_exporter", + "tests.fixtures.fixture_writer", ] diff --git a/tests/fixtures/fixture_writer.py b/tests/fixtures/fixture_writer.py new file mode 100644 index 0000000..8837004 --- /dev/null +++ b/tests/fixtures/fixture_writer.py @@ -0,0 +1,57 @@ +import pytest + + +@pytest.fixture() +def datasets(): + return ["Reporting", "Analytics", "GitHub_Repos"] + + +@pytest.fixture() +def dir_file(): + return "neo4j" + + +@pytest.fixture() +def query_constraint(): + return ( + "CREATE CONSTRAINT table_name_ConstraintReporting ON (r:Reporting)\n" + "ASSERT r.table_name IS UNIQUE;\n" + "CREATE CONSTRAINT table_name_ConstraintAnalytics ON (a:Analytics)\n" + "ASSERT a.table_name IS UNIQUE;\n" + "CREATE CONSTRAINT table_name_ConstraintGitHub_Repos ON (g:GitHub_Repos)\n" + "ASSERT g.table_name IS UNIQUE;\n" + ) + + +@pytest.fixture() +def query_node_import(): + return ( + 'USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_tables.csv" AS csvLine\n' + "CREATE (:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()});\n" + 'USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_tables.csv" AS csvLine\n' + "CREATE (:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()});\n" + 'USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///github_repos_tables.csv" AS csvLine\n' + "CREATE (:GitHub_Repos {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()});\n" + ) + + +@pytest.fixture() +def query_rel(): + return ( + 'USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_analytics_dependency.csv" AS csvLine\n' + "MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)})\n" + "MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), 
table_dataset: toString(csvLine.dependency_dataset)})\n" + "CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b);\n" + 'USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_github_repos_dependency.csv" AS csvLine\n' + "MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)})\n" + "MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)})\n" + "CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b);\n" + 'USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_analytics_dependency.csv" AS csvLine\n' + "MERGE (a:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)})\n" + "MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)})\n" + "CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b);\n" + 'USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_github_repos_dependency.csv" AS csvLine\n' + "MERGE (a:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)})\n" + "MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)})\n" + "CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b);\n" + ) diff --git a/tests/unit/test_unit_writer.py b/tests/unit/test_unit_writer.py new file mode 100644 index 0000000..fd3392e --- /dev/null +++ b/tests/unit/test_unit_writer.py @@ -0,0 +1,16 @@ +import writer + + +def test_create_query_constraint(datasets, dir_file, query_constraint): + query = writer.create_query_constraint(datasets=datasets, dir_file=dir_file) + assert query == query_constraint + + +def test_create_query_node_import(datasets, dir_file, query_node_import): + query = writer.create_query_node_import(datasets=datasets, 
dir_file=dir_file) + assert query == query_node_import + + +def test_create_query_relationship(datasets, dir_file, query_rel): + query = writer.create_query_relationship(datasets=datasets, dir_file=dir_file) + assert query == query_rel diff --git a/writer.py b/writer.py new file mode 100644 index 0000000..5f8748c --- /dev/null +++ b/writer.py @@ -0,0 +1,77 @@ +import os + + +def create_query_constraint(datasets: list, dir_file: str) -> str: + """ + Write query to create constraints in Cypher and export as .cypher file. + + :param datasets: List of datasets/schema to create constraints from. + :param dir_file: String of the directory to store Cypher query in. + :return: String of the queries in the script exported. + """ + aliases = [txt[0].lower() for txt in datasets] + cypher = [] + for name, alias in zip(datasets, aliases): + query_constraint = ( + f"CREATE CONSTRAINT table_name_Constraint{name} ON ({alias}:{name})\n" + f"ASSERT {alias}.table_name IS UNIQUE;\n" + ) + cypher.append(query_constraint) + + cypher = "".join(cypher) + with open(file=f"{dir_file}/query_constraints.cypher", mode="w") as f: + f.write(cypher) + + return cypher + + +def create_query_node_import(datasets: list, dir_file: str) -> str: + """ + Write query to create nodes in Cypher and export as .cypher file. + + :param datasets: List of datasets/schema to create nodes from. + :param dir_file: String of the directory to store Cypher query in. + :return: String of the queries in the script exported. 
+ """ + cypher = [] + for name in datasets: + query_nodes = ( + f'USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///{name.lower()}_tables.csv" AS csvLine\n' + f"CREATE (:{name} {{table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}});\n" # noqa: E501 + ) + cypher.append(query_nodes) + cypher = "".join(cypher) + with open(file=f"{dir_file}/query_nodes.cypher", mode="w") as f: + f.write(cypher) + + return cypher + + +def create_query_relationship(datasets: list, dir_file: str) -> str: + """ + Write query to create relationship between nodes. + + :param datasets: List of datasets/schema to create nodes from. + :param dir_file: String of the directory to store Cypher query in. + :return: + """ + aliases = [txt.lower() for txt in datasets] + cypher = [] + for name, alias in zip(datasets, aliases): + for sub_name, sub_alias in zip(datasets, aliases): + file_name = f"{alias}_{sub_alias}_dependency.csv" + if file_name not in os.listdir(path=dir_file): + continue + else: + query_rel = ( + f'USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///{file_name}" AS csvLine\n' # noqa: E501 + f"MERGE (a:{name} {{table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}})\n" # noqa: E501 + f"MERGE (b:{sub_name} {{table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}})\n" # noqa: E501 + f"CREATE (a)-[:HAS_TABLE_DEPENDENCY {{import_datetime: datetime()}}]->(b);\n" + ) + cypher.append(query_rel) + cypher = "".join(cypher) + with open(file=f"{dir_file}/query_relationships.cypher", mode="w") as f: + f.write(cypher) + + return cypher