From 44d28b05a86e9b3d69a353a1e3893c911ad85034 Mon Sep 17 00:00:00 2001 From: avisionh Date: Sun, 13 Jun 2021 22:45:22 +0100 Subject: [PATCH 1/5] chore: Move example scripts to example directory This is to clearly delineate where code is stored for the example compared to actual. --- {sql => example}/analytics/author.sql | 0 {sql => example}/analytics/commit.sql | 0 {sql => example}/analytics/committer.sql | 0 {sql => example}/analytics/repo.sql | 0 {sql => example}/analytics/user.sql | 0 {sql => example}/reporting/user_activity.sql | 0 sql/.gitkeep | 0 tests/fixtures/fixture_extractor.py | 2 +- tests/integration/test_integration_extractor.py | 2 +- tests/unit/test_unit_extractor.py | 2 +- 10 files changed, 3 insertions(+), 3 deletions(-) rename {sql => example}/analytics/author.sql (100%) rename {sql => example}/analytics/commit.sql (100%) rename {sql => example}/analytics/committer.sql (100%) rename {sql => example}/analytics/repo.sql (100%) rename {sql => example}/analytics/user.sql (100%) rename {sql => example}/reporting/user_activity.sql (100%) create mode 100644 sql/.gitkeep diff --git a/sql/analytics/author.sql b/example/analytics/author.sql similarity index 100% rename from sql/analytics/author.sql rename to example/analytics/author.sql diff --git a/sql/analytics/commit.sql b/example/analytics/commit.sql similarity index 100% rename from sql/analytics/commit.sql rename to example/analytics/commit.sql diff --git a/sql/analytics/committer.sql b/example/analytics/committer.sql similarity index 100% rename from sql/analytics/committer.sql rename to example/analytics/committer.sql diff --git a/sql/analytics/repo.sql b/example/analytics/repo.sql similarity index 100% rename from sql/analytics/repo.sql rename to example/analytics/repo.sql diff --git a/sql/analytics/user.sql b/example/analytics/user.sql similarity index 100% rename from sql/analytics/user.sql rename to example/analytics/user.sql diff --git a/sql/reporting/user_activity.sql 
b/example/reporting/user_activity.sql similarity index 100% rename from sql/reporting/user_activity.sql rename to example/reporting/user_activity.sql diff --git a/sql/.gitkeep b/sql/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/fixtures/fixture_extractor.py b/tests/fixtures/fixture_extractor.py index 6b7d59d..9181ca7 100644 --- a/tests/fixtures/fixture_extractor.py +++ b/tests/fixtures/fixture_extractor.py @@ -3,7 +3,7 @@ @pytest.fixture() def query_user_activity(): - with open(file="sql/reporting/user_activity.sql", mode="r") as f: + with open(file="example/reporting/user_activity.sql", mode="r") as f: return f.read() diff --git a/tests/integration/test_integration_extractor.py b/tests/integration/test_integration_extractor.py index aedc25f..a9280ac 100644 --- a/tests/integration/test_integration_extractor.py +++ b/tests/integration/test_integration_extractor.py @@ -13,7 +13,7 @@ def test_extract_table_dependencies_from_queries( extract = [extracted_analytics, extracted_reporting] for i, schema in enumerate(schemes): - dir_report = f"sql/{schema}" + dir_report = f"example/{schema}" remove_txt = [] for table in os.listdir(dir_report): table_name, _ = os.path.splitext(p=table) diff --git a/tests/unit/test_unit_extractor.py b/tests/unit/test_unit_extractor.py index 4fa1f25..b43e983 100644 --- a/tests/unit/test_unit_extractor.py +++ b/tests/unit/test_unit_extractor.py @@ -3,7 +3,7 @@ def test_clean_query(query_user_activity, cleaned_user_activity): schema = "reporting" - dir_report = f"sql/{schema}" + dir_report = f"example/{schema}" extractor = Extractor(script_dir=dir_report, schema=schema) txt_remove = [ f"MERGE {schema}.user_activity USING (", From 7fcd29c3cdc71ce99b592fef77607d8af994e8b8 Mon Sep 17 00:00:00 2001 From: avisionh Date: Sun, 13 Jun 2021 22:56:34 +0100 Subject: [PATCH 2/5] refactor: Generate example data at Docker container build stage This is so we can reduce the steps in the README and abstract as much away from the actual 
generation and loading of the example data for users. --- Dockerfile | 22 ++++++++++++++++++++-- docker-compose.yml | 8 ++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4dffc76..2d74c23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,12 +4,30 @@ FROM python:${PY_VER} AS sqlqeurygraph-py ## Add the wait script to the image ADD https://github.com/ufoscout/docker-compose-wait/releases/download/2.9.0/wait /wait +# download poetry so can create requirements.txt +# for faster environment recreation +# https://pythonspeed.com/articles/pipenv-docker/ +RUN pip install --upgrade pip \ + && pip install poetry +COPY poetry.lock ./tmp +COPY pyproject.toml ./ +RUN poetry export -f requirements.txt -o requirements.txt + +# move relevant folders and files from local to container +COPY ./example ./example +COPY sqlquerygraph.py ./ +COPY exporter.py ./ +COPY extractor.py ./ +COPY writer.py ./ +COPY loader.py ./ + # install package for loader and give it executable rights -RUN pip install py2neo \ +RUN pip install -r requirements.txt \ && mkdir log \ && mkdir data \ + && mkdir neo4j \ + && python sqlquerygraph.py -sd 'example' -ed 'neo4j' -rd 'github_repos' 'analytics' 'reporting' && chmod +x /wait # move relevant files so they can be executed -COPY ./loader.py ./ COPY ./neo4j/*.cypher ./data diff --git a/docker-compose.yml b/docker-compose.yml index 1670a1e..1e6b470 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,7 +30,7 @@ services: - neo4j_network app: - build: . + build: . 
# build from Dockerfile container_name: app env_file: .env environment: @@ -42,8 +42,8 @@ services: - neo4j command: > sh -c "/wait \ - && python -m loader --file 'data/query_constraints.cypher' \ - && python -m loader --file 'data/query_nodes.cypher' \ - && python -m loader --file 'data/query_relationships.cypher'" + && python -m loader --file 'neo4j/query_constraints.cypher' \ + && python -m loader --file 'neo4j/query_nodes.cypher' \ + && python -m loader --file 'neo4j/query_relationships.cypher'" networks: - neo4j_network From 56113cf489c06abde6771e41beca7239d25ed788 Mon Sep 17 00:00:00 2001 From: avisionh Date: Mon, 14 Jun 2021 08:58:08 +0100 Subject: [PATCH 3/5] chore: Shift example stuff into example folder This is so there is a clean workspace for users to write to. --- Dockerfile | 2 +- README.md | 1 - docker-compose.yml | 8 ++++---- {neo4j => example/neo4j}/example_import.cypher | 0 example/neo4j/query_constraints.cypher | 6 ++++++ example/neo4j/query_nodes.cypher | 6 ++++++ example/neo4j/query_relationships.cypher | 16 ++++++++++++++++ example/{ => sql}/analytics/author.sql | 0 example/{ => sql}/analytics/commit.sql | 0 example/{ => sql}/analytics/committer.sql | 0 example/{ => sql}/analytics/repo.sql | 0 example/{ => sql}/analytics/user.sql | 0 example/{ => sql}/reporting/user_activity.sql | 0 neo4j/.gitkeep | 0 neo4j/analytics_analytics_dependency.csv | 1 - neo4j/analytics_github_repos_dependency.csv | 1 - neo4j/analytics_tables.csv | 1 - neo4j/github_repos_tables.csv | 1 - neo4j/reporting_analytics_dependency.csv | 1 - neo4j/reporting_github_repos_dependency.csv | 1 - neo4j/reporting_tables.csv | 1 - tests/fixtures/fixture_extractor.py | 2 +- tests/fixtures/fixture_writer.py | 2 +- tests/integration/test_integration_extractor.py | 2 +- tests/unit/test_unit_extractor.py | 2 +- 25 files changed, 37 insertions(+), 17 deletions(-) rename {neo4j => example/neo4j}/example_import.cypher (100%) mode change 100644 => 100755 create mode 100644 
example/neo4j/query_constraints.cypher create mode 100644 example/neo4j/query_nodes.cypher create mode 100644 example/neo4j/query_relationships.cypher rename example/{ => sql}/analytics/author.sql (100%) rename example/{ => sql}/analytics/commit.sql (100%) rename example/{ => sql}/analytics/committer.sql (100%) rename example/{ => sql}/analytics/repo.sql (100%) rename example/{ => sql}/analytics/user.sql (100%) rename example/{ => sql}/reporting/user_activity.sql (100%) mode change 100644 => 100755 neo4j/.gitkeep delete mode 100644 neo4j/analytics_analytics_dependency.csv delete mode 100644 neo4j/analytics_github_repos_dependency.csv delete mode 100644 neo4j/analytics_tables.csv delete mode 100644 neo4j/github_repos_tables.csv delete mode 100644 neo4j/reporting_analytics_dependency.csv delete mode 100644 neo4j/reporting_github_repos_dependency.csv delete mode 100644 neo4j/reporting_tables.csv diff --git a/Dockerfile b/Dockerfile index 2d74c23..432ef80 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,7 @@ RUN pip install -r requirements.txt \ && mkdir log \ && mkdir data \ && mkdir neo4j \ - && python sqlquerygraph.py -sd 'example' -ed 'neo4j' -rd 'github_repos' 'analytics' 'reporting' + && python sqlquerygraph.py -sd 'example/sql' -ed 'example/neo4j' -rd 'github_repos' 'analytics' 'reporting' \ && chmod +x /wait # move relevant files so they can be executed diff --git a/README.md b/README.md index 8f3856f..7342651 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,6 @@ To run the code in here, ensure your system meets the following requirements: - Python 3.8 or above; and - [Poetry](https://python-poetry.org/docs/) installed. 
- [`direnv`](https://direnv.net/) installed, including shell hooks; -- [`.envrc`](https://github.com/avisionh/sqlquerygraph/blob/main/.envrc) allowed/trusted by `direnv` to use the environment variables - see [below](#set-up); diff --git a/docker-compose.yml b/docker-compose.yml index 1e6b470..d73f687 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: - 7474:7474 # web client - 7687:7687 # db default port volumes: - - ./neo4j:/var/lib/neo4j/import + - ./example/neo4j:/var/lib/neo4j/import environment: - NEO4j_dbms.security.auth_enabled='true' # install graph-data-science plugin @@ -42,8 +42,8 @@ services: - neo4j command: > sh -c "/wait \ - && python -m loader --file 'neo4j/query_constraints.cypher' \ - && python -m loader --file 'neo4j/query_nodes.cypher' \ - && python -m loader --file 'neo4j/query_relationships.cypher'" + && python -m loader --file 'example/neo4j/query_constraints.cypher' \ + && python -m loader --file 'example/neo4j/query_nodes.cypher' \ + && python -m loader --file 'example/neo4j/query_relationships.cypher'" networks: - neo4j_network diff --git a/neo4j/example_import.cypher b/example/neo4j/example_import.cypher old mode 100644 new mode 100755 similarity index 100% rename from neo4j/example_import.cypher rename to example/neo4j/example_import.cypher diff --git a/example/neo4j/query_constraints.cypher b/example/neo4j/query_constraints.cypher new file mode 100644 index 0000000..20c1e09 --- /dev/null +++ b/example/neo4j/query_constraints.cypher @@ -0,0 +1,6 @@ +CREATE CONSTRAINT table_name_ConstraintReporting ON (r:Reporting) +ASSERT r.table_name IS UNIQUE; +CREATE CONSTRAINT table_name_ConstraintAnalytics ON (a:Analytics) +ASSERT a.table_name IS UNIQUE; +CREATE CONSTRAINT table_name_ConstraintGitHub_Repos ON (g:GitHub_Repos) +ASSERT g.table_name IS UNIQUE; diff --git a/example/neo4j/query_nodes.cypher b/example/neo4j/query_nodes.cypher new file mode 100644 index 0000000..6cc8a9d --- /dev/null +++ 
b/example/neo4j/query_nodes.cypher @@ -0,0 +1,6 @@ +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_tables.csv" AS csvLine +CREATE (:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_tables.csv" AS csvLine +CREATE (:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///github_repos_tables.csv" AS csvLine +CREATE (:GitHub_Repos {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); diff --git a/example/neo4j/query_relationships.cypher b/example/neo4j/query_relationships.cypher new file mode 100644 index 0000000..dc75760 --- /dev/null +++ b/example/neo4j/query_relationships.cypher @@ -0,0 +1,16 @@ +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_analytics_dependency.csv" AS csvLine +MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_github_repos_dependency.csv" AS csvLine +MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_analytics_dependency.csv" AS csvLine +MERGE (a:Analytics {table_name: 
toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_github_repos_dependency.csv" AS csvLine +MERGE (a:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); diff --git a/example/analytics/author.sql b/example/sql/analytics/author.sql similarity index 100% rename from example/analytics/author.sql rename to example/sql/analytics/author.sql diff --git a/example/analytics/commit.sql b/example/sql/analytics/commit.sql similarity index 100% rename from example/analytics/commit.sql rename to example/sql/analytics/commit.sql diff --git a/example/analytics/committer.sql b/example/sql/analytics/committer.sql similarity index 100% rename from example/analytics/committer.sql rename to example/sql/analytics/committer.sql diff --git a/example/analytics/repo.sql b/example/sql/analytics/repo.sql similarity index 100% rename from example/analytics/repo.sql rename to example/sql/analytics/repo.sql diff --git a/example/analytics/user.sql b/example/sql/analytics/user.sql similarity index 100% rename from example/analytics/user.sql rename to example/sql/analytics/user.sql diff --git a/example/reporting/user_activity.sql b/example/sql/reporting/user_activity.sql similarity index 100% rename from example/reporting/user_activity.sql rename to example/sql/reporting/user_activity.sql diff --git a/neo4j/.gitkeep b/neo4j/.gitkeep old mode 100644 new mode 100755 diff --git a/neo4j/analytics_analytics_dependency.csv b/neo4j/analytics_analytics_dependency.csv deleted 
file mode 100644 index fa0f231..0000000 --- a/neo4j/analytics_analytics_dependency.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/analytics_github_repos_dependency.csv b/neo4j/analytics_github_repos_dependency.csv deleted file mode 100644 index fa0f231..0000000 --- a/neo4j/analytics_github_repos_dependency.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/analytics_tables.csv b/neo4j/analytics_tables.csv deleted file mode 100644 index 0c6c848..0000000 --- a/neo4j/analytics_tables.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name diff --git a/neo4j/github_repos_tables.csv b/neo4j/github_repos_tables.csv deleted file mode 100644 index 0c6c848..0000000 --- a/neo4j/github_repos_tables.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name diff --git a/neo4j/reporting_analytics_dependency.csv b/neo4j/reporting_analytics_dependency.csv deleted file mode 100644 index fa0f231..0000000 --- a/neo4j/reporting_analytics_dependency.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/reporting_github_repos_dependency.csv b/neo4j/reporting_github_repos_dependency.csv deleted file mode 100644 index fa0f231..0000000 --- a/neo4j/reporting_github_repos_dependency.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/reporting_tables.csv b/neo4j/reporting_tables.csv deleted file mode 100644 index 0c6c848..0000000 --- a/neo4j/reporting_tables.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name diff --git a/tests/fixtures/fixture_extractor.py b/tests/fixtures/fixture_extractor.py index 9181ca7..5fa3c80 100644 --- a/tests/fixtures/fixture_extractor.py +++ b/tests/fixtures/fixture_extractor.py @@ -3,7 +3,7 @@ @pytest.fixture() def query_user_activity(): - with open(file="example/reporting/user_activity.sql", mode="r") as f: + with 
open(file="example/sql/reporting/user_activity.sql", mode="r") as f: return f.read() diff --git a/tests/fixtures/fixture_writer.py b/tests/fixtures/fixture_writer.py index 8837004..648b4ac 100644 --- a/tests/fixtures/fixture_writer.py +++ b/tests/fixtures/fixture_writer.py @@ -8,7 +8,7 @@ def datasets(): @pytest.fixture() def dir_file(): - return "neo4j" + return "example/neo4j" @pytest.fixture() diff --git a/tests/integration/test_integration_extractor.py b/tests/integration/test_integration_extractor.py index a9280ac..9bcb0fa 100644 --- a/tests/integration/test_integration_extractor.py +++ b/tests/integration/test_integration_extractor.py @@ -13,7 +13,7 @@ def test_extract_table_dependencies_from_queries( extract = [extracted_analytics, extracted_reporting] for i, schema in enumerate(schemes): - dir_report = f"example/{schema}" + dir_report = f"example/sql/{schema}" remove_txt = [] for table in os.listdir(dir_report): table_name, _ = os.path.splitext(p=table) diff --git a/tests/unit/test_unit_extractor.py b/tests/unit/test_unit_extractor.py index b43e983..c0e135d 100644 --- a/tests/unit/test_unit_extractor.py +++ b/tests/unit/test_unit_extractor.py @@ -3,7 +3,7 @@ def test_clean_query(query_user_activity, cleaned_user_activity): schema = "reporting" - dir_report = f"example/{schema}" + dir_report = f"example/sql/{schema}" extractor = Extractor(script_dir=dir_report, schema=schema) txt_remove = [ f"MERGE {schema}.user_activity USING (", From e599d6d02446ef80e5251a2379a58faf57d1836b Mon Sep 17 00:00:00 2001 From: avisionh Date: Mon, 14 Jun 2021 09:16:03 +0100 Subject: [PATCH 4/5] fix: Include empty data files This is so CI/CD checks pass. 
--- example/neo4j/analytics_analytics_dependency.csv | 1 + .../neo4j/analytics_github_repos_dependency.csv | 1 + example/neo4j/analytics_tables.csv | 1 + example/neo4j/github_repos_tables.csv | 1 + example/neo4j/query_constraints.cypher | 6 ------ example/neo4j/query_nodes.cypher | 6 ------ example/neo4j/query_relationships.cypher | 16 ---------------- example/neo4j/reporting_analytics_dependency.csv | 1 + .../neo4j/reporting_github_repos_dependency.csv | 1 + example/neo4j/reporting_tables.csv | 1 + 10 files changed, 7 insertions(+), 28 deletions(-) create mode 100755 example/neo4j/analytics_analytics_dependency.csv create mode 100755 example/neo4j/analytics_github_repos_dependency.csv create mode 100755 example/neo4j/analytics_tables.csv create mode 100755 example/neo4j/github_repos_tables.csv delete mode 100644 example/neo4j/query_constraints.cypher delete mode 100644 example/neo4j/query_nodes.cypher delete mode 100644 example/neo4j/query_relationships.cypher create mode 100755 example/neo4j/reporting_analytics_dependency.csv create mode 100755 example/neo4j/reporting_github_repos_dependency.csv create mode 100755 example/neo4j/reporting_tables.csv diff --git a/example/neo4j/analytics_analytics_dependency.csv b/example/neo4j/analytics_analytics_dependency.csv new file mode 100755 index 0000000..fa0f231 --- /dev/null +++ b/example/neo4j/analytics_analytics_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/example/neo4j/analytics_github_repos_dependency.csv b/example/neo4j/analytics_github_repos_dependency.csv new file mode 100755 index 0000000..fa0f231 --- /dev/null +++ b/example/neo4j/analytics_github_repos_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/example/neo4j/analytics_tables.csv b/example/neo4j/analytics_tables.csv new file mode 100755 index 0000000..0c6c848 --- /dev/null +++ b/example/neo4j/analytics_tables.csv @@ -0,0 +1 @@ 
+table_dataset,table_name diff --git a/example/neo4j/github_repos_tables.csv b/example/neo4j/github_repos_tables.csv new file mode 100755 index 0000000..0c6c848 --- /dev/null +++ b/example/neo4j/github_repos_tables.csv @@ -0,0 +1 @@ +table_dataset,table_name diff --git a/example/neo4j/query_constraints.cypher b/example/neo4j/query_constraints.cypher deleted file mode 100644 index 20c1e09..0000000 --- a/example/neo4j/query_constraints.cypher +++ /dev/null @@ -1,6 +0,0 @@ -CREATE CONSTRAINT table_name_ConstraintReporting ON (r:Reporting) -ASSERT r.table_name IS UNIQUE; -CREATE CONSTRAINT table_name_ConstraintAnalytics ON (a:Analytics) -ASSERT a.table_name IS UNIQUE; -CREATE CONSTRAINT table_name_ConstraintGitHub_Repos ON (g:GitHub_Repos) -ASSERT g.table_name IS UNIQUE; diff --git a/example/neo4j/query_nodes.cypher b/example/neo4j/query_nodes.cypher deleted file mode 100644 index 6cc8a9d..0000000 --- a/example/neo4j/query_nodes.cypher +++ /dev/null @@ -1,6 +0,0 @@ -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_tables.csv" AS csvLine -CREATE (:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_tables.csv" AS csvLine -CREATE (:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///github_repos_tables.csv" AS csvLine -CREATE (:GitHub_Repos {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); diff --git a/example/neo4j/query_relationships.cypher b/example/neo4j/query_relationships.cypher deleted file mode 100644 index dc75760..0000000 --- a/example/neo4j/query_relationships.cypher +++ /dev/null @@ -1,16 +0,0 @@ -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM 
"file:///reporting_analytics_dependency.csv" AS csvLine -MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) -MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) -CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_github_repos_dependency.csv" AS csvLine -MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) -MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) -CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_analytics_dependency.csv" AS csvLine -MERGE (a:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) -MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) -CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_github_repos_dependency.csv" AS csvLine -MERGE (a:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) -MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) -CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); diff --git a/example/neo4j/reporting_analytics_dependency.csv b/example/neo4j/reporting_analytics_dependency.csv new file mode 100755 index 0000000..fa0f231 --- /dev/null +++ b/example/neo4j/reporting_analytics_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/example/neo4j/reporting_github_repos_dependency.csv 
b/example/neo4j/reporting_github_repos_dependency.csv new file mode 100755 index 0000000..fa0f231 --- /dev/null +++ b/example/neo4j/reporting_github_repos_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/example/neo4j/reporting_tables.csv b/example/neo4j/reporting_tables.csv new file mode 100755 index 0000000..0c6c848 --- /dev/null +++ b/example/neo4j/reporting_tables.csv @@ -0,0 +1 @@ +table_dataset,table_name From 9bdaa2e94540d6c73ae283777106a095f8ed59d8 Mon Sep 17 00:00:00 2001 From: avisionh Date: Mon, 14 Jun 2021 09:29:42 +0100 Subject: [PATCH 5/5] docs: Expand on module instructions This is so we can explain how users can generate their own data. --- README.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7342651..da8f29d 100644 --- a/README.md +++ b/README.md @@ -40,9 +40,9 @@ poetry install pre-commit install ``` -To then extract the tables and their dependencies from the example SQL scripts in the `sql/` directory, run the following in your shell/terminal: +To then extract the tables and their dependencies from the example SQL scripts in the `sql/` directory, run the below in your shell/terminal. It will generate `.csv` files of the tables and their dependencies. It will also generate `.cypher` files to enable you to import the data into neo4j, after you have added the `.csv` files to the database. ```shell script -python sqlquerygraph.py -sd 'sql' -ed 'neo4j' -rd 'github_repos' 'analytics' 'reporting' +python sqlquerygraph.py -sd 'sql' -ed 'neo4j' -rd '' ``` ### Run neo4j graph database @@ -58,19 +58,16 @@ We use [neo4j](https://neo4j.com/) for this project to visualise the dependencie export NEO4J_USERNAME=neo4j export NEO4J_PASSWORD= ``` - -1. 
Update your `.env` file to take in the new `.secrets` file you created by entering the below in your shell/terminal: + Then update your `.env` file to take in the new `.secrets` file you created by entering the below in your shell/terminal: ```shell script direnv allow ``` 1. Build the Docker image and launch the container. Within this directory that has the `docker-compose.yml` file, run the below in your shell/terminal: ```shell script - docker-compose build docker-compose up ``` - -1. If it's the first time you have downloaded the neo4j docker image, wait awhile (maybe 15 minutes, depends on your machine specs). If you have downloaded the neo4j docker image before (such as going through these instructions), then wait a few minutes. You will know when it's ready when you get the following message in your terminal: + You will know when it's ready when you get the following message in your terminal: ``` app | [INFO wait] Host [neo4j:7687] is now available! app | [INFO wait] -------------------------------------------------------- @@ -80,7 +77,7 @@ We use [neo4j](https://neo4j.com/) for this project to visualise the dependencie Then launch neo4j locally via opening your web browser and entering the following web address: - http://localhost:7474/ -1. The username and password will those specified in your `.secrets` file. + The username and password will be those specified in your `.secrets` file. 1. When you have finished playing with your local neo4j instance, remember to stop it running by executing the below in your shell/terminal: ```shell script