From 44d28b05a86e9b3d69a353a1e3893c911ad85034 Mon Sep 17 00:00:00 2001 From: avisionh Date: Sun, 13 Jun 2021 22:45:22 +0100 Subject: [PATCH 1/5] chore: Move example scripts to example directory This is to clearly delineate where code is stored for the example compared to actual. --- {sql => example}/analytics/author.sql | 0 {sql => example}/analytics/commit.sql | 0 {sql => example}/analytics/committer.sql | 0 {sql => example}/analytics/repo.sql | 0 {sql => example}/analytics/user.sql | 0 {sql => example}/reporting/user_activity.sql | 0 sql/.gitkeep | 0 tests/fixtures/fixture_extractor.py | 2 +- tests/integration/test_integration_extractor.py | 2 +- tests/unit/test_unit_extractor.py | 2 +- 10 files changed, 3 insertions(+), 3 deletions(-) rename {sql => example}/analytics/author.sql (100%) rename {sql => example}/analytics/commit.sql (100%) rename {sql => example}/analytics/committer.sql (100%) rename {sql => example}/analytics/repo.sql (100%) rename {sql => example}/analytics/user.sql (100%) rename {sql => example}/reporting/user_activity.sql (100%) create mode 100644 sql/.gitkeep diff --git a/sql/analytics/author.sql b/example/analytics/author.sql similarity index 100% rename from sql/analytics/author.sql rename to example/analytics/author.sql diff --git a/sql/analytics/commit.sql b/example/analytics/commit.sql similarity index 100% rename from sql/analytics/commit.sql rename to example/analytics/commit.sql diff --git a/sql/analytics/committer.sql b/example/analytics/committer.sql similarity index 100% rename from sql/analytics/committer.sql rename to example/analytics/committer.sql diff --git a/sql/analytics/repo.sql b/example/analytics/repo.sql similarity index 100% rename from sql/analytics/repo.sql rename to example/analytics/repo.sql diff --git a/sql/analytics/user.sql b/example/analytics/user.sql similarity index 100% rename from sql/analytics/user.sql rename to example/analytics/user.sql diff --git a/sql/reporting/user_activity.sql 
b/example/reporting/user_activity.sql similarity index 100% rename from sql/reporting/user_activity.sql rename to example/reporting/user_activity.sql diff --git a/sql/.gitkeep b/sql/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/fixtures/fixture_extractor.py b/tests/fixtures/fixture_extractor.py index 6b7d59d..9181ca7 100644 --- a/tests/fixtures/fixture_extractor.py +++ b/tests/fixtures/fixture_extractor.py @@ -3,7 +3,7 @@ @pytest.fixture() def query_user_activity(): - with open(file="sql/reporting/user_activity.sql", mode="r") as f: + with open(file="example/reporting/user_activity.sql", mode="r") as f: return f.read() diff --git a/tests/integration/test_integration_extractor.py b/tests/integration/test_integration_extractor.py index aedc25f..a9280ac 100644 --- a/tests/integration/test_integration_extractor.py +++ b/tests/integration/test_integration_extractor.py @@ -13,7 +13,7 @@ def test_extract_table_dependencies_from_queries( extract = [extracted_analytics, extracted_reporting] for i, schema in enumerate(schemes): - dir_report = f"sql/{schema}" + dir_report = f"example/{schema}" remove_txt = [] for table in os.listdir(dir_report): table_name, _ = os.path.splitext(p=table) diff --git a/tests/unit/test_unit_extractor.py b/tests/unit/test_unit_extractor.py index 4fa1f25..b43e983 100644 --- a/tests/unit/test_unit_extractor.py +++ b/tests/unit/test_unit_extractor.py @@ -3,7 +3,7 @@ def test_clean_query(query_user_activity, cleaned_user_activity): schema = "reporting" - dir_report = f"sql/{schema}" + dir_report = f"example/{schema}" extractor = Extractor(script_dir=dir_report, schema=schema) txt_remove = [ f"MERGE {schema}.user_activity USING (", From 7fcd29c3cdc71ce99b592fef77607d8af994e8b8 Mon Sep 17 00:00:00 2001 From: avisionh Date: Sun, 13 Jun 2021 22:56:34 +0100 Subject: [PATCH 2/5] refactor: Generate example data at Docker container build stage This is so we can reduce the steps in the README and abstract as much away from the actual 
generation and loading of the example data for users. --- Dockerfile | 22 ++++++++++++++++++++-- docker-compose.yml | 8 ++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index 4dffc76..2d74c23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,12 +4,30 @@ FROM python:${PY_VER} AS sqlqeurygraph-py ## Add the wait script to the image ADD https://github.com/ufoscout/docker-compose-wait/releases/download/2.9.0/wait /wait +# download poetry so can create requirements.txt +# for faster environment recreation +# https://pythonspeed.com/articles/pipenv-docker/ +RUN pip install --upgrade pip \ + && pip install poetry +COPY poetry.lock ./tmp +COPY pyproject.toml ./ +RUN poetry export -f requirements.txt -o requirements.txt + +# move relevant folders and files from local to container +COPY ./example ./example +COPY sqlquerygraph.py ./ +COPY exporter.py ./ +COPY extractor.py ./ +COPY writer.py ./ +COPY loader.py ./ + # install package for loader and give it executable rights -RUN pip install py2neo \ +RUN pip install -r requirements.txt \ && mkdir log \ && mkdir data \ + && mkdir neo4j \ + && python sqlquerygraph.py -sd 'example' -ed 'neo4j' -rd 'github_repos' 'analytics' 'reporting' && chmod +x /wait # move relevant files so they can be executed -COPY ./loader.py ./ COPY ./neo4j/*.cypher ./data diff --git a/docker-compose.yml b/docker-compose.yml index 1670a1e..1e6b470 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,7 +30,7 @@ services: - neo4j_network app: - build: . + build: . 
# build from Dockerfile container_name: app env_file: .env environment: @@ -42,8 +42,8 @@ services: - neo4j command: > sh -c "/wait \ - && python -m loader --file 'data/query_constraints.cypher' \ - && python -m loader --file 'data/query_nodes.cypher' \ - && python -m loader --file 'data/query_relationships.cypher'" + && python -m loader --file 'neo4j/query_constraints.cypher' \ + && python -m loader --file 'neo4j/query_nodes.cypher' \ + && python -m loader --file 'neo4j/query_relationships.cypher'" networks: - neo4j_network From 56113cf489c06abde6771e41beca7239d25ed788 Mon Sep 17 00:00:00 2001 From: avisionh Date: Mon, 14 Jun 2021 08:58:08 +0100 Subject: [PATCH 3/5] chore: Shift example stuff into example folder This is so there is a clean workspace for users to write to. --- Dockerfile | 2 +- README.md | 1 - docker-compose.yml | 8 ++++---- {neo4j => example/neo4j}/example_import.cypher | 0 example/neo4j/query_constraints.cypher | 6 ++++++ example/neo4j/query_nodes.cypher | 6 ++++++ example/neo4j/query_relationships.cypher | 16 ++++++++++++++++ example/{ => sql}/analytics/author.sql | 0 example/{ => sql}/analytics/commit.sql | 0 example/{ => sql}/analytics/committer.sql | 0 example/{ => sql}/analytics/repo.sql | 0 example/{ => sql}/analytics/user.sql | 0 example/{ => sql}/reporting/user_activity.sql | 0 neo4j/.gitkeep | 0 neo4j/analytics_analytics_dependency.csv | 1 - neo4j/analytics_github_repos_dependency.csv | 1 - neo4j/analytics_tables.csv | 1 - neo4j/github_repos_tables.csv | 1 - neo4j/reporting_analytics_dependency.csv | 1 - neo4j/reporting_github_repos_dependency.csv | 1 - neo4j/reporting_tables.csv | 1 - tests/fixtures/fixture_extractor.py | 2 +- tests/fixtures/fixture_writer.py | 2 +- tests/integration/test_integration_extractor.py | 2 +- tests/unit/test_unit_extractor.py | 2 +- 25 files changed, 37 insertions(+), 17 deletions(-) rename {neo4j => example/neo4j}/example_import.cypher (100%) mode change 100644 => 100755 create mode 100644 
example/neo4j/query_constraints.cypher create mode 100644 example/neo4j/query_nodes.cypher create mode 100644 example/neo4j/query_relationships.cypher rename example/{ => sql}/analytics/author.sql (100%) rename example/{ => sql}/analytics/commit.sql (100%) rename example/{ => sql}/analytics/committer.sql (100%) rename example/{ => sql}/analytics/repo.sql (100%) rename example/{ => sql}/analytics/user.sql (100%) rename example/{ => sql}/reporting/user_activity.sql (100%) mode change 100644 => 100755 neo4j/.gitkeep delete mode 100644 neo4j/analytics_analytics_dependency.csv delete mode 100644 neo4j/analytics_github_repos_dependency.csv delete mode 100644 neo4j/analytics_tables.csv delete mode 100644 neo4j/github_repos_tables.csv delete mode 100644 neo4j/reporting_analytics_dependency.csv delete mode 100644 neo4j/reporting_github_repos_dependency.csv delete mode 100644 neo4j/reporting_tables.csv diff --git a/Dockerfile b/Dockerfile index 2d74c23..432ef80 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,7 +26,7 @@ RUN pip install -r requirements.txt \ && mkdir log \ && mkdir data \ && mkdir neo4j \ - && python sqlquerygraph.py -sd 'example' -ed 'neo4j' -rd 'github_repos' 'analytics' 'reporting' + && python sqlquerygraph.py -sd 'example/sql' -ed 'example/neo4j' -rd 'github_repos' 'analytics' 'reporting' \ && chmod +x /wait # move relevant files so they can be executed diff --git a/README.md b/README.md index 8f3856f..7342651 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,6 @@ To run the code in here, ensure your system meets the following requirements: - Python 3.8 or above; and - [Poetry](https://python-poetry.org/docs/) installed. 
- [`direnv`](https://direnv.net/) installed, including shell hooks; -- [`.envrc`](https://github.com/avisionh/sqlquerygraph/blob/main/.envrc) allowed/trusted by `direnv` to use the environment variables - see [below](#set-up); diff --git a/docker-compose.yml b/docker-compose.yml index 1e6b470..d73f687 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: - 7474:7474 # web client - 7687:7687 # db default port volumes: - - ./neo4j:/var/lib/neo4j/import + - ./example/neo4j:/var/lib/neo4j/import environment: - NEO4j_dbms.security.auth_enabled='true' # install graph-data-science plugin @@ -42,8 +42,8 @@ services: - neo4j command: > sh -c "/wait \ - && python -m loader --file 'neo4j/query_constraints.cypher' \ - && python -m loader --file 'neo4j/query_nodes.cypher' \ - && python -m loader --file 'neo4j/query_relationships.cypher'" + && python -m loader --file 'example/neo4j/query_constraints.cypher' \ + && python -m loader --file 'example/neo4j/query_nodes.cypher' \ + && python -m loader --file 'example/neo4j/query_relationships.cypher'" networks: - neo4j_network diff --git a/neo4j/example_import.cypher b/example/neo4j/example_import.cypher old mode 100644 new mode 100755 similarity index 100% rename from neo4j/example_import.cypher rename to example/neo4j/example_import.cypher diff --git a/example/neo4j/query_constraints.cypher b/example/neo4j/query_constraints.cypher new file mode 100644 index 0000000..20c1e09 --- /dev/null +++ b/example/neo4j/query_constraints.cypher @@ -0,0 +1,6 @@ +CREATE CONSTRAINT table_name_ConstraintReporting ON (r:Reporting) +ASSERT r.table_name IS UNIQUE; +CREATE CONSTRAINT table_name_ConstraintAnalytics ON (a:Analytics) +ASSERT a.table_name IS UNIQUE; +CREATE CONSTRAINT table_name_ConstraintGitHub_Repos ON (g:GitHub_Repos) +ASSERT g.table_name IS UNIQUE; diff --git a/example/neo4j/query_nodes.cypher b/example/neo4j/query_nodes.cypher new file mode 100644 index 0000000..6cc8a9d --- /dev/null +++ 
b/example/neo4j/query_nodes.cypher @@ -0,0 +1,6 @@ +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_tables.csv" AS csvLine +CREATE (:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_tables.csv" AS csvLine +CREATE (:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///github_repos_tables.csv" AS csvLine +CREATE (:GitHub_Repos {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); diff --git a/example/neo4j/query_relationships.cypher b/example/neo4j/query_relationships.cypher new file mode 100644 index 0000000..dc75760 --- /dev/null +++ b/example/neo4j/query_relationships.cypher @@ -0,0 +1,16 @@ +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_analytics_dependency.csv" AS csvLine +MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_github_repos_dependency.csv" AS csvLine +MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_analytics_dependency.csv" AS csvLine +MERGE (a:Analytics {table_name: 
toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); +USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_github_repos_dependency.csv" AS csvLine +MERGE (a:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) +MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) +CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); diff --git a/example/analytics/author.sql b/example/sql/analytics/author.sql similarity index 100% rename from example/analytics/author.sql rename to example/sql/analytics/author.sql diff --git a/example/analytics/commit.sql b/example/sql/analytics/commit.sql similarity index 100% rename from example/analytics/commit.sql rename to example/sql/analytics/commit.sql diff --git a/example/analytics/committer.sql b/example/sql/analytics/committer.sql similarity index 100% rename from example/analytics/committer.sql rename to example/sql/analytics/committer.sql diff --git a/example/analytics/repo.sql b/example/sql/analytics/repo.sql similarity index 100% rename from example/analytics/repo.sql rename to example/sql/analytics/repo.sql diff --git a/example/analytics/user.sql b/example/sql/analytics/user.sql similarity index 100% rename from example/analytics/user.sql rename to example/sql/analytics/user.sql diff --git a/example/reporting/user_activity.sql b/example/sql/reporting/user_activity.sql similarity index 100% rename from example/reporting/user_activity.sql rename to example/sql/reporting/user_activity.sql diff --git a/neo4j/.gitkeep b/neo4j/.gitkeep old mode 100644 new mode 100755 diff --git a/neo4j/analytics_analytics_dependency.csv b/neo4j/analytics_analytics_dependency.csv deleted 
file mode 100644 index fa0f231..0000000 --- a/neo4j/analytics_analytics_dependency.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/analytics_github_repos_dependency.csv b/neo4j/analytics_github_repos_dependency.csv deleted file mode 100644 index fa0f231..0000000 --- a/neo4j/analytics_github_repos_dependency.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/analytics_tables.csv b/neo4j/analytics_tables.csv deleted file mode 100644 index 0c6c848..0000000 --- a/neo4j/analytics_tables.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name diff --git a/neo4j/github_repos_tables.csv b/neo4j/github_repos_tables.csv deleted file mode 100644 index 0c6c848..0000000 --- a/neo4j/github_repos_tables.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name diff --git a/neo4j/reporting_analytics_dependency.csv b/neo4j/reporting_analytics_dependency.csv deleted file mode 100644 index fa0f231..0000000 --- a/neo4j/reporting_analytics_dependency.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/reporting_github_repos_dependency.csv b/neo4j/reporting_github_repos_dependency.csv deleted file mode 100644 index fa0f231..0000000 --- a/neo4j/reporting_github_repos_dependency.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name,dependency_dataset,dependency_name diff --git a/neo4j/reporting_tables.csv b/neo4j/reporting_tables.csv deleted file mode 100644 index 0c6c848..0000000 --- a/neo4j/reporting_tables.csv +++ /dev/null @@ -1 +0,0 @@ -table_dataset,table_name diff --git a/tests/fixtures/fixture_extractor.py b/tests/fixtures/fixture_extractor.py index 9181ca7..5fa3c80 100644 --- a/tests/fixtures/fixture_extractor.py +++ b/tests/fixtures/fixture_extractor.py @@ -3,7 +3,7 @@ @pytest.fixture() def query_user_activity(): - with open(file="example/reporting/user_activity.sql", mode="r") as f: + with 
open(file="example/sql/reporting/user_activity.sql", mode="r") as f: return f.read() diff --git a/tests/fixtures/fixture_writer.py b/tests/fixtures/fixture_writer.py index 8837004..648b4ac 100644 --- a/tests/fixtures/fixture_writer.py +++ b/tests/fixtures/fixture_writer.py @@ -8,7 +8,7 @@ def datasets(): @pytest.fixture() def dir_file(): - return "neo4j" + return "example/neo4j" @pytest.fixture() diff --git a/tests/integration/test_integration_extractor.py b/tests/integration/test_integration_extractor.py index a9280ac..9bcb0fa 100644 --- a/tests/integration/test_integration_extractor.py +++ b/tests/integration/test_integration_extractor.py @@ -13,7 +13,7 @@ def test_extract_table_dependencies_from_queries( extract = [extracted_analytics, extracted_reporting] for i, schema in enumerate(schemes): - dir_report = f"example/{schema}" + dir_report = f"example/sql/{schema}" remove_txt = [] for table in os.listdir(dir_report): table_name, _ = os.path.splitext(p=table) diff --git a/tests/unit/test_unit_extractor.py b/tests/unit/test_unit_extractor.py index b43e983..c0e135d 100644 --- a/tests/unit/test_unit_extractor.py +++ b/tests/unit/test_unit_extractor.py @@ -3,7 +3,7 @@ def test_clean_query(query_user_activity, cleaned_user_activity): schema = "reporting" - dir_report = f"example/{schema}" + dir_report = f"example/sql/{schema}" extractor = Extractor(script_dir=dir_report, schema=schema) txt_remove = [ f"MERGE {schema}.user_activity USING (", From e599d6d02446ef80e5251a2379a58faf57d1836b Mon Sep 17 00:00:00 2001 From: avisionh Date: Mon, 14 Jun 2021 09:16:03 +0100 Subject: [PATCH 4/5] fix: Include empty data files This is so CI/CD checks pass. 
--- example/neo4j/analytics_analytics_dependency.csv | 1 + .../neo4j/analytics_github_repos_dependency.csv | 1 + example/neo4j/analytics_tables.csv | 1 + example/neo4j/github_repos_tables.csv | 1 + example/neo4j/query_constraints.cypher | 6 ------ example/neo4j/query_nodes.cypher | 6 ------ example/neo4j/query_relationships.cypher | 16 ---------------- example/neo4j/reporting_analytics_dependency.csv | 1 + .../neo4j/reporting_github_repos_dependency.csv | 1 + example/neo4j/reporting_tables.csv | 1 + 10 files changed, 7 insertions(+), 28 deletions(-) create mode 100755 example/neo4j/analytics_analytics_dependency.csv create mode 100755 example/neo4j/analytics_github_repos_dependency.csv create mode 100755 example/neo4j/analytics_tables.csv create mode 100755 example/neo4j/github_repos_tables.csv delete mode 100644 example/neo4j/query_constraints.cypher delete mode 100644 example/neo4j/query_nodes.cypher delete mode 100644 example/neo4j/query_relationships.cypher create mode 100755 example/neo4j/reporting_analytics_dependency.csv create mode 100755 example/neo4j/reporting_github_repos_dependency.csv create mode 100755 example/neo4j/reporting_tables.csv diff --git a/example/neo4j/analytics_analytics_dependency.csv b/example/neo4j/analytics_analytics_dependency.csv new file mode 100755 index 0000000..fa0f231 --- /dev/null +++ b/example/neo4j/analytics_analytics_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/example/neo4j/analytics_github_repos_dependency.csv b/example/neo4j/analytics_github_repos_dependency.csv new file mode 100755 index 0000000..fa0f231 --- /dev/null +++ b/example/neo4j/analytics_github_repos_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/example/neo4j/analytics_tables.csv b/example/neo4j/analytics_tables.csv new file mode 100755 index 0000000..0c6c848 --- /dev/null +++ b/example/neo4j/analytics_tables.csv @@ -0,0 +1 @@ 
+table_dataset,table_name diff --git a/example/neo4j/github_repos_tables.csv b/example/neo4j/github_repos_tables.csv new file mode 100755 index 0000000..0c6c848 --- /dev/null +++ b/example/neo4j/github_repos_tables.csv @@ -0,0 +1 @@ +table_dataset,table_name diff --git a/example/neo4j/query_constraints.cypher b/example/neo4j/query_constraints.cypher deleted file mode 100644 index 20c1e09..0000000 --- a/example/neo4j/query_constraints.cypher +++ /dev/null @@ -1,6 +0,0 @@ -CREATE CONSTRAINT table_name_ConstraintReporting ON (r:Reporting) -ASSERT r.table_name IS UNIQUE; -CREATE CONSTRAINT table_name_ConstraintAnalytics ON (a:Analytics) -ASSERT a.table_name IS UNIQUE; -CREATE CONSTRAINT table_name_ConstraintGitHub_Repos ON (g:GitHub_Repos) -ASSERT g.table_name IS UNIQUE; diff --git a/example/neo4j/query_nodes.cypher b/example/neo4j/query_nodes.cypher deleted file mode 100644 index 6cc8a9d..0000000 --- a/example/neo4j/query_nodes.cypher +++ /dev/null @@ -1,6 +0,0 @@ -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_tables.csv" AS csvLine -CREATE (:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_tables.csv" AS csvLine -CREATE (:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///github_repos_tables.csv" AS csvLine -CREATE (:GitHub_Repos {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset), import_datetime: datetime()}); diff --git a/example/neo4j/query_relationships.cypher b/example/neo4j/query_relationships.cypher deleted file mode 100644 index dc75760..0000000 --- a/example/neo4j/query_relationships.cypher +++ /dev/null @@ -1,16 +0,0 @@ -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM 
"file:///reporting_analytics_dependency.csv" AS csvLine -MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) -MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) -CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///reporting_github_repos_dependency.csv" AS csvLine -MERGE (a:Reporting {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) -MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) -CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_analytics_dependency.csv" AS csvLine -MERGE (a:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) -MERGE (b:Analytics {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) -CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); -USING PERIODIC COMMIT 500 LOAD CSV WITH HEADERS FROM "file:///analytics_github_repos_dependency.csv" AS csvLine -MERGE (a:Analytics {table_name: toString(csvLine.table_name), table_dataset: toString(csvLine.table_dataset)}) -MERGE (b:GitHub_Repos {table_name: toString(csvLine.dependency_name), table_dataset: toString(csvLine.dependency_dataset)}) -CREATE (a)-[:HAS_TABLE_DEPENDENCY {import_datetime: datetime()}]->(b); diff --git a/example/neo4j/reporting_analytics_dependency.csv b/example/neo4j/reporting_analytics_dependency.csv new file mode 100755 index 0000000..fa0f231 --- /dev/null +++ b/example/neo4j/reporting_analytics_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/example/neo4j/reporting_github_repos_dependency.csv 
b/example/neo4j/reporting_github_repos_dependency.csv new file mode 100755 index 0000000..fa0f231 --- /dev/null +++ b/example/neo4j/reporting_github_repos_dependency.csv @@ -0,0 +1 @@ +table_dataset,table_name,dependency_dataset,dependency_name diff --git a/example/neo4j/reporting_tables.csv b/example/neo4j/reporting_tables.csv new file mode 100755 index 0000000..0c6c848 --- /dev/null +++ b/example/neo4j/reporting_tables.csv @@ -0,0 +1 @@ +table_dataset,table_name From 9bdaa2e94540d6c73ae283777106a095f8ed59d8 Mon Sep 17 00:00:00 2001 From: avisionh Date: Mon, 14 Jun 2021 09:29:42 +0100 Subject: [PATCH 5/5] docs: Expand on module instructions This is so we can explain how users can generate their own data. --- README.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7342651..da8f29d 100644 --- a/README.md +++ b/README.md @@ -40,9 +40,9 @@ poetry install pre-commit install ``` -To then extract the tables and their dependencies from the example SQL scripts in the `sql/` directory, run the following in your shell/terminal: +To then extract the tables and their dependencies from the example SQL scripts in the `sql/` directory, run the below in your shell/terminal. It will generate `.csv` files of the tables and their dependencies. It will also generate `.cypher` files to enable you to import the data into neo4j, after you have added the `.csv` files to the database. ```shell script -python sqlquerygraph.py -sd 'sql' -ed 'neo4j' -rd 'github_repos' 'analytics' 'reporting' +python sqlquerygraph.py -sd 'sql' -ed 'neo4j' -rd '' ``` ### Run neo4j graph database @@ -58,19 +58,16 @@ We use [neo4j](https://neo4j.com/) for this project to visualise the dependencie export NEO4J_USERNAME=neo4j export NEO4J_PASSWORD= ``` - -1. 
Update your `.env` file to take in the new `.secrets` file you created by entering the below in your shell/terminal: + Then update your `.env` file to take in the new `.secrets` file you created by entering the below in your shell/terminal: ```shell script direnv allow ``` 1. Build the Docker image and launch the container. Within this directory that has the `docker-compose.yml` file, run the below in your shell/terminal: ```shell script - docker-compose build docker-compose up ``` - -1. If it's the first time you have downloaded the neo4j docker image, wait awhile (maybe 15 minutes, depends on your machine specs). If you have downloaded the neo4j docker image before (such as going through these instructions), then wait a few minutes. You will know when it's ready when you get the following message in your terminal: + You will know when it's ready when you get the following message in your terminal: ``` app | [INFO wait] Host [neo4j:7687] is now available! app | [INFO wait] -------------------------------------------------------- @@ -80,7 +77,7 @@ We use [neo4j](https://neo4j.com/) for this project to visualise the dependencie Then launch neo4j locally via opening your web browser and entering the following web address: - http://localhost:7474/ -1. The username and password will those specified in your `.secrets` file. + The username and password will be those specified in your `.secrets` file. 1. When you have finished playing with your local neo4j instance, remember to stop it running by executing the below in your shell/terminal: ```shell script