diff --git a/Dockerfile b/Dockerfile index 4dffc76..432ef80 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,12 +4,30 @@ FROM python:${PY_VER} AS sqlqeurygraph-py ## Add the wait script to the image ADD https://github.com/ufoscout/docker-compose-wait/releases/download/2.9.0/wait /wait +# install poetry so we can export requirements.txt +# for faster environment recreation +# https://pythonspeed.com/articles/pipenv-docker/ +RUN pip install --upgrade pip \ + && pip install poetry +COPY poetry.lock ./tmp +COPY pyproject.toml ./ +RUN poetry export -f requirements.txt -o requirements.txt + +# move relevant folders and files from local to container +COPY ./example ./example +COPY sqlquerygraph.py ./ +COPY exporter.py ./ +COPY extractor.py ./ +COPY writer.py ./ +COPY loader.py ./ + # install package for loader and give it executable rights -RUN pip install py2neo \ +RUN pip install -r requirements.txt \ && mkdir log \ && mkdir data \ + && mkdir neo4j \ + && python sqlquerygraph.py -sd 'example/sql' -ed 'example/neo4j' -rd 'github_repos' 'analytics' 'reporting' \ && chmod +x /wait # move relevant files so they can be executed -COPY ./loader.py ./ COPY ./neo4j/*.cypher ./data diff --git a/README.md b/README.md index 8f3856f..da8f29d 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,6 @@ To run the code in here, ensure your system meets the following requirements: - Python 3.8 or above; and - [Poetry](https://python-poetry.org/docs/) installed. 
- [`direnv`](https://direnv.net/) installed, including shell hooks; -- [`.envrc`](https://github.com/avisionh/sqlquerygraph/blob/main/.envrc) allowed/trusted by `direnv` to use the environment variables - see [below](#set-up); @@ -41,9 +40,9 @@ poetry install pre-commit install ``` -To then extract the tables and their dependencies from the example SQL scripts in the `sql/` directory, run the following in your shell/terminal: +To then extract the tables and their dependencies from the example SQL scripts in the `sql/` directory, run the below in your shell/terminal. It will generate `.csv` files of the tables and their dependencies. It will also generate `.cypher` files to enable you to import the data into neo4j, after you have added the `.csv` files to the database. ```shell script -python sqlquerygraph.py -sd 'sql' -ed 'neo4j' -rd 'github_repos' 'analytics' 'reporting' +python sqlquerygraph.py -sd 'sql' -ed 'neo4j' -rd '' ``` ### Run neo4j graph database @@ -59,19 +58,16 @@ We use [neo4j](https://neo4j.com/) for this project to visualise the dependencie export NEO4J_USERNAME=neo4j export NEO4J_PASSWORD= ``` - -1. Update your `.env` file to take in the new `.secrets` file you created by entering the below in your shell/terminal: + Then update your `.env` file to take in the new `.secrets` file you created by entering the below in your shell/terminal: ```shell script direnv allow ``` 1. Build the Docker image and launch the container. Within this directory that has the `docker-compose.yml` file, run the below in your shell/terminal: ```shell script - docker-compose build docker-compose up ``` - -1. If it's the first time you have downloaded the neo4j docker image, wait awhile (maybe 15 minutes, depends on your machine specs). If you have downloaded the neo4j docker image before (such as going through these instructions), then wait a few minutes. 
You will know when it's ready when you get the following message in your terminal: + You will know when it's ready when you get the following message in your terminal: ``` app | [INFO wait] Host [neo4j:7687] is now available! app | [INFO wait] -------------------------------------------------------- @@ -81,7 +77,7 @@ We use [neo4j](https://neo4j.com/) for this project to visualise the dependencie Then launch neo4j locally via opening your web browser and entering the following web address: - http://localhost:7474/ -1. The username and password will those specified in your `.secrets` file. + The username and password will be those specified in your `.secrets` file. 1. When you have finished playing with your local neo4j instance, remember to stop it running by executing the below in your shell/terminal: ```shell script diff --git a/docker-compose.yml b/docker-compose.yml index 1670a1e..d73f687 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: - 7474:7474 # web client - 7687:7687 # db default port volumes: - - ./neo4j:/var/lib/neo4j/import + - ./example/neo4j:/var/lib/neo4j/import environment: - NEO4j_dbms.security.auth_enabled='true' # install graph-data-science plugin @@ -30,7 +30,7 @@ services: - neo4j_network app: - build: . + build: . 
# build from Dockerfile container_name: app env_file: .env environment: @@ -42,8 +42,8 @@ services: - neo4j command: > sh -c "/wait \ - && python -m loader --file 'data/query_constraints.cypher' \ - && python -m loader --file 'data/query_nodes.cypher' \ - && python -m loader --file 'data/query_relationships.cypher'" + && python -m loader --file 'example/neo4j/query_constraints.cypher' \ + && python -m loader --file 'example/neo4j/query_nodes.cypher' \ + && python -m loader --file 'example/neo4j/query_relationships.cypher'" networks: - neo4j_network diff --git a/neo4j/analytics_analytics_dependency.csv b/example/neo4j/analytics_analytics_dependency.csv old mode 100644 new mode 100755 similarity index 100% rename from neo4j/analytics_analytics_dependency.csv rename to example/neo4j/analytics_analytics_dependency.csv diff --git a/neo4j/analytics_github_repos_dependency.csv b/example/neo4j/analytics_github_repos_dependency.csv old mode 100644 new mode 100755 similarity index 100% rename from neo4j/analytics_github_repos_dependency.csv rename to example/neo4j/analytics_github_repos_dependency.csv diff --git a/neo4j/analytics_tables.csv b/example/neo4j/analytics_tables.csv old mode 100644 new mode 100755 similarity index 100% rename from neo4j/analytics_tables.csv rename to example/neo4j/analytics_tables.csv diff --git a/neo4j/example_import.cypher b/example/neo4j/example_import.cypher old mode 100644 new mode 100755 similarity index 100% rename from neo4j/example_import.cypher rename to example/neo4j/example_import.cypher diff --git a/neo4j/github_repos_tables.csv b/example/neo4j/github_repos_tables.csv old mode 100644 new mode 100755 similarity index 100% rename from neo4j/github_repos_tables.csv rename to example/neo4j/github_repos_tables.csv diff --git a/neo4j/reporting_analytics_dependency.csv b/example/neo4j/reporting_analytics_dependency.csv old mode 100644 new mode 100755 similarity index 100% rename from neo4j/reporting_analytics_dependency.csv rename to 
example/neo4j/reporting_analytics_dependency.csv diff --git a/neo4j/reporting_github_repos_dependency.csv b/example/neo4j/reporting_github_repos_dependency.csv old mode 100644 new mode 100755 similarity index 100% rename from neo4j/reporting_github_repos_dependency.csv rename to example/neo4j/reporting_github_repos_dependency.csv diff --git a/neo4j/reporting_tables.csv b/example/neo4j/reporting_tables.csv old mode 100644 new mode 100755 similarity index 100% rename from neo4j/reporting_tables.csv rename to example/neo4j/reporting_tables.csv diff --git a/sql/analytics/author.sql b/example/sql/analytics/author.sql similarity index 100% rename from sql/analytics/author.sql rename to example/sql/analytics/author.sql diff --git a/sql/analytics/commit.sql b/example/sql/analytics/commit.sql similarity index 100% rename from sql/analytics/commit.sql rename to example/sql/analytics/commit.sql diff --git a/sql/analytics/committer.sql b/example/sql/analytics/committer.sql similarity index 100% rename from sql/analytics/committer.sql rename to example/sql/analytics/committer.sql diff --git a/sql/analytics/repo.sql b/example/sql/analytics/repo.sql similarity index 100% rename from sql/analytics/repo.sql rename to example/sql/analytics/repo.sql diff --git a/sql/analytics/user.sql b/example/sql/analytics/user.sql similarity index 100% rename from sql/analytics/user.sql rename to example/sql/analytics/user.sql diff --git a/sql/reporting/user_activity.sql b/example/sql/reporting/user_activity.sql similarity index 100% rename from sql/reporting/user_activity.sql rename to example/sql/reporting/user_activity.sql diff --git a/neo4j/.gitkeep b/neo4j/.gitkeep old mode 100644 new mode 100755 diff --git a/sql/.gitkeep b/sql/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/tests/fixtures/fixture_extractor.py b/tests/fixtures/fixture_extractor.py index 6b7d59d..5fa3c80 100644 --- a/tests/fixtures/fixture_extractor.py +++ b/tests/fixtures/fixture_extractor.py @@ -3,7 +3,7 @@ 
@pytest.fixture() def query_user_activity(): - with open(file="sql/reporting/user_activity.sql", mode="r") as f: + with open(file="example/sql/reporting/user_activity.sql", mode="r") as f: return f.read() diff --git a/tests/fixtures/fixture_writer.py b/tests/fixtures/fixture_writer.py index 8837004..648b4ac 100644 --- a/tests/fixtures/fixture_writer.py +++ b/tests/fixtures/fixture_writer.py @@ -8,7 +8,7 @@ def datasets(): @pytest.fixture() def dir_file(): - return "neo4j" + return "example/neo4j" @pytest.fixture() diff --git a/tests/integration/test_integration_extractor.py b/tests/integration/test_integration_extractor.py index aedc25f..9bcb0fa 100644 --- a/tests/integration/test_integration_extractor.py +++ b/tests/integration/test_integration_extractor.py @@ -13,7 +13,7 @@ def test_extract_table_dependencies_from_queries( extract = [extracted_analytics, extracted_reporting] for i, schema in enumerate(schemes): - dir_report = f"sql/{schema}" + dir_report = f"example/sql/{schema}" remove_txt = [] for table in os.listdir(dir_report): table_name, _ = os.path.splitext(p=table) diff --git a/tests/unit/test_unit_extractor.py b/tests/unit/test_unit_extractor.py index 4fa1f25..c0e135d 100644 --- a/tests/unit/test_unit_extractor.py +++ b/tests/unit/test_unit_extractor.py @@ -3,7 +3,7 @@ def test_clean_query(query_user_activity, cleaned_user_activity): schema = "reporting" - dir_report = f"sql/{schema}" + dir_report = f"example/sql/{schema}" extractor = Extractor(script_dir=dir_report, schema=schema) txt_remove = [ f"MERGE {schema}.user_activity USING (",