Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
# django
__pycache__/

# logs
*.log

# data
*.csv

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ python sqlquerygraph.py -sd 'sql' -ed 'neo4j' -rd 'github_repos' 'analytics' 're

### Run neo4j graph database
We use [neo4j](https://neo4j.com/) for this project to visualise the dependencies between tables. To install neo4j locally using Docker Compose, follow the instructions below:
1. Install and open Docker
1. Install and open Docker (if already installed, just open the program).
+ For Mac OSX, install Docker and Docker Compose together [here](https://docs.docker.com/docker-for-mac/install/).
+ For Linux, install Docker [here](https://docs.docker.com/engine/install/) and then follow these [instructions](https://docs.docker.com/compose/install/) to install docker-compose.
+ For Windows, install Docker and Docker Compose together [here](https://docs.docker.com/docker-for-windows/install/).
1. Create a new file, `.secrets`, in the directory where this `README.md` file sits, and put the following lines in it. This allows you to set the password for your local neo4j instance without exposing the password.
```
export NEO4J_AUTH=neo4j/<your_password>
export NEO4J_AUTH=neo4j
export NEO4J_AUTH=<your_password>
export NEO4J_USERNAME=neo4j
export NEO4J_PASSWORD=<your_password>
```
1. Update your `.env` file to take in the new `.secrets` file you created by entering the following in your shell/terminal:
```shell script
Expand Down
18 changes: 12 additions & 6 deletions loader.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
import logging
import os
import argparse

from py2neo import Graph


logging.basicConfig(
level=logging.INFO,
filename="log/loader.log",
filemode="w",
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

NEO4J_AUTH = (os.getenv(key="NEO4J_USERNAME"), os.getenv(key="NEO4J_PASSWORD"))

g = Graph(auth=NEO4J_AUTH, host="localhost", port=7687, scheme="bolt")
Expand All @@ -13,17 +22,14 @@
argp.add_argument("-f", "--file", type=str, help="Path for where Cypher query is.")
args = argp.parse_args()

print(f"Reading {args.file}\n")
print("*******************************************\n")
logging.info(f"Reading {args.file}\n")
with open(file=args.file, mode="r") as f:
queries = f.read()

print(f"Formatting {args.file} for importing into neo4j\n")
print("*******************************************\n")
logging.info(f"Formatting {args.file} for importing into neo4j\n")
queries = queries.split(sep=";")
queries = [txt for txt in queries if txt != "\n"]

print(f"Executing {args.file} in neo4j\n")
print("*******************************************\n")
logging.info(f"Executing {args.file} in neo4j\n")
for query in queries:
g.evaluate(cypher=query)
Empty file added log/.gitkeep
Empty file.
29 changes: 17 additions & 12 deletions sqlquerygraph.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import os
import argparse

Expand All @@ -9,6 +10,13 @@
import pandas as pd


logging.basicConfig(
level=logging.INFO,
filename="log/sqlquerygraph.log",
filemode="w",
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

if __name__ == """__main__""":
argp = argparse.ArgumentParser()
argp.add_argument(
Expand Down Expand Up @@ -55,9 +63,10 @@
print(subdir)

for i, dataset in enumerate(subdir):
print(f"Extracting {dataset} tables and their dependencies from scripts\n")
print("*******************************************\n")

logging.info(
f"Extracting {dataset} tables and their dependencies from scripts\n"
)
# create text to remove
dir_report = f"{args.script_dir}/{dataset}"
remove_txt = []
Expand All @@ -75,27 +84,23 @@
str_to_remove=remove_txt,
verbose=args.verbose,
)
print(f"Converting {dataset} dictionaries to dataframes\n")
print("*******************************************\n")

logging.info(f"Converting {dataset} dictionaries to dataframes\n")
df_tables = exporter.convert_dict_to_df(data=table_dependencies)
df_tables = df_tables.to_numpy()
arr = np.concatenate((arr, df_tables), axis=0)

print("Splitting tables from their dependencies\n")
print("*******************************************\n")
logging.info("Splitting tables from their dependencies\n")
df = pd.DataFrame(data=arr, columns=["table", "dependency"])
df = exporter.separate_dataset_table(data=df)

print("Exporting unique table names for nodes\n")
print("*******************************************\n")
logging.info("Exporting unique table names for nodes\n")
exporter.export_unique_names(data=df, path_or_buf=args.export_dir)

print("Exporting table dependencies for relationships\n")
print("*******************************************\n")
logging.info("Exporting table dependencies for relationships\n")
exporter.export_table_dependency(data=df, path_or_buf=args.export_dir)

print("Creating Cypher queries for neo4j database\n")
print("*******************************************\n")
logging.info("Creating Cypher queries for neo4j database\n")
datasets = [txt.title() for txt in args.reference_datasets]
writer.create_query_constraint(datasets=datasets, dir_file=args.export_dir)
writer.create_query_node_import(datasets=datasets, dir_file=args.export_dir)
Expand Down