diff --git a/.gitignore b/.gitignore index c79a3f1..570b227 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,9 @@ # django __pycache__/ +# logs +*.log + # data *.csv diff --git a/README.md b/README.md index 6fa94aa..b0bdbd7 100644 --- a/README.md +++ b/README.md @@ -48,15 +48,15 @@ python sqlquerygraph.py -sd 'sql' -ed 'neo4j' -rd 'github_repos' 'analytics' 're ### Run neo4j graph database We use [neo4j](https://neo4j.com/) for this project to visualise the dependencies between tables. To install neo4j locally using Docker Compose, follow the below instructions: -1. Install and open Docker +1. Install and open Docker (if already installed, just open the program). + For Mac OSX, install Docker and Docker Compose together [here](https://docs.docker.com/docker-for-mac/install/). + For Linux, install Docker [here](https://docs.docker.com/engine/install/) and then follow these [instructions](https://docs.docker.com/compose/install/) to install docker-compose. + For Windows, install Docker and Docker Compose together [here](https://docs.docker.com/docker-for-windows/install/). 1. Create a new file, `.secrets`, in the directory where this `README.md` file sits, and store the following in there. This allows you to set the password for your local neo4j instance without exposing it. ``` export NEO4J_AUTH=neo4j/ - export NEO4J_AUTH=neo4j - export NEO4J_AUTH= + export NEO4J_USERNAME=neo4j + export NEO4J_PASSWORD= ``` 1. Update your `.env` file to take in the new `.secrets` file you created by entering the below in your shell/terminal: ```shell script diff --git a/loader.py b/loader.py index ad19ca0..66ff189 100644 --- a/loader.py +++ b/loader.py @@ -1,8 +1,17 @@ +import logging import os import argparse from py2neo import Graph + +logging.basicConfig( + level=logging.INFO, + filename="log/loader.log", + filemode="w", + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) + NEO4J_AUTH = (os.getenv(key="NEO4J_USERNAME"), os.getenv(key="NEO4J_PASSWORD")) g = Graph(auth=NEO4J_AUTH, host="localhost", port=7687, scheme="bolt") @@ -13,17 +22,14 @@ argp.add_argument("-f", "--file", type=str, help="Path for where Cypher query is.") args = argp.parse_args() - print(f"Reading {args.file}\n") - print("*******************************************\n") + logging.info(f"Reading {args.file}\n") with open(file=args.file, mode="r") as f: queries = f.read() - print(f"Formatting {args.file} for importing into neo4j\n") - print("*******************************************\n") + logging.info(f"Formatting {args.file} for importing into neo4j\n") queries = queries.split(sep=";") queries = [txt for txt in queries if txt != "\n"] - print(f"Executing {args.file} in neo4j\n") - print("*******************************************\n") + logging.info(f"Executing {args.file} in neo4j\n") for query in queries: g.evaluate(cypher=query) diff --git a/log/.gitkeep b/log/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/sqlquerygraph.py b/sqlquerygraph.py index 7a2996a..a2050ef 100644 --- a/sqlquerygraph.py +++ b/sqlquerygraph.py @@ -1,3 +1,4 @@ +import logging import os import argparse @@ -9,6 +10,13 @@ import pandas as pd +logging.basicConfig( + level=logging.INFO, + filename="log/sqlquerygraph.log", + filemode="w", + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) + if __name__ == """__main__""": argp = argparse.ArgumentParser() argp.add_argument( @@ -55,9 +63,10 @@ print(subdir) for i, dataset in enumerate(subdir): - print(f"Extracting {dataset} tables and their dependencies from scripts\n") - print("*******************************************\n") + logging.info( + f"Extracting {dataset} tables and their dependencies from scripts\n" + ) # create text to remove dir_report = f"{args.script_dir}/{dataset}" remove_txt = [] @@ -75,27 +84,23 @@ str_to_remove=remove_txt, verbose=args.verbose, ) - print(f"Converting {dataset} dictionaries to dataframes\n") - print("*******************************************\n") + + logging.info(f"Converting {dataset} dictionaries to dataframes\n") df_tables = exporter.convert_dict_to_df(data=table_dependencies) df_tables = df_tables.to_numpy() arr = np.concatenate((arr, df_tables), axis=0) - print("Splitting tables from their dependencies\n") - print("*******************************************\n") + logging.info("Splitting tables from their dependencies\n") df = pd.DataFrame(data=arr, columns=["table", "dependency"]) df = exporter.separate_dataset_table(data=df) - print("Exporting unique table names for nodes\n") - print("*******************************************\n") + logging.info("Exporting unique table names for nodes\n") exporter.export_unique_names(data=df, path_or_buf=args.export_dir) - print("Exporting table dependencies for relationships\n") - print("*******************************************\n") + logging.info("Exporting table dependencies for relationships\n") exporter.export_table_dependency(data=df, path_or_buf=args.export_dir) - print("Creating Cypher queries for neo4j database\n") - print("*******************************************\n") + logging.info("Creating Cypher queries for neo4j database\n") datasets = [txt.title() for txt in args.reference_datasets] writer.create_query_constraint(datasets=datasets, dir_file=args.export_dir) writer.create_query_node_import(datasets=datasets, dir_file=args.export_dir)