67 changes: 54 additions & 13 deletions astrodbkit2/astrodb.py
@@ -5,6 +5,7 @@
import json
import os
import sqlite3
import shutil

import numpy as np
import pandas as pd
@@ -736,25 +737,32 @@ def save_json(self, name, directory):
with open(os.path.join(directory, filename), "w", encoding="utf-8") as f:
f.write(json.dumps(data, indent=4, default=json_serializer))

def save_reference_table(self, table, directory):
def save_reference_table(self, table: str, directory: str, reference_directory: str="reference"):
"""
Save the reference table to disk

Parameters
----------
table : str
Name of reference table to output
directory : str
Name of directory in which to save the output JSON
reference_directory : str
Name of sub-directory to use for reference JSON files (eg, data/reference)
"""

# Create directory if not already present
if not os.path.isdir(os.path.join(directory, reference_directory)):
os.makedirs(os.path.join(directory, reference_directory))

results = self.session.query(self.metadata.tables[table]).all()
data = [row._asdict() for row in results]
filename = table + ".json"
if len(data) > 0:
with open(os.path.join(directory, filename), "w", encoding="utf-8") as f:
with open(os.path.join(directory, reference_directory, filename), "w", encoding="utf-8") as f:
f.write(json.dumps(data, indent=4, default=json_serializer))

def save_database(self, directory, clear_first=True):
def save_database(self, directory: str, clear_first: bool=True, reference_directory: str="reference", source_directory: str="source"):
"""
Output contents of the database into the specified directory as JSON files.
Source objects have individual JSON files with all data for that object.
@@ -763,28 +771,45 @@
Parameters
----------
directory : str
Name of directory in which to save the output JSON
Name of top-level directory in which to save the output JSON
clear_first : bool
First clear the directory of all existing JSON (useful to capture DB deletions). Default: True
reference_directory : str
Name of sub-directory to use for reference JSON files (eg, data/reference)
source_directory : str
Name of sub-directory to use for source JSON files (eg, data/source)
"""

# Clear existing files first from that directory
if clear_first:
print("Clearing existing JSON files...")
for filename in os.listdir(directory):
os.remove(os.path.join(directory, filename))
for file in os.listdir(directory):
file_path = os.path.join(directory, file)
if os.path.isfile(file_path):
os.remove(file_path)
elif os.path.isdir(file_path):
# This is to handle the reference and source directories
shutil.rmtree(file_path)

# Create sub-directories if not already present
if not os.path.isdir(os.path.join(directory, reference_directory)):
os.makedirs(os.path.join(directory, reference_directory))
if not os.path.isdir(os.path.join(directory, source_directory)):
os.makedirs(os.path.join(directory, source_directory))

# Output reference tables
print(f"Storing reference tables to {os.path.join(directory, reference_directory)}...")
for table in self._reference_tables:
# Skip reference tables that are not actually in the database
if table not in self.metadata.tables.keys():
continue

self.save_reference_table(table, directory)
self.save_reference_table(table, directory, reference_directory=reference_directory)

# Output primary objects
print(f"Storing individual sources to {os.path.join(directory, source_directory)}...")
for row in tqdm(self.query(self.metadata.tables[self._primary_table])):
self.save_json(row, directory)
self.save_json(row, os.path.join(directory, source_directory))

# Object input methods
def add_table_data(self, data, table, fmt="csv"):
@@ -892,17 +917,21 @@ def load_json(self, filename):
temp_dict[self._foreign_key] = source
conn.execute(self.metadata.tables[key].insert().values(temp_dict))

def load_database(self, directory, verbose=False):
def load_database(self, directory: str, verbose: bool=False, reference_directory: str="reference", source_directory: str="source"):
"""
Reload entire database from a directory of JSON files.
Note that this will first clear existing tables.

Parameters
----------
directory : str
Name of directory containing the JSON files
Name of top-level directory containing the JSON files
verbose : bool
Flag to enable diagnostic messages
reference_directory : str
Relative path to sub-directory to use for reference JSON files (eg, data/reference)
source_directory : str
Relative path to sub-directory to use for source JSON files (eg, data/source)
"""

# Clear existing database contents
@@ -917,12 +946,24 @@
for table in self._reference_tables:
if verbose:
print(f"Loading {table} table")
self.load_table(table, directory, verbose=verbose)
# Check if the reference table is in the sub-directory
if os.path.exists(os.path.join(directory, reference_directory, table+".json")):
self.load_table(table, os.path.join(directory, reference_directory), verbose=verbose)
else:
self.load_table(table, directory, verbose=verbose)

# Load object data
if verbose:
print("Loading object tables")
for file in tqdm(os.listdir(directory)):

# Check if the sources are in the sub-directory
if os.path.exists(os.path.join(directory, source_directory)):
directory_of_sources = os.path.join(directory, source_directory)
else:
directory_of_sources = directory

# Scan selected directory for JSON source files
for file in tqdm(os.listdir(directory_of_sources)):
# Skip reference tables
core_name = file.replace(".json", "")
if core_name in self._reference_tables:
@@ -932,7 +973,7 @@
if not file.endswith(".json") or file.startswith("."):
continue

self.load_json(os.path.join(directory, file))
self.load_json(os.path.join(directory_of_sources, file))

def dump_sqlite(self, database_name):
"""Output database as a sqlite file"""
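
Taken together, the new keyword arguments above support a round-trip along these lines (a minimal sketch; the connection string, directory names, and the pre-existing database are illustrative assumptions, not part of this diff)::

    from astrodbkit2.astrodb import Database

    # Connect to an existing SQLite database (assumed name)
    db = Database('sqlite:///SIMPLE.db')

    # Write reference tables to data/reference/ and individual sources to data/source/
    db.save_database(directory='data', reference_directory='reference', source_directory='source')

    # Reload from the same layout; load_database falls back to the top-level
    # directory when the sub-directories are not present
    db.load_database(directory='data', reference_directory='reference', source_directory='source')
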
35 changes: 22 additions & 13 deletions astrodbkit2/tests/test_astrodb.py
@@ -3,6 +3,7 @@
import io
import json
import os
import shutil

import pandas as pd
import pytest
@@ -413,31 +414,35 @@ def test_views(db):

def test_save_reference_table(db, db_dir):
# Test saving a reference table
if os.path.exists(os.path.join(db_dir, 'Publications.json')):
os.remove(os.path.join(db_dir, 'Publications.json'))
db.save_reference_table('Publications', db_dir)
assert os.path.exists(os.path.join(db_dir, 'Publications.json'))
os.remove(os.path.join(db_dir, 'Publications.json')) # explicitly removing so that the next step will get verified
ref_dir = "reference"
if os.path.exists(os.path.join(db_dir, ref_dir, 'Publications.json')):
os.remove(os.path.join(db_dir, ref_dir, 'Publications.json'))
db.save_reference_table('Publications', db_dir, reference_directory=ref_dir)
assert os.path.exists(os.path.join(db_dir, ref_dir, 'Publications.json'))
os.remove(os.path.join(db_dir, ref_dir, 'Publications.json')) # explicitly removing so that the next step will get verified


def test_save_database(db, db_dir):
# Test saving the database to JSON files

# Clear temporary directory first
# if not os.path.exists(DB_DIR):
# os.mkdir(DB_DIR)
for file in os.listdir(db_dir):
os.remove(os.path.join(db_dir, file))
file_path = os.path.join(db_dir, file)
if os.path.isfile(file_path):
os.remove(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)

db.save_database(db_dir)

# Check JSON data
assert os.path.exists(os.path.join(db_dir, 'Publications.json'))
assert os.path.exists(os.path.join(db_dir, '2mass_j13571237+1428398.json'))
assert os.path.exists(os.path.join(db_dir, "reference", 'Publications.json'))
assert os.path.exists(os.path.join(db_dir, "source", '2mass_j13571237+1428398.json'))
assert not os.path.exists(os.path.join(db_dir, '2mass_j13571237+1428398 2.json'))
assert not os.path.exists(os.path.join(db_dir, "source", '2mass_j13571237+1428398 2.json'))

# Load source and confirm it is the same
with open(os.path.join(db_dir, '2mass_j13571237+1428398.json'), 'r') as f:
with open(os.path.join(db_dir, "source", '2mass_j13571237+1428398.json'), 'r') as f:
data = json.load(f)
assert data == db.inventory('2MASS J13571237+1428398')

@@ -457,7 +462,7 @@ def test_load_database(db, db_dir):

# Reload the database and check DB contents
assert os.path.exists(db_dir)
assert os.path.exists(os.path.join(db_dir, 'Publications.json'))
assert os.path.exists(os.path.join(db_dir, "reference", 'Publications.json'))
db.load_database(db_dir, verbose=True)
assert db.query(db.Publications).count() == 2
assert db.query(db.Photometry).count() == 3
@@ -466,7 +471,11 @@

# Clear temporary directory and files
for file in os.listdir(db_dir):
os.remove(os.path.join(db_dir, file))
file_path = os.path.join(db_dir, file)
if os.path.isfile(file_path):
os.remove(file_path)
elif os.path.isdir(file_path):
shutil.rmtree(file_path)


def test_copy_database_schema():
21 changes: 14 additions & 7 deletions docs/index.rst
@@ -83,15 +83,18 @@ Loading the Database
--------------------

**Astrodbkit2** contains methods to output the full contents of the database as a list of JSON files.
It can likewise read in a directory of these files to populate the database.
This is how SIMPLE is currently version controlled. To load a database of this form, do the following::
It can likewise read in a directory of these files to populate the database.
By default, reference tables (e.g., Publications, Telescopes) and source tables are stored in the `reference/` and `source/` sub-directories of `data/`, respectively.
This is how SIMPLE is currently version controlled.

To load a database of this form, do the following::

from astrodbkit2.astrodb import Database

connection_string = 'sqlite:///SIMPLE.db' # SQLite connection string
db_dir = 'data' # directory where JSON files are located
db = Database(connection_string)
db.load_database(db_dir)
db.load_database(directory=db_dir, reference_directory="reference")
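
Judging from the fallback checks in `load_database` (see the astrodb.py changes above), a legacy flat layout with all JSON files directly in `data/` should still load without the new keyword arguments; a minimal sketch of that assumed behaviour::

    # Falls back to the top-level directory when the reference/ and source/ sub-directories are absent
    db.load_database(db_dir)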

.. note:: Database contents are cleared when loading from JSON files to ensure that the database only contains
sources from on-disk files. We describe later how to use the :py:meth:`~astrodbkit2.astrodb.Database.save_db` method
@@ -406,17 +409,21 @@ Saving the Database
===================

If users perform changes to a database, they will want to output this to disk to be version controlled.
**Astrodbkit2** provides methods to save an individual source or reference table as well as the entire data.
We recommend the later to output the entire contents to disk::
**Astrodbkit2** provides methods to save an individual source or reference table as well as all of the data stored in the database.
By default, reference tables are stored in a sub-directory of `data/` called "reference"; this can be overridden by
supplying a `reference_directory` argument to `save_database` or `save_reference_table`.
Similarly, source/object tables are stored in a sub-directory of `data/` called "source", which can be overridden by supplying a `source_directory` argument.

We recommend using `save_database` as that outputs the entire database contents to disk::

# Save single object
db.save_json('2MASS J13571237+1428398', 'data')

# Save single reference table
db.save_reference_table('Publications', 'data')

# Save entire database to directory 'data'
db.save_database('data')
# Save entire database to directory 'data/' with 'reference/' and 'source/' subdirectories.
db.save_database(directory='data', reference_directory='reference', source_directory='source')
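
With the defaults shown above (and using table and source names from the test suite purely as placeholders), the resulting on-disk layout should look roughly like::

    data/
        reference/
            Publications.json
            Telescopes.json
        source/
            2mass_j13571237+1428398.json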

.. note:: To properly capture database deletes, the contents of the specified directory is first cleared before
creating JSON files representing the current state of the database.