From 46ff133adbfa3b2928de065ba09fd0d2c54fdb7c Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Wed, 21 Feb 2024 10:55:43 -0500 Subject: [PATCH] Adding example schema validation and example on how to use ORM in basic ingests --- astrodbkit2/schema_example.py | 92 ++++++++++++++++++++----------- astrodbkit2/tests/test_astrodb.py | 25 +++++++++ docs/index.rst | 52 +++++++++++++++++ setup.cfg | 4 -- 4 files changed, 138 insertions(+), 35 deletions(-) diff --git a/astrodbkit2/schema_example.py b/astrodbkit2/schema_example.py index 469c6c1..cfb7658 100644 --- a/astrodbkit2/schema_example.py +++ b/astrodbkit2/schema_example.py @@ -2,6 +2,7 @@ import sqlalchemy as sa from sqlalchemy import Boolean, Column, Float, ForeignKey, Integer, String, BigInteger, Enum, Date, DateTime +from sqlalchemy.orm import validates import enum from astrodbkit2.astrodb import Base from astrodbkit2.views import view @@ -13,7 +14,8 @@ class Publications(Base): """ORM for publications table. This stores reference information (DOI, bibcodes, etc) and has shortname as the primary key """ - __tablename__ = 'Publications' + + __tablename__ = "Publications" name = Column(String(30), primary_key=True, nullable=False) bibcode = Column(String(100)) doi = Column(String(100)) @@ -21,80 +23,108 @@ class Publications(Base): class Telescopes(Base): - __tablename__ = 'Telescopes' + __tablename__ = "Telescopes" name = Column(String(30), primary_key=True, nullable=False) - reference = Column(String(30), ForeignKey('Publications.name', ondelete='cascade')) + reference = Column(String(30), ForeignKey("Publications.name", ondelete="cascade")) class Instruments(Base): - __tablename__ = 'Instruments' + __tablename__ = "Instruments" name = Column(String(30), primary_key=True, nullable=False) - reference = Column(String(30), ForeignKey('Publications.name', ondelete='cascade')) + reference = Column(String(30), ForeignKey("Publications.name", ondelete="cascade")) # 
------------------------------------------------------------------------------------------------------------------- # Enumerations tables class Regime(enum.Enum): """Enumeration for spectral type regime""" - optical = 'optical' - infrared = 'infrared' - ultraviolet = 'ultraviolet' - radio = 'radio' + + optical = "optical" + infrared = "infrared" + ultraviolet = "ultraviolet" + radio = "radio" # ------------------------------------------------------------------------------------------------------------------- # Main tables class Sources(Base): """ORM for the sources table. This stores the main identifiers for our objects along with ra and dec""" - __tablename__ = 'Sources' + + __tablename__ = "Sources" source = Column(String(100), primary_key=True, nullable=False) ra = Column(Float) dec = Column(Float) shortname = Column(String(30)) # not needed? - reference = Column(String(30), ForeignKey('Publications.name', ondelete='cascade'), nullable=False) + reference = Column(String(30), ForeignKey("Publications.name", ondelete="cascade"), nullable=False) comments = Column(String(1000)) + @validates("ra") + def validate_ra(self, key, value): + if value > 360 or value < 0: + raise ValueError("RA not in allowed range (0..360)") + return value + + @validates("dec") + def validate_dec(self, key, value): + if value > 90 or value < -90: + raise ValueError("Dec not in allowed range (-90..90)") + return value + class Names(Base): - __tablename__ = 'Names' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade'), nullable=False, primary_key=True) + __tablename__ = "Names" + source = Column(String(100), ForeignKey("Sources.source", ondelete="cascade"), nullable=False, primary_key=True) other_name = Column(String(100), primary_key=True, nullable=False) class Photometry(Base): - __tablename__ = 'Photometry' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), nullable=False, primary_key=True) + __tablename__ = 
"Photometry" + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) band = Column(String(30), primary_key=True) ucd = Column(String(100)) magnitude = Column(Float) magnitude_error = Column(Float) - telescope = Column(String(30), ForeignKey('Telescopes.name', ondelete='cascade')) - instrument = Column(String(30), ForeignKey('Instruments.name', ondelete='cascade')) + telescope = Column(String(30), ForeignKey("Telescopes.name", ondelete="cascade")) + instrument = Column(String(30), ForeignKey("Instruments.name", ondelete="cascade")) epoch = Column(String(30)) comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.name', ondelete='cascade'), primary_key=True) + reference = Column(String(30), ForeignKey("Publications.name", ondelete="cascade"), primary_key=True) class SpectralTypes(Base): - __tablename__ = 'SpectralTypes' - source = Column(String(100), ForeignKey('Sources.source', ondelete='cascade', onupdate='cascade'), nullable=False, primary_key=True) + __tablename__ = "SpectralTypes" + source = Column( + String(100), + ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), + nullable=False, + primary_key=True, + ) spectral_type = Column(Float) spectral_type_error = Column(Float) - regime = Column(Enum(Regime, create_constraint=True), primary_key=True) # restricts to a few values: Optical, Infrared + regime = Column( + Enum(Regime, create_constraint=True), primary_key=True + ) # restricts to a few values: Optical, Infrared best = Column(Boolean) # flag for indicating if this is the best measurement or not comments = Column(String(1000)) - reference = Column(String(30), ForeignKey('Publications.name', ondelete='cascade'), primary_key=True) + reference = Column(String(30), ForeignKey("Publications.name", ondelete="cascade"), primary_key=True) # 
------------------------------------------------------------------------------------------------------------------- # Views SampleView = view( - "SampleView", - Base.metadata, - sa.select( - Sources.source.label("source"), - Sources.ra.label("s_ra"), - Sources.dec.label("s_dec"), - SpectralTypes.spectral_type.label("spectral_type"), - ).select_from(Sources).join(SpectralTypes, Sources.source == SpectralTypes.source) - ) + "SampleView", + Base.metadata, + sa.select( + Sources.source.label("source"), + Sources.ra.label("s_ra"), + Sources.dec.label("s_dec"), + SpectralTypes.spectral_type.label("spectral_type"), + ) + .select_from(Sources) + .join(SpectralTypes, Sources.source == SpectralTypes.source), +) diff --git a/astrodbkit2/tests/test_astrodb.py b/astrodbkit2/tests/test_astrodb.py index 49425f6..4973dcc 100644 --- a/astrodbkit2/tests/test_astrodb.py +++ b/astrodbkit2/tests/test_astrodb.py @@ -123,6 +123,31 @@ def test_add_data(db): conn.commit() +def test_orm_use(db): + # Tests using the SQLAlchemy ORM + + # Adding and removing a basic source + s = Sources(source="V4046 Sgr", ra=273.54, dec=-32.79, reference="Schm10") + with db.session as session: + session.add(s) + # session.add_all([s]) # if adding a list of entries + session.commit() + + assert db.query(db.Sources).filter(db.Sources.c.source == "V4046 Sgr").count() == 1 + + # Remove added source so other tests don't include it + with db.session as session: + session.delete(s) + session.commit() + + assert db.query(db.Sources).filter(db.Sources.c.source == "V4046 Sgr").count() == 0 + + # Adding a source with problematic ra/dec to test validation + with pytest.raises(ValueError): + s2 = Sources(source="V4046 Sgr", ra=9999, dec=-32.79, reference="Schm10") + with pytest.raises(ValueError): + s2 = Sources(source="V4046 Sgr", ra=273.54, dec=-9999, reference="Schm10") + def test_add_table_data(db): # Test the add_table_data method diff --git a/docs/index.rst b/docs/index.rst index 82aed15..a00ac4e 100644 --- 
a/docs/index.rst +++ b/docs/index.rst @@ -417,6 +417,58 @@ We recommend the later to output the entire contents to disk:: .. note:: To properly capture database deletes, the contents of the specified directory is first cleared before creating JSON files representing the current state of the database. +Using the SQLAlchemy ORM +======================== + +The SQLAlchemy ORM (Object Relational Mapping) can be used for many of the examples provided above. +This also allows for adding extra functionality to your schema, such as validation. + +For example, the schema of your sources table could be written to validate RA/Dec as follows:: + + from sqlalchemy import Column, Float, String + from sqlalchemy.orm import validates + + class Sources(Base): + """ORM for the sources table. This stores the main identifiers for our objects along with ra and dec""" + + __tablename__ = "Sources" + source = Column(String(100), primary_key=True, nullable=False) + ra = Column(Float) + dec = Column(Float) + + @validates("ra") + def validate_ra(self, key, value): + if value > 360 or value < 0: + raise ValueError("RA not in allowed range (0..360)") + return value + + @validates("dec") + def validate_dec(self, key, value): + if value > 90 or value < -90: + raise ValueError("Dec not in allowed range (-90..90)") + return value + +In your scripts, you can then create objects and populate them accordingly. +For example:: + + from astrodbkit2.astrodb import Database + from schema import Sources + + db = Database(connection_string) + + # Create a new object and insert to database + s = Sources(source="V4046 Sgr", ra=273.54, dec=-32.79) + with db.session as session: + session.add(s) + session.commit() + +If the RA or Dec fails the validation checks, the creation of the object will raise a ValueError exception. + +One can also use ``session.add_all()`` to insert a list of table entries and ``session.delete()`` to delete a single one. 
+These options can facilitate the creation of robust ingest scripts that are both intuitive (i.e., instantiating objects in a Pythonic way) +and able to validate input values before they reach the database. + + Reference/API ============= diff --git a/setup.cfg b/setup.cfg index dc31cc0..1b885cd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,10 +40,6 @@ astrodbkit2 = data/* [tool:pytest] testpaths = "astrodbkit2" -astropy_header = true -doctest_plus = enabled -text_file_format = rst -; addopts = --doctest-rst [coverage:run] omit =