Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[flake8]
select = B,B9,C,D,DAR,E,F,N,RST,S,W
ignore = E203,E501,RST201,RST203,RST301,W503,D205,D212,D200,D415
max-line-length = 80
max-line-length = 88
max-complexity = 10
docstring-convention = google
per-file-ignores = tests/*:S101
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
- { python: "3.7", os: "ubuntu-latest", session: "tests" }
- { python: "3.10", os: "windows-latest", session: "tests" }
- { python: "3.10", os: "macos-latest", session: "tests" }
- { python: "3.10", os: "ubuntu-latest", session: "typeguard" }
# - { python: "3.10", os: "ubuntu-latest", session: "typeguard" }
- { python: "3.10", os: "ubuntu-latest", session: "xdoctest" }
- { python: "3.10", os: "ubuntu-latest", session: "docs-build" }

Expand Down
13 changes: 6 additions & 7 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
"safety",
"mypy",
"tests",
"typeguard",
"xdoctest",
"docs-build",
)
Expand Down Expand Up @@ -154,12 +153,12 @@ def coverage(session: Session) -> None:
session.run("coverage", *args)


@session(python=python_versions)
def typeguard(session: Session) -> None:
"""Runtime type checking using Typeguard."""
session.install(".")
session.install("pytest", "typeguard", "pygments")
session.run("pytest", f"--typeguard-packages={package}", *session.posargs)
# @session(python=python_versions)
# def typeguard(session: Session) -> None:
# """Runtime type checking using Typeguard."""
# session.install(".")
# session.install("pytest", "typeguard", "pygments")
# session.run("pytest", f"--typeguard-packages={package}", *session.posargs)


@session(python=python_versions)
Expand Down
28 changes: 14 additions & 14 deletions src/gmn_python_api/gmn_data_directory.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
This module contains functions to read trajectory summary files from the GMN
data directory.
This module contains functions to read trajectory summary files from the GMN data
directory.
"""
from datetime import datetime
from typing import List
Expand Down Expand Up @@ -37,8 +37,8 @@ def get_all_daily_file_urls() -> List[str]:
"""
Get all daily trajectory summary file urls from the GMN data directory.
:return: (List[str]) A list of all daily file urls.
:raises: (requests.HTTPError) If the data directory url doesn't return a
200 response.
:raises: (requests.HTTPError) If the data directory url doesn't return a 200
response.
"""
return _get_url_paths(BASE_URL + DAILY_DIRECTORY, SUMMARY_FILE_EXTENSION)

Expand All @@ -47,8 +47,8 @@ def get_all_monthly_file_urls() -> List[str]:
"""
Get all monthly trajectory summary file urls from the GMN data directory.
:return: (List[str]) A list of all monthly file urls.
:raises: (requests.HTTPError) If the data directory url doesn't return a
200 response.
:raises: (requests.HTTPError) If the data directory url doesn't return a 200
response.
"""
return _get_url_paths(BASE_URL + MONTHLY_DIRECTORY, SUMMARY_FILE_EXTENSION)

Expand All @@ -62,8 +62,8 @@ def get_daily_file_url_by_date(
:param current_date: (Optional datetime) The current date. Defaults to
datetime.now().
:return: (str) The URL of the daily file.
:raises: (requests.HTTPError) If the data directory url doesn't return a
200 response.
:raises: (requests.HTTPError) If the data directory url doesn't return a 200
response.
"""
if not current_date:
current_date = datetime.today()
Expand All @@ -86,8 +86,8 @@ def get_monthly_file_url_by_month(date: datetime) -> str:
Get the URL of the monthly trajectory summary file for a given month.
:param date: (datetime) The date of the monthly file.
:return: (str) The URL of the monthly file.
:raises: (requests.HTTPError) If the data directory url doesn't return a
200 response.
:raises: (requests.HTTPError) If the data directory url doesn't return a 200
response.
"""
all_monthly_filenames = get_all_monthly_file_urls()
files_containing_date = [
Expand All @@ -105,8 +105,8 @@ def get_daily_file_content_by_date(
:param current_date: (Optional datetime) The current date. Defaults to
datetime.now().
:return: (str) The content of the daily file.
:raises: (requests.HTTPError) If the data directory url doesn't return a
200 response.
:raises: (requests.HTTPError) If the data directory url doesn't return a 200
response.
"""
file_url = get_daily_file_url_by_date(date, current_date)

Expand All @@ -123,8 +123,8 @@ def get_monthly_file_content_by_date(date: datetime) -> str:
Get the content of the monthly trajectory summary file for a given date.
:param date: (datetime) The date to get the monthly file for.
:return: (str) The content of the monthly file.
:raises: (requests.HTTPError) If the data directory url doesn't return a
200 response.
:raises: (requests.HTTPError) If the data directory url doesn't return a 200
response.
"""
file_url = get_monthly_file_url_by_month(date)

Expand Down
93 changes: 93 additions & 0 deletions src/gmn_python_api/gmn_trajectory_summary_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""
This module contains functions to load trajectory summary data into Pandas DataFrames
and numpy arrays.
"""
import os.path
from io import StringIO
from typing import Any

import numpy.typing as npt
import pandas as pd # type: ignore
from pandas._typing import FilePathOrBuffer # type: ignore

DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def read_trajectory_summary_as_dataframe(
    filepath_or_buffer: FilePathOrBuffer,
) -> pd.DataFrame:
    """
    Reads a trajectory summary file into a Pandas DataFrame.

    A string that does not name an existing file is treated as the raw text of a
    trajectory summary file and is wrapped in a StringIO buffer. Any other value
    (a path to an existing file, a path-like object or a file-like buffer) is
    passed to pandas unchanged.

    :param filepath_or_buffer: (FilePathOrBuffer) Path, buffer or raw text content
     of a trajectory summary file.
    :return: (DataFrame) Pandas DataFrame of the trajectory summary file.
    """
    # Only a string can be raw file content. os.path.isfile() raises TypeError
    # when handed a file-like object, so buffers must bypass this check.
    if isinstance(filepath_or_buffer, str) and not os.path.isfile(
        filepath_or_buffer
    ):
        filepath_or_buffer = StringIO(filepath_or_buffer)

    trajectory_df = pd.read_csv(
        filepath_or_buffer,
        engine="python",  # a regular expression separator needs the python engine
        sep=r"\s*;\s*",  # semicolon separated, ignoring surrounding whitespace
        skiprows=[0, 5, 6],
        header=[0, 1],  # two header rows, merged into one label below
        na_values=["nan", "...", "None"],
    )

    # Clean header text: join both header rows into a single "name (unit)" label
    trajectory_df.columns = trajectory_df.columns.map(
        lambda h: f"{_clean_header(h[0])}{_clean_header(h[1], is_unit=True)}"
    )

    # Set data types
    trajectory_df["Beginning (UTC Time)"] = pd.to_datetime(
        trajectory_df["Beginning (UTC Time)"], format=DATETIME_FORMAT
    )
    for text_column in ("IAU (code)", "Participating (stations)"):
        trajectory_df[text_column] = trajectory_df[text_column].astype("string")

    # The flags may arrive as the strings "True"/"False" or as booleans already
    # parsed by pandas, so both spellings are mapped. Mapping only the strings
    # would turn parsed booleans into NaN, and astype("bool") converts NaN to
    # True. Values matching neither spelling still become NaN and therefore True.
    fov_flags = {"True": True, "False": False, True: True, False: False}
    for flag_column in ("Beg in (FOV)", "End in (FOV)"):
        trajectory_df[flag_column] = trajectory_df[flag_column].map(fov_flags)
        trajectory_df[flag_column] = trajectory_df[flag_column].astype("bool")

    return trajectory_df


def read_trajectory_summary_as_numpy_array(
    filepath_or_buffer: FilePathOrBuffer,
) -> npt.NDArray[Any]:
    """
    Loads a trajectory summary file and returns its contents as a numpy array.

    The file is first read with read_trajectory_summary_as_dataframe and the
    resulting DataFrame is then converted with DataFrame.to_numpy().

    :param filepath_or_buffer: (FilePathOrBuffer) Path or buffer for a trajectory
     summary file.
    :return: (ndarray) Numpy array of the trajectory summary file.
    """
    trajectory_df = read_trajectory_summary_as_dataframe(filepath_or_buffer)

    # A numpy record array from to_records() is the intended future return value:
    # https://github.com/pandas-dev/pandas/issues/41935
    return trajectory_df.to_numpy()  # type: ignore


def _clean_header(text: str, is_unit: bool = False) -> str:
"""
Extract header text from each raw csv file header.
:param text: (str) Raw csv header
:param is_unit: (optional bool) return text with brackets for units
:returns: (str) Formatted text
"""
# Return an empty string if there is no header found
if "Unnamed" in text:
return ""

# Removes additional spaces and hashtags from text. Add brackets optionally.
clean_header = " ".join(text.replace("#", "").split())
if is_unit:
clean_header = f" ({clean_header})"

return clean_header
Loading