rickybassom · rickybassom · Feb 13, 2022 · Feb 13, 2022 · Feb 13, 2022 · Feb 13, 2022
diff --git a/.flake8 b/.flake8
@@ -1,7 +1,7 @@
 [flake8]
 select = B,B9,C,D,DAR,E,F,N,RST,S,W
 ignore = E203,E501,RST201,RST203,RST301,W503,D205,D212,D200,D415
-max-line-length = 80
+max-line-length = 88
 max-complexity = 10
 docstring-convention = google
 per-file-ignores = tests/*:S101

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -24,7 +24,7 @@ jobs:
           - { python: "3.7", os: "ubuntu-latest", session: "tests" }
           - { python: "3.10", os: "windows-latest", session: "tests" }
           - { python: "3.10", os: "macos-latest", session: "tests" }
-          - { python: "3.10", os: "ubuntu-latest", session: "typeguard" }
+          #          - { python: "3.10", os: "ubuntu-latest", session: "typeguard" }
           - { python: "3.10", os: "ubuntu-latest", session: "xdoctest" }
           - { python: "3.10", os: "ubuntu-latest", session: "docs-build" }
 

diff --git a/noxfile.py b/noxfile.py
@@ -28,7 +28,6 @@
     "safety",
     "mypy",
     "tests",
-    "typeguard",
     "xdoctest",
     "docs-build",
 )
@@ -154,12 +153,12 @@ def coverage(session: Session) -> None:
     session.run("coverage", *args)
 
 
-@session(python=python_versions)
-def typeguard(session: Session) -> None:
-    """Runtime type checking using Typeguard."""
-    session.install(".")
-    session.install("pytest", "typeguard", "pygments")
-    session.run("pytest", f"--typeguard-packages={package}", *session.posargs)
+# @session(python=python_versions)
+# def typeguard(session: Session) -> None:
+#     """Runtime type checking using Typeguard."""
+#     session.install(".")
+#     session.install("pytest", "typeguard", "pygments")
+#     session.run("pytest", f"--typeguard-packages={package}", *session.posargs)
 
 
 @session(python=python_versions)

diff --git a/src/gmn_python_api/gmn_data_directory.py b/src/gmn_python_api/gmn_data_directory.py
@@ -1,6 +1,6 @@
 """
-This module contains functions to read trajectory summary files from the GMN
-data directory.
+This module contains functions to read trajectory summary files from the GMN data
+directory.
 """
 from datetime import datetime
 from typing import List
@@ -37,8 +37,8 @@ def get_all_daily_file_urls() -> List[str]:
     """
     Get all daily trajectory summary file urls from the GMN data directory.
     :return: (List[str]) A list of all daily file urls.
-    :raises: (requests.HTTPError) If the data directory url doesn't return a
-    200 response.
+    :raises: (requests.HTTPError) If the data directory url doesn't return a 200
+    response.
     """
     return _get_url_paths(BASE_URL + DAILY_DIRECTORY, SUMMARY_FILE_EXTENSION)
 
@@ -47,8 +47,8 @@ def get_all_monthly_file_urls() -> List[str]:
     """
     Get all monthly trajectory summary file urls from the GMN data directory.
     :return: (List[str]) A list of all monthly file urls.
-    :raises: (requests.HTTPError) If the data directory url doesn't return a
-    200 response.
+    :raises: (requests.HTTPError) If the data directory url doesn't return a 200
+    response.
     """
     return _get_url_paths(BASE_URL + MONTHLY_DIRECTORY, SUMMARY_FILE_EXTENSION)
 
@@ -62,8 +62,8 @@ def get_daily_file_url_by_date(
     :param current_date: (Optional datetime) The current date. Defaults to
     datetime.now().
     :return: (str) The URL of the daily file.
-    :raises: (requests.HTTPError) If the data directory url doesn't return a
-    200 response.
+    :raises: (requests.HTTPError) If the data directory url doesn't return a 200
+    response.
     """
     if not current_date:
         current_date = datetime.today()
@@ -86,8 +86,8 @@ def get_monthly_file_url_by_month(date: datetime) -> str:
     Get the URL of the monthly trajectory summary file for a given month.
     :param date: (datetime) The date of the monthly file.
     :return: (str) The URL of the monthly file.
-    :raises: (requests.HTTPError) If the data directory url doesn't return a
-    200 response.
+    :raises: (requests.HTTPError) If the data directory url doesn't return a 200
+    response.
     """
     all_monthly_filenames = get_all_monthly_file_urls()
     files_containing_date = [
@@ -105,8 +105,8 @@ def get_daily_file_content_by_date(
     :param current_date: (Optional datetime) The current date. Defaults to
     datetime.now().
     :return: (str) The content of the daily file.
-    :raises: (requests.HTTPError) If the data directory url doesn't return a
-    200 response.
+    :raises: (requests.HTTPError) If the data directory url doesn't return a 200
+    response.
     """
     file_url = get_daily_file_url_by_date(date, current_date)
 
@@ -123,8 +123,8 @@ def get_monthly_file_content_by_date(date: datetime) -> str:
     Get the content of the monthly trajectory summary file for a given date.
     :param date: (datetime) The date to get the monthly file for.
     :return: (str) The content of the monthly file.
-    :raises: (requests.HTTPError) If the data directory url doesn't return a
-    200 response.
+    :raises: (requests.HTTPError) If the data directory url doesn't return a 200
+    response.
     """
     file_url = get_monthly_file_url_by_month(date)
 

diff --git a/src/gmn_python_api/gmn_trajectory_summary_reader.py b/src/gmn_python_api/gmn_trajectory_summary_reader.py
@@ -0,0 +1,93 @@
+"""
+This module contains functions to load trajectory summary data into Pandas DataFrames
+and numpy arrays.
+"""
+import os.path
+from io import StringIO
+from typing import Any
+
+import numpy.typing as npt
+import pandas as pd  # type: ignore
+from pandas._typing import FilePathOrBuffer  # type: ignore
+
+DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
+
+
+def read_trajectory_summary_as_dataframe(
+    filepath_or_buffer: FilePathOrBuffer,
+) -> pd.DataFrame:
+    """
+    Reads a trajectory summary file into a Pandas DataFrame.
+    :param filepath_or_buffer: (FilePathOrBuffer) Path of a trajectory summary file,
+    the text content of one, or a buffer to read one from.
+    :return: (DataFrame) Pandas DataFrame of the trajectory summary file, with
+    "Name (unit)" column labels and datetime, string and bool columns converted.
+    """
+    # A str that is not an existing file is treated as the file content itself.
+    # Only a str is probed, because neither os.path.isfile nor StringIO accepts a
+    # buffer object; anything else is handed to read_csv unchanged.
+    if isinstance(filepath_or_buffer, str) and not os.path.isfile(filepath_or_buffer):
+        filepath_or_buffer = StringIO(filepath_or_buffer)
+
+    trajectory_df = pd.read_csv(
+        filepath_or_buffer,
+        engine="python",
+        sep=r"\s*;\s*",
+        skiprows=[0, 5, 6],
+        header=[0, 1],
+        na_values=["nan", "...", "None"],
+    )
+    # Merge the two header rows (name, then unit) into a single label per column
+    trajectory_df.columns = trajectory_df.columns.map(
+        lambda h: f"{_clean_header(h[0])}{_clean_header(h[1], is_unit=True)}"
+    )
+
+    # Set data types
+    trajectory_df["Beginning (UTC Time)"] = pd.to_datetime(
+        trajectory_df["Beginning (UTC Time)"], format=DATETIME_FORMAT
+    )
+    for column in ("IAU (code)", "Participating (stations)"):
+        trajectory_df[column] = trajectory_df[column].astype("string")
+
+    # read_csv may already have parsed these flags as real booleans, so both the text
+    # and the bool spellings are mapped; text-only keys would turn parsed booleans
+    # into NaN, which astype("bool") then casts to True.
+    bool_values = {"True": True, "False": False, True: True, False: False}
+    for column in ("Beg in (FOV)", "End in (FOV)"):
+        trajectory_df[column] = trajectory_df[column].map(bool_values).astype("bool")
+
+    return trajectory_df
+
+
+def read_trajectory_summary_as_numpy_array(
+    filepath_or_buffer: FilePathOrBuffer,
+) -> npt.NDArray[Any]:
+    """
+    Loads a trajectory summary file as a DataFrame and converts it to a numpy array.
+    :param filepath_or_buffer: (FilePathOrBuffer) Path or buffer for a trajectory
+    summary file.
+    :return: (ndarray) Numpy array of the trajectory summary file.
+    """
+    summary_df = read_trajectory_summary_as_dataframe(filepath_or_buffer)
+    # In the future use to_records() to convert to a numpy record array
+    # https://github.com/pandas-dev/pandas/issues/41935
+    return summary_df.to_numpy()  # type: ignore
+
+
+def _clean_header(text: str, is_unit: bool = False) -> str:
+    """
+    Turn one raw csv header cell into tidy header text.
+    :param text: (str) Raw csv header
+    :param is_unit: (optional bool) wrap the cleaned text in brackets, for units
+    :return: (str) Cleaned text, or an empty string when no header was found
+    """
+    # A header containing "Unnamed" means no header text was found
+    if "Unnamed" in text:
+        return ""
+
+    # Drop hashtags, then collapse every run of whitespace into a single space
+    words = text.replace("#", "").split()
+    label = " ".join(words)
+
+    # Units are rendered with a leading space and surrounding brackets
+    return f" ({label})" if is_unit else label