diff --git a/README.md b/README.md
index 0d4890c..66405bb 100644
--- a/README.md
+++ b/README.md
@@ -232,3 +232,32 @@ Add multiple metric data points to the database.
 ## `get_metrics(?start_at, ?end_at)`
 
 Return a list of metrics that had at least one transition in the given [`start_at`, `end_at`) interval. All metrics are returned if no `start_at` and `end_at` are given.
+
+## `get_modeling_metrics(?start_at, ?end_at)`
+
+Fetch modeling metrics from the modeling database. This method returns a list of available modeling metrics that had transitions in the specified time range.
+
+- `start_at`: Optional start time for the query range (uses class default if not specified)
+- `end_at`: Optional end time for the query range (uses class default if not specified)
+
+Returns a list of `Metric` objects containing modeling metrics.
+
+## `get_modeling_metrics_data(?names, ?start_at, ?end_at)`
+
+Fetch actual data values from modeling metrics within a time range. This method returns the actual data points (values) for the specified modeling metrics, similar to `get_metric_data()` but for the modeling database.
+
+- `names`: Optional list of modeling metric names to filter by
+- `start_at`: Optional start time for the query (uses class default if not specified)
+- `end_at`: Optional end time for the query (uses class default if not specified)
+
+Returns a list of `MetricDataPoint` objects containing the actual data values.
+
+## `get_modeling_metrics_data_arrow(?names, ?start_at, ?end_at)`
+
+Fetch actual data values from modeling metrics within a time range in Apache Arrow format. This method returns the actual data points (values) for the specified modeling metrics in Arrow IPC format, which is more efficient for large datasets.
+
+- `names`: Optional list of modeling metric names to filter by
+- `start_at`: Optional start time for the query (uses class default if not specified)
+- `end_at`: Optional end time for the query (uses class default if not specified)
+
+Returns Arrow IPC format data that can be read using `pyarrow.ipc.open_file()`.
diff --git a/examples/get_modeling_metrics_data_arrow_example.py b/examples/get_modeling_metrics_data_arrow_example.py
new file mode 100644
index 0000000..6af3c43
--- /dev/null
+++ b/examples/get_modeling_metrics_data_arrow_example.py
@@ -0,0 +1,30 @@
+from cvec import CVec
+import io
+import pyarrow.ipc as ipc  # type: ignore[import-untyped]
+import os
+
+
+def main() -> None:
+    """Fetch one modeling metric as Arrow IPC data and print its first rows."""
+    cvec = CVec(
+        host=os.environ.get("CVEC_HOST", "https://your-subdomain.cvector.dev"),
+        api_key=os.environ.get("CVEC_API_KEY", "your-api-key"),
+    )
+    test_metric_name = "Data_Marketplace/PROD/Miso_5min/ILLINOIS_HUB_lmp"
+    print("\nGetting modeling metrics data as Arrow...")
+    arrow_data = cvec.get_modeling_metrics_data_arrow(names=[test_metric_name])
+    reader = ipc.open_file(io.BytesIO(arrow_data))
+    table = reader.read_all()
+    print(f"Arrow table shape: {len(table)} rows")
+    print("\nFirst few rows:")
+    for row in table.slice(0, 5).to_pylist():
+        # Compare against None explicitly: a legitimate 0.0 reading is falsy,
+        # so `value_double or value_string` would print the wrong column.
+        value = row["value_double"]
+        if value is None:
+            value = row["value_string"]
+        print(f"- {row['name']}: {value} at {row['time']}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/get_modeling_metrics_data_example.py b/examples/get_modeling_metrics_data_example.py
new file mode 100644
index 0000000..36ba526
--- /dev/null
+++ b/examples/get_modeling_metrics_data_example.py
@@ -0,0 +1,37 @@
+from cvec import CVec
+import os
+from datetime import datetime, timedelta
+
+
+def main() -> None:
+    """Print the last three hours of data points for one modeling metric."""
+    cvec = CVec(
+        host=os.environ.get("CVEC_HOST", "https://your-subdomain.cvector.dev"),
+        api_key=os.environ.get("CVEC_API_KEY", "your-api-key"),
+    )
+    end_date = datetime.now()
+    start_date = end_date - timedelta(hours=3)
+    print("\nGetting modeling metrics data...")
+    modeling_data = cvec.get_modeling_metrics_data(
+        names=["Data_Marketplace/PROD/Miso_5min/ILLINOIS_HUB_lmp"],
+        start_at=start_date,
+        end_at=end_date,
+    )
+    print(f"Found {len(modeling_data)} data points")
+
+    if modeling_data:
+        print("\nFirst few data points:")
+        for point in modeling_data[:5]:
+            # Compare against None explicitly: a legitimate 0.0 reading is
+            # falsy, so `value_double or value_string` would hide it.
+            value = point.value_double
+            if value is None:
+                value = point.value_string
+            print(f"- {point.name}: {value} at {point.time}")
+
+        if len(modeling_data) > 5:
+            print(f"... and {len(modeling_data) - 5} more data points")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/get_modeling_metrics_example.py b/examples/get_modeling_metrics_example.py
new file mode 100644
index 0000000..711e7fe
--- /dev/null
+++ b/examples/get_modeling_metrics_example.py
@@ -0,0 +1,19 @@
+from cvec import CVec
+import os
+
+
+def main() -> None:
+    """List the names of the available modeling metrics."""
+    cvec = CVec(
+        host=os.environ.get("CVEC_HOST", "https://your-subdomain.cvector.dev"),
+        api_key=os.environ.get("CVEC_API_KEY", "your-api-key"),
+    )
+    print("\nGetting available modeling metrics...")
+    modeling_metrics = cvec.get_modeling_metrics()
+    print(f"Found {len(modeling_metrics)} modeling metrics")
+    for metric in modeling_metrics:
+        print(f"- {metric.name}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/cvec/cvec.py b/src/cvec/cvec.py
index 5590515..ec37f92 100644
--- a/src/cvec/cvec.py
+++ b/src/cvec/cvec.py
@@ -305,6 +305,105 @@ def add_metric_data(
         ]
         self._make_request("POST", endpoint, json=data_dicts)  # type: ignore[arg-type]
 
+    def get_modeling_metrics(
+        self,
+        start_at: Optional[datetime] = None,
+        end_at: Optional[datetime] = None,
+    ) -> List[Metric]:
+        """
+        Return a list of modeling metrics that had at least one transition in the given [start_at, end_at) interval.
+        A bound that is not given falls back to the class default; all metrics are returned if no bound is set at all.
+
+        Args:
+            start_at: Optional start time for the query (uses class default if not specified)
+            end_at: Optional end time for the query (uses class default if not specified)
+
+        Returns:
+            List of Metric objects containing modeling metrics
+        """
+        _start_at = start_at or self.default_start_at
+        _end_at = end_at or self.default_end_at
+
+        params: Dict[str, Any] = {
+            "start_at": _start_at.isoformat() if _start_at else None,
+            "end_at": _end_at.isoformat() if _end_at else None,
+        }
+
+        response_data = self._make_request(
+            "GET", "/api/modeling/metrics", params=params
+        )
+        return [Metric.model_validate(metric_data) for metric_data in response_data]
+
+    def get_modeling_metrics_data(
+        self,
+        names: Optional[List[str]] = None,
+        start_at: Optional[datetime] = None,
+        end_at: Optional[datetime] = None,
+    ) -> List[MetricDataPoint]:
+        """
+        Return all data-points within a given [start_at, end_at) interval,
+        optionally selecting a given list of modeling metric names.
+        Returns a list of MetricDataPoint objects, one for each metric value transition.
+
+        Args:
+            names: Optional list of modeling metric names to filter by (sent comma-joined)
+            start_at: Optional start time for the query (uses class default if not specified)
+            end_at: Optional end time for the query (uses class default if not specified)
+        """
+        _start_at = start_at or self.default_start_at
+        _end_at = end_at or self.default_end_at
+
+        params: Dict[str, Any] = {
+            "start_at": _start_at.isoformat() if _start_at else None,
+            "end_at": _end_at.isoformat() if _end_at else None,
+            "names": ",".join(names) if names else None,
+        }
+
+        response_data = self._make_request(
+            "GET", "/api/modeling/metrics/data", params=params
+        )
+        return [
+            MetricDataPoint.model_validate(point_data) for point_data in response_data
+        ]
+
+    def get_modeling_metrics_data_arrow(
+        self,
+        names: Optional[List[str]] = None,
+        start_at: Optional[datetime] = None,
+        end_at: Optional[datetime] = None,
+    ) -> bytes:
+        """
+        Return all data-points within a given [start_at, end_at) interval,
+        optionally selecting a given list of modeling metric names.
+        Returns Arrow IPC format data that can be read using pyarrow.ipc.open_file.
+
+        Args:
+            names: Optional list of modeling metric names to filter by (sent comma-joined)
+            start_at: Optional start time for the query (uses class default if not specified)
+            end_at: Optional end time for the query (uses class default if not specified)
+
+        Raises:
+            TypeError: If the request helper returns something other than bytes
+        """
+        _start_at = start_at or self.default_start_at
+        _end_at = end_at or self.default_end_at
+
+        params: Dict[str, Any] = {
+            "start_at": _start_at.isoformat() if _start_at else None,
+            "end_at": _end_at.isoformat() if _end_at else None,
+            "names": ",".join(names) if names else None,
+        }
+
+        endpoint = "/api/modeling/metrics/data/arrow"
+        result = self._make_request("GET", endpoint, params=params)
+        # Raise explicitly instead of asserting: assert statements are stripped
+        # under `python -O`, which would let a non-bytes response through.
+        if not isinstance(result, bytes):
+            raise TypeError(
+                f"Expected Arrow IPC bytes from {endpoint}, got {type(result).__name__}"
+            )
+        return result
+
     def _login_with_supabase(self, email: str, password: str) -> None:
         """
         Login to Supabase and get access/refresh tokens.
diff --git a/src/cvec/models/__init__.py b/src/cvec/models/__init__.py
index d986a8d..e3172fd 100644
--- a/src/cvec/models/__init__.py
+++ b/src/cvec/models/__init__.py
@@ -1,4 +1,8 @@
 from .metric import Metric, MetricDataPoint
 from .span import Span
 
-__all__ = ["Metric", "MetricDataPoint", "Span"]
+__all__ = [
+    "Metric",
+    "MetricDataPoint",
+    "Span",
+]
diff --git a/tests/test_modeling.py b/tests/test_modeling.py
new file mode 100644
index 0000000..8eea9bd
--- /dev/null
+++ b/tests/test_modeling.py
@@ -0,0 +1,138 @@
+"""
+Tests for the modeling functionality in the CVec client.
+"""
+
+import pytest
+from datetime import datetime
+from unittest.mock import Mock, patch
+
+
+from cvec.cvec import CVec
+
+# Query window shared by every test in this module.
+START_AT = datetime(2024, 1, 1, 12, 0, 0)
+END_AT = datetime(2024, 1, 1, 13, 0, 0)
+
+
+def _build_client() -> CVec:
+    """Build a CVec client; callers patch CVec's private helpers first."""
+    return CVec(
+        host="http://test.com", api_key="cva_test12345678901234567890123456789012"
+    )
+
+
+class TestModelingMethods:
+    """Exercise the modeling methods of the CVec class."""
+
+    @patch("cvec.cvec.CVec._fetch_publishable_key")
+    @patch("cvec.cvec.CVec._login_with_supabase")
+    @patch("cvec.cvec.CVec._make_request")
+    def test_get_modeling_metrics(
+        self, mock_make_request: Mock, mock_login: Mock, mock_fetch_key: Mock
+    ) -> None:
+        """get_modeling_metrics parses the response and queries the metrics endpoint."""
+        mock_fetch_key.return_value = "test_publishable_key"
+        mock_login.return_value = None
+        mock_make_request.return_value = [
+            {
+                "id": 1,
+                "name": "test_metric",
+                "birth_at": "2024-01-01T12:00:00",
+                "death_at": None,
+            }
+        ]
+
+        metrics = _build_client().get_modeling_metrics(
+            start_at=START_AT,
+            end_at=END_AT,
+        )
+
+        # The single mocked record comes back as one parsed metric.
+        assert isinstance(metrics, list)
+        assert len(metrics) == 1
+        assert metrics[0].id == 1
+        assert metrics[0].name == "test_metric"
+
+        # Exactly one GET against the metrics endpoint, with ISO-formatted bounds.
+        mock_make_request.assert_called_once()
+        args, kwargs = mock_make_request.call_args
+        assert args[0] == "GET"
+        assert args[1] == "/api/modeling/metrics"
+        query = kwargs["params"]
+        assert query["start_at"] == "2024-01-01T12:00:00"
+        assert query["end_at"] == "2024-01-01T13:00:00"
+
+    @patch("cvec.cvec.CVec._fetch_publishable_key")
+    @patch("cvec.cvec.CVec._login_with_supabase")
+    @patch("cvec.cvec.CVec._make_request")
+    def test_get_modeling_metrics_data(
+        self, mock_make_request: Mock, mock_login: Mock, mock_fetch_key: Mock
+    ) -> None:
+        """get_modeling_metrics_data parses data points and sends the name filter."""
+        mock_fetch_key.return_value = "test_publishable_key"
+        mock_login.return_value = None
+        mock_make_request.return_value = [
+            {
+                "name": "test_metric",
+                "time": "2024-01-01T12:00:00",
+                "value_double": 42.5,
+                "value_string": None,
+            }
+        ]
+
+        points = _build_client().get_modeling_metrics_data(
+            names=["test_metric"],
+            start_at=START_AT,
+            end_at=END_AT,
+        )
+
+        # The single mocked record comes back as one parsed data point.
+        assert isinstance(points, list)
+        assert len(points) == 1
+        assert points[0].name == "test_metric"
+        assert points[0].value_double == 42.5
+
+        # Exactly one GET against the data endpoint, with names and ISO bounds.
+        mock_make_request.assert_called_once()
+        args, kwargs = mock_make_request.call_args
+        assert args[0] == "GET"
+        assert args[1] == "/api/modeling/metrics/data"
+        query = kwargs["params"]
+        assert query["names"] == "test_metric"
+        assert query["start_at"] == "2024-01-01T12:00:00"
+        assert query["end_at"] == "2024-01-01T13:00:00"
+
+    @patch("cvec.cvec.CVec._fetch_publishable_key")
+    @patch("cvec.cvec.CVec._login_with_supabase")
+    @patch("cvec.cvec.CVec._make_request")
+    def test_get_modeling_metrics_data_arrow(
+        self, mock_make_request: Mock, mock_login: Mock, mock_fetch_key: Mock
+    ) -> None:
+        """get_modeling_metrics_data_arrow hands back the raw response bytes."""
+        mock_fetch_key.return_value = "test_publishable_key"
+        mock_login.return_value = None
+        mock_make_request.return_value = b"fake_arrow_data"
+
+        payload = _build_client().get_modeling_metrics_data_arrow(
+            names=["test_metric"],
+            start_at=START_AT,
+            end_at=END_AT,
+        )
+
+        # The mocked bytes are returned untouched.
+        assert isinstance(payload, bytes)
+        assert payload == b"fake_arrow_data"
+
+        # Exactly one GET against the arrow endpoint, with names and ISO bounds.
+        mock_make_request.assert_called_once()
+        args, kwargs = mock_make_request.call_args
+        assert args[0] == "GET"
+        assert args[1] == "/api/modeling/metrics/data/arrow"
+        query = kwargs["params"]
+        assert query["names"] == "test_metric"
+        assert query["start_at"] == "2024-01-01T12:00:00"
+        assert query["end_at"] == "2024-01-01T13:00:00"
+
+
+if __name__ == "__main__":
+    pytest.main([__file__])