-
Notifications
You must be signed in to change notification settings - Fork 555
MIMIC3 Initialize Test Case for Tests/Core #527
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,85 @@ | ||||||||||||||||
| import unittest | ||||||||||||||||
| import tempfile | ||||||||||||||||
| import shutil | ||||||||||||||||
| import subprocess | ||||||||||||||||
| import os | ||||||||||||||||
| from pathlib import Path | ||||||||||||||||
|
|
||||||||||||||||
| from pyhealth.datasets import MIMIC3Dataset | ||||||||||||||||
|
|
||||||||||||||||
|
|
||||||||||||||||
| class TestMIMIC3Demo(unittest.TestCase): | ||||||||||||||||
| """Test MIMIC3 dataset with demo data downloaded from PhysioNet.""" | ||||||||||||||||
|
|
||||||||||||||||
def setUp(self):
    """Create a scratch directory, then download and load the demo dataset.

    Removal of the scratch directory is registered with ``addCleanup``
    immediately after it is created. unittest does not run ``tearDown``
    when ``setUp`` raises (this includes ``unittest.SkipTest`` raised by
    the download step), whereas cleanup functions are always run, so the
    directory no longer leaks on skipped or failed set-up.
    """
    self.temp_dir = tempfile.mkdtemp()
    # ignore_errors: tearDown may already have deleted the directory by the
    # time the cleanup runs.
    self.addCleanup(shutil.rmtree, self.temp_dir, ignore_errors=True)
    self._download_demo_dataset()
    self._load_dataset()
|
|
||||||||||||||||
def tearDown(self):
    """Delete the per-test scratch directory if it is still on disk."""
    scratch_dir = self.temp_dir
    if not scratch_dir:
        return
    if not os.path.exists(scratch_dir):
        return
    shutil.rmtree(scratch_dir)
|
|
||||||||||||||||
def _download_demo_dataset(self):
    """Mirror the MIMIC-III demo dataset into ``self.temp_dir`` using wget.

    On success, ``self.demo_dataset_path`` is set to the mirrored
    ``physionet.org/files/mimiciii-demo/1.4`` directory inside
    ``self.temp_dir``.

    Raises:
        unittest.SkipTest: If wget is not installed, exits with a non-zero
            status, does not finish within the time limit, or the mirrored
            directory is missing afterwards.
    """
    download_url = "https://physionet.org/files/mimiciii-demo/1.4/"
    # Upper bound so a stalled connection skips the test instead of hanging
    # the whole test run.
    timeout_seconds = 600

    # Use wget to download the demo dataset recursively
    cmd = [
        "wget",
        "-r",
        "-N",
        "-c",
        "-np",
        "--directory-prefix",
        self.temp_dir,
        download_url,
    ]

    try:
        subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
            timeout=timeout_seconds,
        )
    except subprocess.CalledProcessError as e:
        # Output is captured, so surface the tail of stderr; otherwise the
        # skip reason only shows the exit status.
        stderr_tail = (e.stderr or "").strip()[-500:]
        detail = f"\n{stderr_tail}" if stderr_tail else ""
        raise unittest.SkipTest(
            f"Failed to download MIMIC-III demo dataset: {e}{detail}"
        ) from e
    except subprocess.TimeoutExpired as e:
        raise unittest.SkipTest(
            f"Failed to download MIMIC-III demo dataset: {e}"
        ) from e
    except FileNotFoundError as e:
        # Raised by subprocess when the wget executable cannot be found.
        raise unittest.SkipTest(
            "wget not available - skipping download test"
        ) from e

    # Find the downloaded dataset path
    physionet_dir = (
        Path(self.temp_dir) / "physionet.org" / "files" / "mimiciii-demo" / "1.4"
    )
    if not physionet_dir.exists():
        raise unittest.SkipTest("Downloaded dataset not found in expected location")
    self.demo_dataset_path = str(physionet_dir)
|
|
||||||||||||||||
| def _load_dataset(self): | ||||||||||||||||
| """Load the dataset for testing.""" | ||||||||||||||||
| tables = ["diagnoses_icd", "procedures_icd", "prescriptions", "noteevents"] | ||||||||||||||||
|
||||||||||||||||
| tables = ["diagnoses_icd", "procedures_icd", "prescriptions", "noteevents"] | |
| tables = [ | |
| cls.TABLE_DIAGNOSES_ICD, | |
| cls.TABLE_PROCEDURES_ICD, | |
| cls.TABLE_PRESCRIPTIONS, | |
| cls.TABLE_NOTEEVENTS, | |
| ] |
Copilot
AI
Jul 27, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The patient ID "10006" is a magic number. Consider defining it as a class constant (e.g., TEST_PATIENT_ID = "10006") to improve maintainability and make it clear why this specific patient ID is used.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] The download URL is hardcoded. Consider defining it as a class constant to make it easier to update if the URL changes and to improve maintainability.