diff --git a/0.download_data/README.md b/0.download_data/README.md index bece3893..bc593ace 100644 --- a/0.download_data/README.md +++ b/0.download_data/README.md @@ -14,6 +14,34 @@ The version of mitocheck_data used is specified by the hash corresponding to a c The current hash being used is `e1f86cd007657f8247310b78df92891b22e51621` which corresponds to [mitocheck_data/e1f86cd](https://github.com/WayScience/mitocheck_data/tree/e1f86cd007657f8247310b78df92891b22e51621). The `hash` variable can be set in [download_data.ipynb](download_data.ipynb) to change which version of mitocheck_data is being accessed. +### Data Preview + +The labeled dataset includes CellProfiler (CP) and DeepProfiler (DP) features as well as metadata (location, perturbation, etc) for cells from the original MitoCheck project. +The breakdown of cell counts by phenotypic class (as labeled manually by MitoCheck) is as follows: + +| Phenotypic Class | Cell Count | +|---------------------|-------| +| Interphase | 420 | +| Polylobed | 367 | +| Prometaphase | 345 | +| OutOfFocus | 304 | +| Apoptosis | 273 | +| Binuclear | 184 | +| MetaphaseAlignment | 175 | +| SmallIrregular | 164 | +| Hole | 114 | +| Elongated | 110 | +| ADCCM | 95 | +| Anaphase | 84 | +| Large | 79 | +| Grape | 74 | +| Metaphase | 74 | +| Folded | 54 | + +**Note**: The `get_features_data()` function (defined in [split_utils.py](../utils/split_utils.py)) used to load the labeled cell dataset excludes cells from the `Folded` phenotypic class when loading the labeled cells. +In our testing, the low representation of `Folded` cells leads to significantly low classification accuracy for this class (only tested with multi-class models). +Thus, we opt to exclude these cells from all training and testing. 
+ ## Step 1: Download Data Use the commands below to download labeled training dataset: diff --git a/3.evaluate_model/class_PR_curves.ipynb b/3.evaluate_model/class_PR_curves.ipynb index 6c44c722..3f2284ab 100644 --- a/3.evaluate_model/class_PR_curves.ipynb +++ b/3.evaluate_model/class_PR_curves.ipynb @@ -24,7 +24,7 @@ "sys.path.append(\"../utils\")\n", "from split_utils import get_features_data\n", "from train_utils import get_dataset\n", - "from evaluate_utils import class_PR_curves, class_PR_curves_SCM\n" + "from evaluate_utils import class_PR_curves, class_PR_curves_SCM" ] }, { @@ -44,7 +44,7 @@ "data_split_path = pathlib.Path(\"../1.split_data/indexes/data_split_indexes.tsv\")\n", "data_split_indexes = pd.read_csv(data_split_path, sep=\"\\t\", index_col=0)\n", "features_dataframe_path = pathlib.Path(\"../0.download_data/data/labeled_data.csv.gz\")\n", - "features_dataframe = get_features_data(features_dataframe_path)" + "features_dataframe = get_features_data(features_dataframe_path)\n" ] }, { @@ -329,7 +329,7 @@ " PR_data[\"feature_type\"] = feature_type\n", "\n", " # add this score data to the tidy scores compiling list\n", - " compiled_class_PR_curves.append(PR_data)" + " compiled_class_PR_curves.append(PR_data)\n" ] }, { @@ -545,14 +545,14 @@ "compiled_class_PR_curves.to_csv(compiled_PR_data_save_path, sep=\"\\t\")\n", "\n", "# preview tidy data\n", - "compiled_class_PR_curves" + "compiled_class_PR_curves\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Evaluate each model on each dataset (multiclass)\n" + "### Evaluate each model on each dataset (single class)\n" ] }, { @@ -618,7 +618,6 @@ " for feature_type, evaluation_type, phenotypic_class in itertools.product(\n", " feature_types, evaluation_types, phenotypic_classes\n", " ):\n", - "\n", " # load single class model for this combination of model type, feature type, and phenotypic class\n", " single_class_model_path = pathlib.Path(\n", " 
f\"{single_class_models_dir}/{phenotypic_class}_models/{model_type}__{feature_type}.joblib\"\n", @@ -658,7 +657,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Save PR curves from each evaluation (multiclass)\n" + "### Save PR curves from each evaluation (single class)\n" ] }, { @@ -865,7 +864,7 @@ "compiled_SCM_PR_data.to_csv(compiled_PR_data_save_path, sep=\"\\t\")\n", "\n", "# preview tidy data\n", - "compiled_SCM_PR_data" + "compiled_SCM_PR_data\n" ] } ], diff --git a/3.evaluate_model/get_LOIO_probabilities.ipynb b/3.evaluate_model/get_LOIO_probabilities.ipynb index c83cfc15..5baf9f5f 100644 --- a/3.evaluate_model/get_LOIO_probabilities.ipynb +++ b/3.evaluate_model/get_LOIO_probabilities.ipynb @@ -29,7 +29,7 @@ "sys.path.append(\"../utils\")\n", "from split_utils import get_features_data\n", "from train_utils import get_X_y_data\n", - "from evaluate_utils import get_SCM_model_data\n" + "from evaluate_utils import get_SCM_model_data" ] }, { @@ -286,7 +286,7 @@ "\n", "# preview labeled data\n", "print(labeled_data.shape)\n", - "labeled_data.head(5)\n" + "labeled_data.head(5)" ] }, { @@ -305,14 +305,14 @@ "source": [ "# see number of images to\n", "num_images = labeled_data[\"Metadata_DNA\"].unique().shape[0]\n", - "print(f\"There are {num_images} images to perform LOIO evaluation on per model.\")" + "print(f\"There are {num_images} images to perform LOIO evaluation on per model.\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Get LOIO probabilities\n" + "### Get LOIO probabilities (multi class models)\n" ] }, { @@ -406,14 +406,14 @@ " test_cells_wide_data = pd.concat([metadata_dataframe, probas_dataframe], axis=1)\n", "\n", " # add tidy long data to compiled data\n", - " compiled_LOIO_wide_data.append(test_cells_wide_data)" + " compiled_LOIO_wide_data.append(test_cells_wide_data)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Format and save LOIO probabilities\n" + "### Format and save LOIO 
probabilities (multi class models)\n" ] }, { @@ -657,7 +657,7 @@ "compiled_LOIO_tidy_long_data.to_csv(compiled_LOIO_save_path, sep=\"\\t\")\n", "\n", "# preview tidy long data\n", - "compiled_LOIO_tidy_long_data" + "compiled_LOIO_tidy_long_data\n" ] }, { @@ -819,14 +819,14 @@ " test_cells_wide_data = pd.concat([metadata_dataframe, probas_dataframe], axis=1)\n", "\n", " # add tidy long data to compiled data\n", - " compiled_LOIO_wide_data.append(test_cells_wide_data)" + " compiled_LOIO_wide_data.append(test_cells_wide_data)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Format and save LOIO probabilities\n" + "### Format and save LOIO probabilities (single class models)\n" ] }, { @@ -1082,7 +1082,7 @@ "compiled_LOIO_tidy_long_data.to_csv(compiled_LOIO_save_path, sep=\"\\t\")\n", "\n", "# preview tidy long data\n", - "compiled_LOIO_tidy_long_data\n" + "compiled_LOIO_tidy_long_data" ] } ], diff --git a/3.evaluate_model/get_model_predictions.ipynb b/3.evaluate_model/get_model_predictions.ipynb index 675f6d34..e1a3535e 100644 --- a/3.evaluate_model/get_model_predictions.ipynb +++ b/3.evaluate_model/get_model_predictions.ipynb @@ -25,7 +25,7 @@ "sys.path.append(\"../utils\")\n", "from split_utils import get_features_data\n", "from train_utils import get_dataset, get_X_y_data\n", - "from evaluate_utils import get_SCM_model_data\n" + "from evaluate_utils import get_SCM_model_data" ] }, { @@ -45,14 +45,14 @@ "data_split_path = pathlib.Path(\"../1.split_data/indexes/data_split_indexes.tsv\")\n", "data_split_indexes = pd.read_csv(data_split_path, sep=\"\\t\", index_col=0)\n", "features_dataframe_path = pathlib.Path(\"../0.download_data/data/labeled_data.csv.gz\")\n", - "features_dataframe = get_features_data(features_dataframe_path)" + "features_dataframe = get_features_data(features_dataframe_path)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Get Each Model Predictions on Each Dataset (Multi Class Models)\n" + "### Get Each 
Model Predictions on Each Dataset (multi class models)\n" ] }, { @@ -125,14 +125,14 @@ " }\n", " )\n", "\n", - " compiled_predictions.append(predictions_df)\n" + " compiled_predictions.append(predictions_df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Compile and Save Predictions\n" + "### Compile and Save Predictions (multi class models)\n" ] }, { @@ -321,14 +321,14 @@ "compiled_predictions.to_csv(compiled_predictions_save_path, sep=\"\\t\")\n", "\n", "# preview compiled predictions\n", - "compiled_predictions" + "compiled_predictions\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Get Each Model Predictions on Each Dataset (Single Class Models)\n" + "### Get Each Model Predictions on Each Dataset (single class models)\n" ] }, { @@ -577,14 +577,14 @@ " }\n", " )\n", "\n", - " compiled_predictions.append(predictions_df)" + " compiled_predictions.append(predictions_df)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Compile and Save Predictions\n" + "### Compile and Save Predictions (single class models)\n" ] }, { @@ -775,7 +775,7 @@ "compiled_predictions.to_csv(compiled_predictions_save_path, sep=\"\\t\")\n", "\n", "# preview compiled predictions\n", - "compiled_predictions" + "compiled_predictions\n" ] } ], diff --git a/3.evaluate_model/scripts/nbconverted/class_PR_curves.py b/3.evaluate_model/scripts/nbconverted/class_PR_curves.py index 1fd6e871..ec3dc3ac 100644 --- a/3.evaluate_model/scripts/nbconverted/class_PR_curves.py +++ b/3.evaluate_model/scripts/nbconverted/class_PR_curves.py @@ -111,7 +111,7 @@ compiled_class_PR_curves -# ### Evaluate each model on each dataset (multiclass) +# ### Evaluate each model on each dataset (single class) # # In[5]: @@ -188,7 +188,7 @@ plt.show() -# ### Save PR curves from each evaluation (multiclass) +# ### Save PR curves from each evaluation (single class) # # In[6]: @@ -212,3 +212,4 @@ # preview tidy data compiled_SCM_PR_data + diff --git 
a/3.evaluate_model/scripts/nbconverted/get_LOIO_probabilities.py b/3.evaluate_model/scripts/nbconverted/get_LOIO_probabilities.py index 791b8e94..9173b907 100644 --- a/3.evaluate_model/scripts/nbconverted/get_LOIO_probabilities.py +++ b/3.evaluate_model/scripts/nbconverted/get_LOIO_probabilities.py @@ -49,7 +49,7 @@ print(f"There are {num_images} images to perform LOIO evaluation on per model.") -# ### Get LOIO probabilities +# ### Get LOIO probabilities (multi class models) # # In[4]: @@ -133,7 +133,7 @@ compiled_LOIO_wide_data.append(test_cells_wide_data) -# ### Format and save LOIO probabilities +# ### Format and save LOIO probabilities (multi class models) # # In[5]: @@ -274,7 +274,7 @@ compiled_LOIO_wide_data.append(test_cells_wide_data) -# ### Format and save LOIO probabilities +# ### Format and save LOIO probabilities (single class models) # # In[7]: diff --git a/3.evaluate_model/scripts/nbconverted/get_model_predictions.py b/3.evaluate_model/scripts/nbconverted/get_model_predictions.py index 80b44e49..a5d68818 100644 --- a/3.evaluate_model/scripts/nbconverted/get_model_predictions.py +++ b/3.evaluate_model/scripts/nbconverted/get_model_predictions.py @@ -35,7 +35,7 @@ features_dataframe = get_features_data(features_dataframe_path) -# ### Get Each Model Predictions on Each Dataset (Multi Class Models) +# ### Get Each Model Predictions on Each Dataset (multi class models) # # In[3]: @@ -89,7 +89,7 @@ compiled_predictions.append(predictions_df) -# ### Compile and Save Predictions +# ### Compile and Save Predictions (multi class models) # # In[4]: @@ -109,7 +109,8 @@ compiled_predictions -# ### Get Each Model Predictions on Each Dataset (Single Class Models) +# ### Get Each Model Predictions on Each Dataset (single class models) +# # In[5]: @@ -121,7 +122,10 @@ compiled_predictions = [] # define combinations to test over -model_types = ["final", "shuffled_baseline"] # only perform LOIO with hyper params from final models so skip shuffled_baseline models 
+model_types = [ +    "final", +    "shuffled_baseline", +] # get predictions from both the final and shuffled baseline single class models feature_types = ["CP", "DP", "CP_and_DP"] evaluation_types = ["train", "test"] phenotypic_classes = features_dataframe["Mitocheck_Phenotypic_Class"].unique() @@ -135,20 +139,20 @@ f"{models_dir}/{phenotypic_class}_models/{model_type}__{feature_type}.joblib" ) model = load(single_class_model_path) - + print( - f"Getting predictions for {phenotypic_class} model: {model_type}, trained with features: {feature_type}, on dataset: {evaluation_type}" - ) - + f"Getting predictions for {phenotypic_class} model: {model_type}, trained with features: {feature_type}, on dataset: {evaluation_type}" + ) + # load dataset (train, test, etc) data = get_SCM_model_data(features_dataframe, phenotypic_class, evaluation_type) - + # get features and labels dataframe X, y = get_X_y_data(data, feature_type) - + # get predictions from model y_pred = model.predict(X) - + # create dataframe with dataset index of cell being predicted, # predicted phenotypic class, # true phenotypic class, @@ -167,7 +171,8 @@ compiled_predictions.append(predictions_df) -# ### Compile and Save Predictions +# ### Compile and Save Predictions (single class models) +# # In[6]: @@ -176,7 +181,9 @@ compiled_predictions = pd.concat(compiled_predictions).reset_index(drop=True) # specify save path -compiled_predictions_save_path = pathlib.Path("predictions/compiled_SCM_predictions.tsv") +compiled_predictions_save_path = pathlib.Path( +    "predictions/compiled_SCM_predictions.tsv" +) compiled_predictions_save_path.parent.mkdir(parents=True, exist_ok=True) # save data as tsv diff --git a/utils/split_utils.py b/utils/split_utils.py index 2dbe584a..6cce2e6f 100644 --- a/utils/split_utils.py +++ b/utils/split_utils.py @@ -13,14 +13,14 @@ def get_features_data(load_path: pathlib.Path) -> pd.DataFrame: """get features data from csv at load path Args: - load_path (pathlib.Path): path to 
training data csv + load_path (pathlib.Path): path to labeled data csv Returns: - pd.DataFrame: training dataframe + pd.DataFrame: labeled cells dataframe """ # read dataset into pandas dataframe features_data = pd.read_csv(load_path, index_col=0) - # remove fold class that has low representation + # exclude the Folded class: its low representation leads to significantly lower classification accuracy features_data = features_data[ features_data["Mitocheck_Phenotypic_Class"] != "Folded" ]