diff --git a/pyodi/apps/ground_truth.py b/pyodi/apps/ground_truth.py
index c66ebdf..e20d28d 100644
--- a/pyodi/apps/ground_truth.py
+++ b/pyodi/apps/ground_truth.py
@@ -51,11 +51,7 @@
 from loguru import logger
 
 from pyodi.core.boxes import add_centroids
-from pyodi.core.utils import (
-    coco_ground_truth_to_dfs,
-    join_annotations_with_image_sizes,
-    load_ground_truth_file,
-)
+from pyodi.core.utils import coco_ground_truth_to_df
 from pyodi.plots.boxes import get_centroids_heatmap, plot_heatmap
 from pyodi.plots.common import plot_scatter_with_histograms
 
@@ -81,12 +77,16 @@ def ground_truth(
         output = str(Path(output) / Path(ground_truth_file).stem)
         Path(output).mkdir(parents=True, exist_ok=True)
 
-    coco_ground_truth = load_ground_truth_file(ground_truth_file)
+    df_annotations = coco_ground_truth_to_df(ground_truth_file)
 
-    df_images, df_annotations = coco_ground_truth_to_dfs(coco_ground_truth)
+    df_images = df_annotations.loc[
+        :, df_annotations.columns.str.startswith("img_")
+    ].drop_duplicates()
 
     plot_scatter_with_histograms(
         df_images,
+        x="img_width",
+        y="img_height",
         title=f"{Path(ground_truth_file).stem}: Image Shapes",
         show=show,
         output=output,
@@ -95,8 +95,6 @@ def ground_truth(
         histogram_ybins=dict(size=10),
     )
 
-    df_annotations = join_annotations_with_image_sizes(df_annotations, df_images)
-
     df_annotations = add_centroids(df_annotations)
 
     df_annotations["absolute_height"] = (
diff --git a/pyodi/apps/train_config/train_config_evaluation.py b/pyodi/apps/train_config/train_config_evaluation.py
index 799d631..dd6c45c 100644
--- a/pyodi/apps/train_config/train_config_evaluation.py
+++ b/pyodi/apps/train_config/train_config_evaluation.py
@@ -85,11 +85,7 @@
     scale_bbox_dimensions,
 )
 from pyodi.core.clustering import get_max_overlap
-from pyodi.core.utils import (
-    coco_ground_truth_to_dfs,
-    join_annotations_with_image_sizes,
-    load_ground_truth_file,
-)
+from pyodi.core.utils import coco_ground_truth_to_df
 from pyodi.plots.evaluation import plot_overlap_result
 
 
@@ -158,11 +154,7 @@ def train_config_evaluation(
         Path(output).mkdir(parents=True, exist_ok=True)
 
     if isinstance(ground_truth_file, str):
-        coco_ground_truth = load_ground_truth_file(ground_truth_file)
-
-        df_images, df_annotations = coco_ground_truth_to_dfs(coco_ground_truth)
-
-        df_annotations = join_annotations_with_image_sizes(df_annotations, df_images)
+        df_annotations = coco_ground_truth_to_df(ground_truth_file)
 
         df_annotations = filter_zero_area_bboxes(df_annotations)
 
diff --git a/pyodi/apps/train_config/train_config_generation.py b/pyodi/apps/train_config/train_config_generation.py
index 26d779c..a8dcc8e 100644
--- a/pyodi/apps/train_config/train_config_generation.py
+++ b/pyodi/apps/train_config/train_config_generation.py
@@ -112,11 +112,7 @@
     scale_bbox_dimensions,
 )
 from pyodi.core.clustering import find_pyramid_level, kmeans_euclidean
-from pyodi.core.utils import (
-    coco_ground_truth_to_dfs,
-    join_annotations_with_image_sizes,
-    load_ground_truth_file,
-)
+from pyodi.core.utils import coco_ground_truth_to_df
 from pyodi.plots.clustering import plot_clustering_results
 
 
@@ -158,11 +154,7 @@ def train_config_generation(
     if output is not None:
         Path(output).mkdir(parents=True, exist_ok=True)
 
-    coco_ground_truth = load_ground_truth_file(ground_truth_file)
-
-    df_images, df_annotations = coco_ground_truth_to_dfs(coco_ground_truth)
-
-    df_annotations = join_annotations_with_image_sizes(df_annotations, df_images)
+    df_annotations = coco_ground_truth_to_df(ground_truth_file)
 
     df_annotations = filter_zero_area_bboxes(df_annotations)
 
diff --git a/pyodi/core/utils.py b/pyodi/core/utils.py
index 819e324..ed5f402 100644
--- a/pyodi/core/utils.py
+++ b/pyodi/core/utils.py
@@ -1,8 +1,6 @@
 import json
-from collections import defaultdict
-from typing import Any, Dict, TextIO, Tuple
+from typing import TextIO
 
-import numpy as np
 import pandas as pd
 from loguru import logger
 from pycocotools.coco import COCO
@@ -24,92 +22,55 @@ def load_coco_ground_truth_from_StringIO(string_io: TextIO) -> COCO:
     return coco_ground_truth
 
 
-def load_ground_truth_file(ground_truth_file: str) -> Dict:
-    """Loads ground truth file.
+def coco_ground_truth_to_df(
+    ground_truth_file: str, max_images: int = 200000
+) -> pd.DataFrame:
+    """Load COCO ground truth data and transform it into a pd.DataFrame.
 
     Args:
         ground_truth_file: Path of ground truth file.
+        max_images: Maximum number of images to keep; the rest are ignored.
 
     Returns:
-        Dictionary with the ground truth data.
+        pd.DataFrame with one row per annotation and its image name and size.
 
     """
     logger.info("Loading Ground Truth File")
-    coco_ground_truth = json.load(open(ground_truth_file))
-    return coco_ground_truth
+    with open(ground_truth_file, encoding="utf-8") as gt:
+        coco_ground_truth = json.load(gt)
 
-
-def coco_ground_truth_to_dfs(
-    coco_ground_truth: Dict, max_images: int = 200000
-) -> Tuple[pd.DataFrame, pd.DataFrame]:
-    """Transforms COCO ground truth data to pd.DataFrame objects.
-
-    Args:
-        coco_ground_truth: COCO ground truth data.
-        max_images: Maximum number of images to process.
-
-    Returns:
-        Images and annotations pd.DataFrames.
-
-    """
-    logger.info("Converting COCO Ground Truth to pd.DataFrame")
-    dict_images: Dict[str, Any] = defaultdict(list)
-    categories = {x["id"]: x["name"] for x in coco_ground_truth["categories"]}
-    image_id_to_name = {}
     if len(coco_ground_truth["images"]) > max_images:
         logger.warning(
             f"Number of images {len(coco_ground_truth['images'])} exceeds maximum: "
             f"{max_images}.\nAll the exceeding images will be ignored."
         )
-    for image in coco_ground_truth["images"][:max_images]:
-        for k, v in image.items():
-            dict_images[k].append(v)
-        image_id_to_name[image["id"]] = image["file_name"]
-
-    df_images = pd.DataFrame(dict_images)
 
-    df_images["ratio"] = df_images["height"] / df_images["width"]
-    df_images["scale"] = np.sqrt(df_images["height"] * df_images["width"])
-
-    image_id_to_count = {x: 0 for x in df_images["id"]}
-    dict_annotations: Dict[str, Any] = defaultdict(list)
-    for annotation in coco_ground_truth["annotations"]:
-        if annotation["image_id"] not in image_id_to_name:
-            # Annotation of one of the exceeding images
-            continue
-        image_id_to_count[annotation["image_id"]] += 1
-        dict_annotations["file_name"].append(image_id_to_name[annotation["image_id"]])
-        dict_annotations["category"].append(categories[annotation["category_id"]])
-        dict_annotations["area"].append(annotation["area"])
-        dict_annotations["col_left"].append(int(annotation["bbox"][0]))
-        dict_annotations["row_top"].append(int(annotation["bbox"][1]))
-        dict_annotations["width"].append(int(annotation["bbox"][2]))
-        dict_annotations["height"].append(int(annotation["bbox"][3]))
-
-    df_images["bounding_box_count"] = image_id_to_count.values()
-
-    df_annotations = pd.DataFrame(dict_annotations)
-
-    return df_images, df_annotations
+    logger.info("Converting COCO Ground Truth to pd.DataFrame")
+    df_images = pd.DataFrame(coco_ground_truth["images"][:max_images])[
+        ["id", "file_name", "width", "height"]
+    ]
+    df_images = df_images.add_prefix("img_")
 
+    df_annotations = pd.DataFrame(coco_ground_truth["annotations"])
 
-def join_annotations_with_image_sizes(
-    df_annotations: pd.DataFrame, df_images: pd.DataFrame
-) -> pd.DataFrame:
-    """Left join between annotations pd.DataFrame and images.
+    # Look up category names by id; ids without a category become NaN
+    categories = {x["id"]: x["name"] for x in coco_ground_truth["categories"]}
+    df_annotations["category"] = df_annotations["category_id"].map(categories)
 
-    It only keeps df_annotations keys and image sizes.
+    # Split each bbox list into one column per coordinate
+    bbox_columns = ["col_left", "row_top", "width", "height"]
+    df_annotations[bbox_columns] = pd.DataFrame(
+        df_annotations.bbox.tolist(), index=df_annotations.index
+    )
 
-    Args:
-        df_annotations: pd.DataFrame with COCO annotations.
-        df_images: pd.DataFrame with images.
+    # Keep a fixed subset of columns; "iscrowd" only when present
+    column_names = ["image_id", "area", "id", "category"] + bbox_columns
+    if "iscrowd" in df_annotations.columns:
+        column_names.append("iscrowd")
 
-    Returns:
-        pd.DataFrame with df_annotations keys and image sizes.
+    # Inner join drops annotations of images beyond max_images
+    df_annotations = df_annotations[column_names].join(
+        df_images.set_index("img_id"), how="inner", on="image_id"
+    )
 
-    """
-    column_names = list(df_annotations.columns) + ["img_width", "img_height"]
-    df_images = df_images.add_prefix("img_")
-    return df_annotations.join(df_images.set_index("img_file_name"), on="file_name")[
-        column_names
-    ]
+    return df_annotations
diff --git a/pyodi/plots/common.py b/pyodi/plots/common.py
index 1c9e840..5281792 100644
--- a/pyodi/plots/common.py
+++ b/pyodi/plots/common.py
@@ -76,7 +76,7 @@ def plot_scatter_with_histograms(
             y=filtered_df[y],
             mode="markers",
             name=str(c or "Images Shape"),
-            text=filtered_df["file_name"],
+            text=filtered_df["img_file_name"],
             marker=dict(color=colors[i % len(colors)] if colors else None),
             legendgroup=f"legendgroup_{i}" if legendgroup else None,
             **kwargs,
diff --git a/tests/core/test_utils.py b/tests/core/test_utils.py
new file mode 100644
index 0000000..c8fcd0d
--- /dev/null
+++ b/tests/core/test_utils.py
@@ -0,0 +1,48 @@
+import json
+
+import numpy as np
+import pytest
+
+from pyodi.core.utils import coco_ground_truth_to_df
+
+
+@pytest.fixture(scope="session")
+def annotations_file(tmpdir_factory):
+    """Write a small COCO ground truth file and return its path."""
+    images = [
+        {"id": 1, "file_name": "1.png", "width": 1280, "height": 720},
+        {"id": 2, "file_name": "2.png", "width": 1280, "height": 720},
+    ]
+    annotations = [
+        {"id": 1, "image_id": 1, "bbox": [0, 0, 2, 2], "area": 4, "category_id": 1},
+        {"id": 2, "image_id": 1, "bbox": [0, 0, 2, 2], "area": 4, "category_id": 2},
+        {"id": 3, "image_id": 2, "bbox": [0, 0, 2, 2], "area": 4, "category_id": 3},
+    ]
+    categories = [
+        {"supercategory": "person", "id": 1, "name": "person"},
+        {"supercategory": "animal", "id": 2, "name": "cat"},
+        {"supercategory": "animal", "id": 3, "name": "dog"},
+    ]
+
+    fn = tmpdir_factory.mktemp("data").join("ground_truth.json")
+    data = dict(images=images, annotations=annotations, categories=categories)
+    with open(str(fn), "w") as f:
+        json.dump(data, f)
+
+    return fn
+
+
+def test_coco_ground_truth_to_df(annotations_file):
+    """Every annotation is kept, with bbox and category name columns."""
+    result = coco_ground_truth_to_df(annotations_file)
+    expected_col_left = np.array([0, 0, 0])
+    expected_categories = np.array(["person", "cat", "dog"])
+
+    assert len(result) == 3
+    np.testing.assert_array_equal(result["col_left"].to_numpy(), expected_col_left)
+    np.testing.assert_array_equal(result["category"].to_numpy(), expected_categories)
+
+
+def test_coco_ground_truth_to_df_with_max_images(annotations_file):
+    """With ``max_images=1`` only the first image's two annotations remain."""
+    assert len(coco_ground_truth_to_df(annotations_file, max_images=1)) == 2