From 0c6a43e56dbf9aaad1801167985e879a5b60d2cf Mon Sep 17 00:00:00 2001
From: Tony Wu <28306721+tonywu71@users.noreply.github.com>
Date: Thu, 9 Jan 2025 16:06:16 +0100
Subject: [PATCH 1/7] build: loosen package deps

---
 pyproject.toml | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index bf6737126..ac2f0578d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,12 +34,12 @@ classifiers = [
 
 dependencies = [
     "GPUtil",
-    "numpy<2.0.0",
-    "peft>=0.11.0,<0.12.0",
-    "pillow>=9.2.0,<11.0.0",
+    "numpy",
+    "peft>=0.11.0",
+    "pillow>=9.2.0",
     "requests",
     "torch>=2.2.0",
-    "transformers>=4.46.1,<4.47.0",
+    "transformers>=4.46.1",
 ]
 
 [project.optional-dependencies]
@@ -49,7 +49,9 @@ train = [
     "configue>=5.0.0",
     "datasets>=2.19.1",
     "mteb>=1.16.3,<1.17.0",
-    "typer>=0.12.3, <1.0.0",
+    "peft>=0.11.0,<0.12.0",
+    "transformers>=4.46.1,<4.47.0",
+    "typer>=0.15.1",
 ]
 
 interpretability = [
@@ -58,9 +60,13 @@ interpretability = [
     "seaborn>=0.13.2,<1.0.0",
 ]
 
-dev = ["pytest>=8.0.0", "ruff>=0.4.0"]
+dev = ["datasets>=2.19.1", "pytest>=8.0.0", "ruff>=0.4.0"]
 
-all = ["colpali-engine[dev]", "colpali-engine[train]"]
+all = [
+    "colpali-engine[dev]",
+    "colpali-engine[interpretability]",
+    "colpali-engine[train]",
+]
 
 [project.urls]
 homepage = "https://github.com/illuin-tech/colpali"

From 5fc6e58b22a53d5164f725114357d78da51ded79 Mon Sep 17 00:00:00 2001
From: Tony Wu <28306721+tonywu71@users.noreply.github.com>
Date: Thu, 9 Jan 2025 16:06:18 +0100
Subject: [PATCH 2/7] test: add expected scores in E2E test for ColPali

---
 .../paligemma/colpali/test_colpali_e2e.py     | 67 +++++++++++--------
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/tests/models/paligemma/colpali/test_colpali_e2e.py b/tests/models/paligemma/colpali/test_colpali_e2e.py
index 7adb62b43..05e9d9f0a 100644
--- a/tests/models/paligemma/colpali/test_colpali_e2e.py
+++ b/tests/models/paligemma/colpali/test_colpali_e2e.py
@@ -2,7 +2,7 @@
 
 import pytest
 import torch
-from PIL import Image
+from datasets import load_dataset
 
 from colpali_engine.models import ColPali, ColPaliProcessor
 from colpali_engine.utils.torch_utils import get_torch_device
@@ -15,6 +15,7 @@ def model_name() -> str:
 
 @pytest.mark.slow
 def test_e2e_retrieval_and_scoring(model_name: str):
+    # Load the model and processor
     model = cast(
         ColPali,
         ColPali.from_pretrained(
@@ -23,31 +24,39 @@ def test_e2e_retrieval_and_scoring(model_name: str):
             device_map=get_torch_device("auto"),
         ),
     ).eval()
-
-    try:
-        processor = cast(ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name))
-
-        # Your inputs
-        images = [
-            Image.new("RGB", (480, 480), color="white"),
-            Image.new("RGB", (250, 250), color="black"),
-        ]
-        queries = [
-            "Is attention really all you need?",
-            "Are Benjamin, Antoine, Merve, and Jo best friends?",
-        ]
-
-        # Process the inputs
-        batch_images = processor.process_images(images).to(model.device)
-        batch_queries = processor.process_queries(queries).to(model.device)
-
-        # Forward pass
-        with torch.no_grad():
-            image_embeddings = model(**batch_images)
-            query_embeddings = model(**batch_queries)
-
-        scores = processor.score_multi_vector(query_embeddings, image_embeddings)
-        assert isinstance(scores, torch.Tensor)
-
-    except Exception as e:
-        pytest.fail(f"Code raised an exception: {e}")
+    processor = cast(ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name))
+
+    # Load the test dataset
+    ds = load_dataset("hf-internal-testing/document-visual-retrieval-test", split="test")
+
+    # Preprocess the examples
+    batch_images = processor.process_images(images=ds["image"]).to(model.device)
+    batch_queries = processor.process_queries(queries=ds["query"]).to(model.device)
+
+    # Run inference
+    with torch.inference_mode():
+        image_embeddings = model(**batch_images)
+        query_embeddings = model(**batch_queries)
+
+    # Compute retrieval scores
+    scores = processor.score_multi_vector(
+        qs=query_embeddings,
+        ps=image_embeddings,
+    )  # (len(qs), len(ps))
+
+    assert scores.ndim == 2, f"Expected 2D tensor, got {scores.ndim}"
+    assert scores.shape == (len(ds), len(ds)), f"Expected shape {(len(ds), len(ds))}, got {scores.shape}"
+
+    # Check that the maximum score of each row lies on the diagonal of the score matrix
+    assert (scores.argmax(dim=1) == torch.arange(len(ds), device=scores.device)).all()
+
+    # Further validation: fine-grained check against hardcoded scores from the original implementation
+    expected_scores = torch.tensor(
+        [
+            [16.5000, 7.5938, 15.6875],
+            [12.0625, 16.2500, 11.1250],
+            [15.2500, 12.6250, 21.0000],
+        ],
+        dtype=scores.dtype,
+    )
+    assert torch.allclose(scores, expected_scores, atol=1), f"Expected scores {expected_scores}, got {scores}"

From 62518cc0331e2ed5da11ccab89159fb7c0c4863d Mon Sep 17 00:00:00 2001
From: Tony Wu <28306721+tonywu71@users.noreply.github.com>
Date: Thu, 9 Jan 2025 16:24:26 +0100
Subject: [PATCH 3/7] chore: update CHANGELOG

---
 CHANGELOG.md | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d99058f05..3e87c1036 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,17 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
-## ## [0.3.5] - 2024-12-13
+## [Unreleased]
+
+## Added
+
+- Add expected scores in ColPali E2E test
+
+## Changed
+
+- Loosen package dependencies
+
+## [0.3.5] - 2024-12-13
 
 ## Added
 
@@ -22,7 +32,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 - General `CorpusQueryCollator` for BEIR style dataset training or hard negative training. This deprecates `HardNegCollator` but all changes to the training loop are made for a seemless update.
 
-
 ### Changed
 
 - Updates BiPali config files
@@ -31,7 +40,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 - Removed `add_suffix` in the VisualRetrieverCollator and let the `suffix` be added in the individual processors.
 - Changed the incorrect `<pad>` token to `<|endoftext|>` fo query augmentation `ColQwen2Processor`. Note that previous models were trained with `<|endoftext|>` so this is simply a non-breaking inference upgrade patch.
 
-
 ## [0.3.3] - 2024-10-29
 
 ### Added

From d45ba87b32bc29c762ae2a9464d6e61aab0e45eb Mon Sep 17 00:00:00 2001
From: Tony Wu <28306721+tonywu71@users.noreply.github.com>
Date: Fri, 10 Jan 2025 13:35:10 +0100
Subject: [PATCH 4/7] build: restore transformers dep's upper bound

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index ac2f0578d..db03aa21f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,7 @@ dependencies = [
     "pillow>=9.2.0",
     "requests",
     "torch>=2.2.0",
-    "transformers>=4.46.1",
+    "transformers>=4.46.1,<4.47.0",
 ]
 
 [project.optional-dependencies]

From a8d5b4805aa4bc91a890a3876e5305deea2ea162 Mon Sep 17 00:00:00 2001
From: Tony Wu <28306721+tonywu71@users.noreply.github.com>
Date: Fri, 10 Jan 2025 13:38:57 +0100
Subject: [PATCH 5/7] build: restore pillow dep's upper bound

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index db03aa21f..876270e56 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,6 +50,7 @@ train = [
     "datasets>=2.19.1",
     "mteb>=1.16.3,<1.17.0",
     "peft>=0.11.0,<0.12.0",
+    "pillow>=9.2.0,<11.0.0",
     "transformers>=4.46.1,<4.47.0",
     "typer>=0.15.1",
 ]

From 3a809830842f7281afcabc3a626d7d4d136f61b1 Mon Sep 17 00:00:00 2001
From: Tony Wu <28306721+tonywu71@users.noreply.github.com>
Date: Fri, 10 Jan 2025 14:04:14 +0100
Subject: [PATCH 6/7] build: remove duplicate dep

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 876270e56..d620e007b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,7 +51,6 @@ train = [
     "mteb>=1.16.3,<1.17.0",
     "peft>=0.11.0,<0.12.0",
     "pillow>=9.2.0,<11.0.0",
-    "transformers>=4.46.1,<4.47.0",
     "typer>=0.15.1",
 ]
 

From 14820d0f93526376b8c2d2be1c9b223ae6322b87 Mon Sep 17 00:00:00 2001
From: Tony Wu <28306721+tonywu71@users.noreply.github.com>
Date: Fri, 10 Jan 2025 15:25:48 +0100
Subject: [PATCH 7/7] chore: add `0.3.6` release in changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e87c1036..a714ad733 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
-## [Unreleased]
+## [0.3.6] - 2025-01-10
 
 ## Added