From 78c2a750d3e91ac6d57a2d032d870bc830c55fc8 Mon Sep 17 00:00:00 2001 From: Arthur Zucker Date: Thu, 7 Sep 2023 22:56:55 +0000 Subject: [PATCH 1/5] fix `set_infilling_processor` to properly reset --- .../models/code_llama/tokenization_code_llama_fast.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/models/code_llama/tokenization_code_llama_fast.py b/src/transformers/models/code_llama/tokenization_code_llama_fast.py index d3fc6e8abb99..406ae2cef92c 100644 --- a/src/transformers/models/code_llama/tokenization_code_llama_fast.py +++ b/src/transformers/models/code_llama/tokenization_code_llama_fast.py @@ -264,6 +264,7 @@ def set_infilling_processor(self, reset, suffix_first=False, add_special_tokens= ] ) self.update_post_processor() + return self._tokenizer.normalizer = normalizers.Replace(pattern=" ", content="▁") pair = [self.bos_token] if self.add_bos_token and add_special_tokens else [] From 6ad456769aa0e90043f62b99d3075946aec6a812 Mon Sep 17 00:00:00 2001 From: Arthur Zucker Date: Fri, 8 Sep 2023 14:07:20 +0000 Subject: [PATCH 2/5] Add docstring! --- .../models/code_llama/tokenization_code_llama_fast.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/transformers/models/code_llama/tokenization_code_llama_fast.py b/src/transformers/models/code_llama/tokenization_code_llama_fast.py index 406ae2cef92c..6e32783dec8d 100644 --- a/src/transformers/models/code_llama/tokenization_code_llama_fast.py +++ b/src/transformers/models/code_llama/tokenization_code_llama_fast.py @@ -256,6 +256,15 @@ def add_bos_token(self, value): self.update_post_processor() def set_infilling_processor(self, reset, suffix_first=False, add_special_tokens=True): + """ + Updates the normalizer to make sure the prompt format for `infilling` is respected. + If `reset` is set to `True`, the normalizer is reset to the normal behaviour. + The infilling format is the following: + if suffix_first + "
 {suf}  {pre}"
+        else:
+            " 
 {pre} {suf} "
+        """
         if reset:
             self._tokenizer.normalizer = normalizers.Sequence(
                 [

From 76e0e351fc92480880f96eae497a2e407c862c4e Mon Sep 17 00:00:00 2001
From: Arthur Zucker 
Date: Fri, 8 Sep 2023 14:20:25 +0000
Subject: [PATCH 3/5] fixups

---
 .../models/code_llama/tokenization_code_llama_fast.py       | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/code_llama/tokenization_code_llama_fast.py b/src/transformers/models/code_llama/tokenization_code_llama_fast.py
index 6e32783dec8d..5fc34b615559 100644
--- a/src/transformers/models/code_llama/tokenization_code_llama_fast.py
+++ b/src/transformers/models/code_llama/tokenization_code_llama_fast.py
@@ -257,10 +257,8 @@ def add_bos_token(self, value):
 
     def set_infilling_processor(self, reset, suffix_first=False, add_special_tokens=True):
         """
-        Updates the normalizer to make sure the prompt format for `infilling` is respected.
-        If `reset` is set to `True`, the normalizer is reset to the normal behaviour.
-        The infilling format is the following:
-        if suffix_first
+        Updates the normalizer to make sure the prompt format for `infilling` is respected. If `reset` is set to
+        `True`, the normalizer is reset to the normal behaviour. The infilling format is the following: if suffix_first
             " 
 {suf}  {pre}"
         else:
             " 
 {pre} {suf} "

From cceb8c3e1822fe44151ec852a2c9ede2f54d19ae Mon Sep 17 00:00:00 2001
From: Arthur Zucker 
Date: Fri, 8 Sep 2023 18:30:26 +0000
Subject: [PATCH 4/5] more details in the documentation about the tokenization

---
 .../models/code_llama/tokenization_code_llama_fast.py      | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/code_llama/tokenization_code_llama_fast.py b/src/transformers/models/code_llama/tokenization_code_llama_fast.py
index 5fc34b615559..9bcf18f760a8 100644
--- a/src/transformers/models/code_llama/tokenization_code_llama_fast.py
+++ b/src/transformers/models/code_llama/tokenization_code_llama_fast.py
@@ -257,11 +257,14 @@ def add_bos_token(self, value):
 
     def set_infilling_processor(self, reset, suffix_first=False, add_special_tokens=True):
         """
-        Updates the normalizer to make sure the prompt format for `infilling` is respected. If `reset` is set to
-        `True`, the normalizer is reset to the normal behaviour. The infilling format is the following: if suffix_first
+        Updates the normalizer to make sure the prompt format for `infilling` is respected. The infilling format is the following: if suffix_first
             " 
 {suf}  {pre}"
         else:
             " 
 {pre} {suf} "
+
+        If `reset` is set to `True`, the `normalizer` and `post_processor` are reset to
+        their "normal" behaviour, which is to add a prefix space for the normalizer,
+        and add a `bos_token` to the input text for the `post_processor`.
         """
         if reset:
             self._tokenizer.normalizer = normalizers.Sequence(

From 5c2f404409e2a7a12662f437dc8fc78d2bba521c Mon Sep 17 00:00:00 2001
From: Arthur Zucker 
Date: Fri, 8 Sep 2023 18:30:30 +0000
Subject: [PATCH 5/5] style

---
 .../models/code_llama/tokenization_code_llama_fast.py     | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/code_llama/tokenization_code_llama_fast.py b/src/transformers/models/code_llama/tokenization_code_llama_fast.py
index 9bcf18f760a8..768946ea35f7 100644
--- a/src/transformers/models/code_llama/tokenization_code_llama_fast.py
+++ b/src/transformers/models/code_llama/tokenization_code_llama_fast.py
@@ -257,14 +257,14 @@ def add_bos_token(self, value):
 
     def set_infilling_processor(self, reset, suffix_first=False, add_special_tokens=True):
         """
-        Updates the normalizer to make sure the prompt format for `infilling` is respected. The infilling format is the following: if suffix_first
+        Updates the normalizer to make sure the prompt format for `infilling` is respected. The infilling format is the
+        following: if suffix_first
             " 
 {suf}  {pre}"
         else:
             " 
 {pre} {suf} "
 
-        If `reset` is set to `True`, the `normalizer` and `post_processor` are reset to
-        their "normal" behaviour, which is to add a prefix space for the normalizer,
-        and add a `bos_token` to the input text for the `post_processor`.
+        If `reset` is set to `True`, the `normalizer` and `post_processor` are reset to their "normal" behaviour, which
+        is to add a prefix space for the normalizer, and add a `bos_token` to the input text for the `post_processor`.
         """
         if reset:
             self._tokenizer.normalizer = normalizers.Sequence(