From a3616389a032c3a2cbe48fb6355e2dbaf0e8554b Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Fri, 25 Dec 2020 14:20:35 +0100 Subject: [PATCH 01/18] Cleaning up conversation tests. --- src/transformers/pipelines/conversational.py | 10 +++- tests/test_pipelines_common.py | 63 ++++++++++++++++++++ tests/test_pipelines_conversational.py | 51 +++++++++++++++- tests/test_pipelines_summarization.py | 37 +++++++++++- 4 files changed, 156 insertions(+), 5 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index d6f0e2517f4b..7dc7399a356a 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -47,12 +47,18 @@ class Conversation: conversation.add_user_input("Is it good?") """ - def __init__(self, text: str = None, conversation_id: uuid.UUID = None): + def __init__( + self, text: str = None, conversation_id: uuid.UUID = None, past_user_inputs=None, generated_responses=None + ): if not conversation_id: conversation_id = uuid.uuid4() self.uuid: uuid.UUID = conversation_id + if past_user_inputs is None: + past_user_inputs = past_user_inputs self.past_user_inputs: List[str] = [] - self.generated_responses: List[str] = [] + if generated_responses is None: + generated_responses = [] + self.generated_responses: List[str] = generated_responses self.history: List[int] = [] self.new_user_input: Optional[str] = text diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py index c8a66053a307..951aa96a3559 100644 --- a/tests/test_pipelines_common.py +++ b/tests/test_pipelines_common.py @@ -18,9 +18,14 @@ from transformers import is_tf_available, is_torch_available, pipeline from transformers.pipelines import Pipeline from transformers.testing_utils import _run_slow_tests, is_pipeline_test, require_tf, require_torch, slow +from transformers.tokenization_utils import TruncationStrategy from transformers.tokenization_utils_base import to_py_obj 
+if is_torch_available(): + import torch + + VALID_INPUTS = ["A simple string", ["list of strings"]] @@ -242,3 +247,61 @@ def _test_pipeline(self, nlp: Pipeline): self.assertIn(key, result) self.assertRaises(Exception, nlp, self.invalid_inputs) + + +class DummyTok: + pad_token_id = 0 + eos_token_id = 0 + + def __init__(self, **kwargs): + for name, v in kwargs.items(): + setattr(self, name, v) + + def __call__(self, inputs, **kwargs): + if kwargs.get("return_tensors", "") == "pt": + return self.encode_pt(inputs, **kwargs) + else: + return self.encode_list(inputs, **kwargs) + + def encode_list(self, inputs, **kwargs): + unwrap = False + if isinstance(inputs, str): + unwrap = True + inputs = [inputs] + + assert isinstance(inputs, list) + input_ids = [self.encode(input_) for input_ in inputs] + + if unwrap: + input_ids = input_ids[0] + + return {"input_ids": input_ids} + + def encode_pt(self, inputs, **kwargs): + if isinstance(inputs, str): + input_ids = torch.LongTensor(self.encode(inputs)).unsqueeze(0) + else: + input_ids = self._pad([self.encode(input_) for input_ in inputs]) + return self.finalize_pt(input_ids, **kwargs) + + def finalize_pt(self, input_ids, **kwargs): + if kwargs.get("truncation", TruncationStrategy.DO_NOT_TRUNCATE) == TruncationStrategy.ONLY_FIRST: + input_ids = input_ids[:, : self.model_max_length] + attention_mask = torch.zeros_like(input_ids).long() + 1 + return {"input_ids": input_ids, "attention_mask": attention_mask} + + def _pad(self, inputs): + return torch.nn.utils.rnn.pad_sequence( + [torch.LongTensor(input_) for input_ in inputs], + padding_value=self.pad_token_id, + ).transpose(1, 0) + + def pad(self, inputs, **kwargs): + input_ids = self._pad(inputs["input_ids"]) + return self.finalize_pt(input_ids, **kwargs) + + def encode(self, input_): + return list(input_.encode("utf-8")) + + def decode(self, sequence, **kwargs): + return "D" * len(sequence) diff --git a/tests/test_pipelines_conversational.py 
b/tests/test_pipelines_conversational.py index 16703b1113d3..10be49d0243e 100644 --- a/tests/test_pipelines_conversational.py +++ b/tests/test_pipelines_conversational.py @@ -15,14 +15,63 @@ import unittest from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Conversation, ConversationalPipeline, pipeline +from transformers.models.gpt2 import GPT2Config, GPT2LMHeadModel from transformers.testing_utils import require_torch, slow, torch_device -from .test_pipelines_common import MonoInputPipelineCommonMixin +from .test_pipelines_common import DummyTok, MonoInputPipelineCommonMixin DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0 +class SimpleConversationPipelineTests(unittest.TestCase): + @require_torch + def test_integration_torch_conversation(self): + # When + config = GPT2Config( + vocab_size=257, n_ctx=64, n_embd=64, n_layer=1, n_head=8, eos_token_id=0, bos_token_id=0, pad_token_id=0 + ) + model = GPT2LMHeadModel(config) + tokenizer = DummyTok() + nlp = pipeline(task="conversational", device=DEFAULT_DEVICE_NUM, model=model, tokenizer=tokenizer) + conversation_1 = Conversation("Going to the movies tonight - any suggestions?") + conversation_2 = Conversation("What's the last book you have read?") + # Then + self.assertEqual(len(conversation_1.past_user_inputs), 0) + self.assertEqual(len(conversation_2.past_user_inputs), 0) + # When + result = nlp([conversation_1, conversation_2], do_sample=False, max_length=1000) + # Then + self.assertEqual(result, [conversation_1, conversation_2]) + self.assertEqual( + result, + [ + Conversation( + None, + past_user_inputs=["Going to the movies tonight - any suggestions?"], + generated_responses=["D"], + ), + Conversation( + None, past_user_inputs=["What's the last book you have read?"], generated_responses=["D"] + ), + ], + ) + + # When + conversation_2.add_user_input("Why do you recommend it?") + result = nlp(conversation_2, do_sample=False, max_length=1000) + # Then + self.assertEqual(result, conversation_2) + 
self.assertEqual( + result, + Conversation( + None, + past_user_inputs=["What's the last book you have read?", "Why do you recommend it?"], + generated_responses=["D", "D"], + ), + ) + + class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): pipeline_task = "conversational" small_models = [] # Models tested without the @slow decorator diff --git a/tests/test_pipelines_summarization.py b/tests/test_pipelines_summarization.py index 0622053204b4..f1e7ce02ca08 100644 --- a/tests/test_pipelines_summarization.py +++ b/tests/test_pipelines_summarization.py @@ -14,15 +14,48 @@ import unittest -from transformers import pipeline +from transformers import TruncationStrategy, is_torch_available, pipeline from transformers.testing_utils import require_torch, slow, torch_device -from .test_pipelines_common import MonoInputPipelineCommonMixin +from .test_pipelines_common import DummyTok, MonoInputPipelineCommonMixin + + +if is_torch_available(): + from transformers import BartConfig, BartForConditionalGeneration DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0 +class SimpleSummarizationPipelineTests(unittest.TestCase): + @require_torch + def test_input_too_long(self): + config = BartConfig( + vocab_size=257, + d_model=32, + encoder_layers=1, + decoder_layers=1, + encoder_ffn_dim=32, + decoder_ffn_dim=32, + # So any text > 4 should raise an exception + max_position_embeddings=4, + encoder_attention_heads=1, + decoder_attention_heads=1, + max_length=4, + min_length=1, + ) + model = BartForConditionalGeneration(config) + tokenizer = DummyTok(model_max_length=4) + nlp = pipeline(task="summarization", model=model, tokenizer=tokenizer) + + with self.assertRaises(IndexError): + _ = nlp("This is a test") + + output = nlp("This is a test", truncation=TruncationStrategy.ONLY_FIRST) + + self.assertEqual(output, [{"summary_text": "DDDD"}]) + + class SummarizationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): pipeline_task = "summarization" 
pipeline_running_kwargs = {"num_beams": 2, "min_length": 2, "max_length": 5} From ee6aec1963d22883cf8699be16461d4a8fa5dd31 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 28 Dec 2020 14:00:36 +0100 Subject: [PATCH 02/18] Adding tests that don't require downloading models + conversation can be fully created from static state. --- src/transformers/pipelines/conversational.py | 51 ++++++++---- tests/test_pipelines_common.py | 9 ++- tests/test_pipelines_conversational.py | 81 ++++++++++++++++++-- tests/test_pipelines_summarization.py | 2 +- 4 files changed, 116 insertions(+), 27 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index 7dc7399a356a..bb656258cd64 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -52,15 +52,28 @@ def __init__( ): if not conversation_id: conversation_id = uuid.uuid4() - self.uuid: uuid.UUID = conversation_id if past_user_inputs is None: - past_user_inputs = past_user_inputs - self.past_user_inputs: List[str] = [] + past_user_inputs = [] if generated_responses is None: generated_responses = [] + + self.uuid: uuid.UUID = conversation_id + self.past_user_inputs: List[str] = past_user_inputs self.generated_responses: List[str] = generated_responses - self.history: List[int] = [] self.new_user_input: Optional[str] = text + self._index: int = 0 + self._history: List[int] = [] + + def __eq__(self, other): + if not isinstance(other, Conversation): + return False + if self.uuid == other.uuid: + return True + return ( + self.new_user_input == other.new_user_input + and self.past_user_inputs == other.past_user_inputs + and self.generated_responses == other.generated_responses + ) def add_user_input(self, text: str, overwrite: bool = False): """ @@ -106,16 +119,6 @@ def append_response(self, response: str): """ self.generated_responses.append(response) - def set_history(self, history: List[int]): - """ - Updates the 
value of the history of the conversation. The history is represented by a list of :obj:`token_ids`. - The history is used by the model to generate responses based on the previous conversation turns. - - Args: - history (:obj:`List[int]`): History of tokens provided and generated for this conversation. - """ - self.history = history - def __repr__(self): """ Generates a string representation of the conversation. @@ -179,6 +182,21 @@ def __init__(self, min_length_for_response=32, *args, **kwargs): self.min_length_for_response = min_length_for_response + def _get_history(self, conversation): + # history = conversation._history[:] + # index = conversation._index + history = [] + index = 0 + for i, (past_user_input, generated_response) in enumerate( + zip(conversation.past_user_inputs[index:], conversation.generated_responses[index:]) + ): + for el in (past_user_input, generated_response): + new_history = self._parse_and_tokenize([el])[0] + history.extend(new_history) + conversation._index = i + index + 1 + conversation._history = history + return history[:] + def __call__( self, conversations: Union[Conversation, List[Conversation]], @@ -206,7 +224,7 @@ def __call__( conversations = [conversations] # Input validation if isinstance(conversations, list): - for conversation in conversations: + for i, conversation in enumerate(conversations): assert isinstance( conversation, Conversation ), "DialoguePipeline expects a Conversation or list of Conversations as an input" @@ -226,7 +244,7 @@ def __call__( with self.device_placement(): inputs = self._parse_and_tokenize([conversation.new_user_input for conversation in conversations]) - histories = [conversation.history for conversation in conversations] + histories = [self._get_history(conversation) for conversation in conversations] max_length = generate_kwargs.get("max_length", self.model.config.max_length) inputs = self._concat_inputs_history(inputs, histories, max_length) @@ -272,7 +290,6 @@ def __call__( 
clean_up_tokenization_spaces=clean_up_tokenization_spaces, ) ) - conversation.set_history(history[conversation_index]) output.append(conversation) if len(output) == 1: return output[0] diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py index 951aa96a3559..fed7053e9ab8 100644 --- a/tests/test_pipelines_common.py +++ b/tests/test_pipelines_common.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from string import ascii_lowercase from typing import List, Optional from unittest import mock @@ -256,6 +257,7 @@ class DummyTok: def __init__(self, **kwargs): for name, v in kwargs.items(): setattr(self, name, v) + self.index = 0 def __call__(self, inputs, **kwargs): if kwargs.get("return_tensors", "") == "pt": @@ -304,4 +306,9 @@ def encode(self, input_): return list(input_.encode("utf-8")) def decode(self, sequence, **kwargs): - return "D" * len(sequence) + string = "" + for i in range(len(sequence)): + string += ascii_lowercase[self.index] + self.index += 1 + self.index %= len(ascii_lowercase) + return string diff --git a/tests/test_pipelines_conversational.py b/tests/test_pipelines_conversational.py index 10be49d0243e..f4173f11701c 100644 --- a/tests/test_pipelines_conversational.py +++ b/tests/test_pipelines_conversational.py @@ -25,22 +25,34 @@ class SimpleConversationPipelineTests(unittest.TestCase): - @require_torch - def test_integration_torch_conversation(self): + def get_pipeline(self): # When config = GPT2Config( - vocab_size=257, n_ctx=64, n_embd=64, n_layer=1, n_head=8, eos_token_id=0, bos_token_id=0, pad_token_id=0 + vocab_size=257, + n_ctx=64, + max_length=64, + n_embd=64, + n_layer=1, + n_head=8, + eos_token_id=0, + bos_token_id=0, + pad_token_id=0, ) model = GPT2LMHeadModel(config) tokenizer = DummyTok() nlp = pipeline(task="conversational", device=DEFAULT_DEVICE_NUM, model=model, tokenizer=tokenizer) + return nlp + + @require_torch + def 
test_integration_torch_conversation(self): + nlp = self.get_pipeline() conversation_1 = Conversation("Going to the movies tonight - any suggestions?") conversation_2 = Conversation("What's the last book you have read?") # Then self.assertEqual(len(conversation_1.past_user_inputs), 0) self.assertEqual(len(conversation_2.past_user_inputs), 0) # When - result = nlp([conversation_1, conversation_2], do_sample=False, max_length=1000) + result = nlp([conversation_1, conversation_2], do_sample=False) # Then self.assertEqual(result, [conversation_1, conversation_2]) self.assertEqual( @@ -49,17 +61,17 @@ def test_integration_torch_conversation(self): Conversation( None, past_user_inputs=["Going to the movies tonight - any suggestions?"], - generated_responses=["D"], + generated_responses=["a"], ), Conversation( - None, past_user_inputs=["What's the last book you have read?"], generated_responses=["D"] + None, past_user_inputs=["What's the last book you have read?"], generated_responses=["b"] ), ], ) # When conversation_2.add_user_input("Why do you recommend it?") - result = nlp(conversation_2, do_sample=False, max_length=1000) + result = nlp(conversation_2, do_sample=False) # Then self.assertEqual(result, conversation_2) self.assertEqual( @@ -67,10 +79,63 @@ def test_integration_torch_conversation(self): Conversation( None, past_user_inputs=["What's the last book you have read?", "Why do you recommend it?"], - generated_responses=["D", "D"], + generated_responses=["b", "c"], ), ) + def test_history_cache(self): + nlp = self.get_pipeline() + conversation = Conversation( + "Why do you recommend it?", + past_user_inputs=["What's the last book you have read?"], + generated_responses=["b"], + ) + _ = nlp(conversation) + self.assertEquals(conversation._index, 1) + self.assertEquals( + conversation._history, + [ + 87, + 104, + 97, + 116, + 39, + 115, + 32, + 116, + 104, + 101, + 32, + 108, + 97, + 115, + 116, + 32, + 98, + 111, + 111, + 107, + 32, + 121, + 111, + 117, + 32, + 104, 
+ 97, + 118, + 101, + 32, + 114, + 101, + 97, + 100, + 63, + 0, + 98, + 0, + ], + ) + class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): pipeline_task = "conversational" diff --git a/tests/test_pipelines_summarization.py b/tests/test_pipelines_summarization.py index f1e7ce02ca08..d919c53165f5 100644 --- a/tests/test_pipelines_summarization.py +++ b/tests/test_pipelines_summarization.py @@ -53,7 +53,7 @@ def test_input_too_long(self): output = nlp("This is a test", truncation=TruncationStrategy.ONLY_FIRST) - self.assertEqual(output, [{"summary_text": "DDDD"}]) + self.assertEqual(output, [{"summary_text": "abcd"}]) class SummarizationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): From f43037820436efda57b517da6a8e2129c5fc5735 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 28 Dec 2020 14:18:11 +0100 Subject: [PATCH 03/18] Making tests non flaky (by fixing generation length) --- src/transformers/pipelines/conversational.py | 1 + tests/test_pipelines_conversational.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index bb656258cd64..7d1ca9a84ffc 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -356,6 +356,7 @@ def _concat_inputs_history(self, inputs: List[List[int]], histories: List[Option if cutoff_eos_index == 0 or cutoff_eos_index == len(new_input) - 1: break else: + logger.warning("Cutting history off because it's too long for underlying model") new_input = new_input[cutoff_eos_index + 1 :] outputs.append(new_input) padded_outputs = self.tokenizer.pad( diff --git a/tests/test_pipelines_conversational.py b/tests/test_pipelines_conversational.py index f4173f11701c..17f8b6c03d08 100644 --- a/tests/test_pipelines_conversational.py +++ b/tests/test_pipelines_conversational.py @@ -52,7 +52,7 @@ def 
test_integration_torch_conversation(self): self.assertEqual(len(conversation_1.past_user_inputs), 0) self.assertEqual(len(conversation_2.past_user_inputs), 0) # When - result = nlp([conversation_1, conversation_2], do_sample=False) + result = nlp([conversation_1, conversation_2], max_length=48) # Then self.assertEqual(result, [conversation_1, conversation_2]) self.assertEqual( @@ -71,7 +71,7 @@ def test_integration_torch_conversation(self): # When conversation_2.add_user_input("Why do you recommend it?") - result = nlp(conversation_2, do_sample=False) + result = nlp(conversation_2, max_length=49) # Then self.assertEqual(result, conversation_2) self.assertEqual( @@ -90,9 +90,9 @@ def test_history_cache(self): past_user_inputs=["What's the last book you have read?"], generated_responses=["b"], ) - _ = nlp(conversation) - self.assertEquals(conversation._index, 1) - self.assertEquals( + _ = nlp(conversation, max_length=26) + self.assertEqual(conversation._index, 1) + self.assertEqual( conversation._history, [ 87, From b5c5202da55f5d8a4740bd76bddf98282ecf16cb Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 28 Dec 2020 14:20:22 +0100 Subject: [PATCH 04/18] Bumping isort version. From f6a5494e67f3701128d0d1aaf2dff1593bab92c6 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 28 Dec 2020 17:22:51 +0100 Subject: [PATCH 05/18] Doc cleanup. --- src/transformers/pipelines/conversational.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index 7d1ca9a84ffc..db6fbe215324 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -33,6 +33,14 @@ class Conversation: conversation_id (:obj:`uuid.UUID`, `optional`): Unique identifier for the conversation. If not provided, a random UUID4 id will be assigned to the conversation. 
+ past_user_inputs (:obj:`List[str]`, `optional`): + Eventual past history of the conversation of the user. You don't need to pass it manually if you use the + pipeline interactively but if you want to recreate history you need to set both `past_user_inputs` and + `generated_responses` with equal length strings + generated_responses (:obj:`List[str]`, `optional`): + Eventual past history of the conversation of the model. You don't need to pass it manually if you use the + pipeline interactively but if you want to recreate history you need to set both `past_user_inputs` and + `generated_responses` with equal length strings Usage:: @@ -183,6 +191,16 @@ def __init__(self, min_length_for_response=32, *args, **kwargs): self.min_length_for_response = min_length_for_response def _get_history(self, conversation): + """ + Private function (subject to change) that simply tokenizes and concatenates past inputs. Also saves that + tokenization into the conversation state. + + Args: + conversation (:class:`~transformers.Conversation`) + + Returns: + :obj:`List[int]`: The list of tokens for the past input of that conversation. + """ # history = conversation._history[:] # index = conversation._index history = [] From 63e9c7bf6784c9488b2b978b51c3c237a3937594 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 6 Jan 2021 10:48:10 +0100 Subject: [PATCH 06/18] Remove unused test in this PR. 
--- tests/test_pipelines_summarization.py | 37 ++------------------------- 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/tests/test_pipelines_summarization.py b/tests/test_pipelines_summarization.py index d919c53165f5..0622053204b4 100644 --- a/tests/test_pipelines_summarization.py +++ b/tests/test_pipelines_summarization.py @@ -14,48 +14,15 @@ import unittest -from transformers import TruncationStrategy, is_torch_available, pipeline +from transformers import pipeline from transformers.testing_utils import require_torch, slow, torch_device -from .test_pipelines_common import DummyTok, MonoInputPipelineCommonMixin - - -if is_torch_available(): - from transformers import BartConfig, BartForConditionalGeneration +from .test_pipelines_common import MonoInputPipelineCommonMixin DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0 -class SimpleSummarizationPipelineTests(unittest.TestCase): - @require_torch - def test_input_too_long(self): - config = BartConfig( - vocab_size=257, - d_model=32, - encoder_layers=1, - decoder_layers=1, - encoder_ffn_dim=32, - decoder_ffn_dim=32, - # So any text > 4 should raise an exception - max_position_embeddings=4, - encoder_attention_heads=1, - decoder_attention_heads=1, - max_length=4, - min_length=1, - ) - model = BartForConditionalGeneration(config) - tokenizer = DummyTok(model_max_length=4) - nlp = pipeline(task="summarization", model=model, tokenizer=tokenizer) - - with self.assertRaises(IndexError): - _ = nlp("This is a test") - - output = nlp("This is a test", truncation=TruncationStrategy.ONLY_FIRST) - - self.assertEqual(output, [{"summary_text": "abcd"}]) - - class SummarizationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase): pipeline_task = "summarization" pipeline_running_kwargs = {"num_beams": 2, "min_length": 2, "max_length": 5} From 05cb49a11d2cb4b8d7b100a53059f08408a3d06e Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 6 Jan 2021 10:59:05 +0100 Subject: [PATCH 07/18] Torch import 
guard for TF. --- tests/test_pipelines_conversational.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/test_pipelines_conversational.py b/tests/test_pipelines_conversational.py index 17f8b6c03d08..88bb276d71f9 100644 --- a/tests/test_pipelines_conversational.py +++ b/tests/test_pipelines_conversational.py @@ -14,13 +14,22 @@ import unittest -from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Conversation, ConversationalPipeline, pipeline -from transformers.models.gpt2 import GPT2Config, GPT2LMHeadModel +from transformers import ( + AutoModelForSeq2SeqLM, + AutoTokenizer, + Conversation, + ConversationalPipeline, + is_torch_available, + pipeline, +) from transformers.testing_utils import require_torch, slow, torch_device from .test_pipelines_common import DummyTok, MonoInputPipelineCommonMixin +if is_torch_available(): + from transformers.models.gpt2 import GPT2Config, GPT2LMHeadModel + DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0 From 6ec30e18e51992203b70e335ff6abacb5d632afa Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 6 Jan 2021 11:13:24 +0100 Subject: [PATCH 08/18] Missing torch guard. --- tests/test_pipelines_conversational.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_pipelines_conversational.py b/tests/test_pipelines_conversational.py index 88bb276d71f9..8df7111e479c 100644 --- a/tests/test_pipelines_conversational.py +++ b/tests/test_pipelines_conversational.py @@ -92,6 +92,7 @@ def test_integration_torch_conversation(self): ), ) + @require_torch def test_history_cache(self): nlp = self.get_pipeline() conversation = Conversation( From ec753398e733355d57708fb0c4eaa621e1c736fe Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 6 Jan 2021 14:17:23 +0100 Subject: [PATCH 09/18] Small mistake in doc. 
--- src/transformers/pipelines/conversational.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index db6fbe215324..7b755d8bbc55 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -36,11 +36,11 @@ class Conversation: past_user_inputs (:obj:`List[str]`, `optional`): Eventual past history of the conversation of the user. You don't need to pass it manually if you use the pipeline interactively but if you want to recreate history you need to set both `past_user_inputs` and - `generated_responses` with equal length strings + `generated_responses` with equal length lists of strings generated_responses (:obj:`List[str]`, `optional`): Eventual past history of the conversation of the model. You don't need to pass it manually if you use the pipeline interactively but if you want to recreate history you need to set both `past_user_inputs` and - `generated_responses` with equal length strings + `generated_responses` with equal length lists of strings Usage:: From 40dcc758b41ef1a99fed28dc06cfa9a509634028 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 6 Jan 2021 14:22:17 +0100 Subject: [PATCH 10/18] Actual uses `_history` and `_index` cache. + remove dead enumerate + improve warning message. --- src/transformers/pipelines/conversational.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index 7b755d8bbc55..072be1aa30b8 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -201,10 +201,8 @@ def _get_history(self, conversation): Returns: :obj:`List[int]`: The list of tokens for the past input of that conversation. 
""" - # history = conversation._history[:] - # index = conversation._index - history = [] - index = 0 + history = conversation._history[:] + index = conversation._index for i, (past_user_input, generated_response) in enumerate( zip(conversation.past_user_inputs[index:], conversation.generated_responses[index:]) ): @@ -242,7 +240,7 @@ def __call__( conversations = [conversations] # Input validation if isinstance(conversations, list): - for i, conversation in enumerate(conversations): + for conversation in conversations: assert isinstance( conversation, Conversation ), "DialoguePipeline expects a Conversation or list of Conversations as an input" @@ -374,7 +372,9 @@ def _concat_inputs_history(self, inputs: List[List[int]], histories: List[Option if cutoff_eos_index == 0 or cutoff_eos_index == len(new_input) - 1: break else: - logger.warning("Cutting history off because it's too long for underlying model") + logger.warning( + "Cutting history off because it's too long ({len(new_input)} > {max_length - self.min_length_for_response}) for underlying model" + ) new_input = new_input[cutoff_eos_index + 1 :] outputs.append(new_input) padded_outputs = self.tokenizer.pad( From c9d35915cee21c75e51bb38f55ed695b09159b53 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 6 Jan 2021 18:23:05 +0100 Subject: [PATCH 11/18] Update src/transformers/pipelines/conversational.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/pipelines/conversational.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index 072be1aa30b8..82e3d7bab1ee 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -35,8 +35,8 @@ class Conversation: conversation. past_user_inputs (:obj:`List[str]`, `optional`): Eventual past history of the conversation of the user. 
You don't need to pass it manually if you use the - pipeline interactively but if you want to recreate history you need to set both `past_user_inputs` and - `generated_responses` with equal length lists of strings + pipeline interactively but if you want to recreate history you need to set both :obj:`past_user_inputs` and + :obj:`generated_responses` with equal length lists of strings generated_responses (:obj:`List[str]`, `optional`): Eventual past history of the conversation of the model. You don't need to pass it manually if you use the pipeline interactively but if you want to recreate history you need to set both `past_user_inputs` and From 20d4aec3cf194a61740f8b6454d0358b7f73d6ad Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 6 Jan 2021 18:23:13 +0100 Subject: [PATCH 12/18] Update src/transformers/pipelines/conversational.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/pipelines/conversational.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index 82e3d7bab1ee..c09fb6e25473 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -39,8 +39,8 @@ class Conversation: :obj:`generated_responses` with equal length lists of strings generated_responses (:obj:`List[str]`, `optional`): Eventual past history of the conversation of the model. 
You don't need to pass it manually if you use the - pipeline interactively but if you want to recreate history you need to set both `past_user_inputs` and - `generated_responses` with equal length lists of strings + pipeline interactively but if you want to recreate history you need to set both :obj:`past_user_inputs` and + :obj:`generated_responses` with equal length lists of strings Usage:: From 98fcbd7c9aea3a5e6016fec78579c2b8d39c657f Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 6 Jan 2021 18:23:39 +0100 Subject: [PATCH 13/18] Update src/transformers/pipelines/conversational.py Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> --- src/transformers/pipelines/conversational.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index c09fb6e25473..6c67ae071f4e 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -373,7 +373,7 @@ def _concat_inputs_history(self, inputs: List[List[int]], histories: List[Option break else: logger.warning( - "Cutting history off because it's too long ({len(new_input)} > {max_length - self.min_length_for_response}) for underlying model" + f"Cutting history off because it's too long ({len(new_input)} > {max_length - self.min_length_for_response}) for underlying model" ) new_input = new_input[cutoff_eos_index + 1 :] outputs.append(new_input) From 562ec54291e5b5ccfd980edab9a0ca9240a701fb Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 6 Jan 2021 18:29:23 +0100 Subject: [PATCH 14/18] Adding comments and cleaner code to address history copy. 
--- src/transformers/pipelines/conversational.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index 6c67ae071f4e..d26b57457b71 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -201,17 +201,22 @@ def _get_history(self, conversation): Returns: :obj:`List[int]`: The list of tokens for the past input of that conversation. """ - history = conversation._history[:] + # Make a copy to prevent messing cache up if there's an error + # within this function + history = conversation._history.copy() index = conversation._index + new_index = index for i, (past_user_input, generated_response) in enumerate( zip(conversation.past_user_inputs[index:], conversation.generated_responses[index:]) ): for el in (past_user_input, generated_response): new_history = self._parse_and_tokenize([el])[0] history.extend(new_history) - conversation._index = i + index + 1 + new_index = i + index + 1 + conversation._index = new_index conversation._history = history - return history[:] + # Hand back a copy to caller so they can't accidentally modify our cache. + return history.copy() def __call__( self, From c87fdc8a54ff8cd5ba797e3d7cedfe58254180eb Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 7 Jan 2021 10:14:10 +0100 Subject: [PATCH 15/18] Improving pipeline name in tests.
--- tests/test_pipelines_conversational.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_pipelines_conversational.py b/tests/test_pipelines_conversational.py index 8df7111e479c..0c960da7d35d 100644 --- a/tests/test_pipelines_conversational.py +++ b/tests/test_pipelines_conversational.py @@ -49,19 +49,21 @@ def get_pipeline(self): ) model = GPT2LMHeadModel(config) tokenizer = DummyTok() - nlp = pipeline(task="conversational", device=DEFAULT_DEVICE_NUM, model=model, tokenizer=tokenizer) - return nlp + conversation_agent = pipeline( + task="conversational", device=DEFAULT_DEVICE_NUM, model=model, tokenizer=tokenizer + ) + return conversation_agent @require_torch def test_integration_torch_conversation(self): - nlp = self.get_pipeline() + conversation_agent = self.get_pipeline() conversation_1 = Conversation("Going to the movies tonight - any suggestions?") conversation_2 = Conversation("What's the last book you have read?") # Then self.assertEqual(len(conversation_1.past_user_inputs), 0) self.assertEqual(len(conversation_2.past_user_inputs), 0) # When - result = nlp([conversation_1, conversation_2], max_length=48) + result = conversation_agent([conversation_1, conversation_2], max_length=48) # Then self.assertEqual(result, [conversation_1, conversation_2]) self.assertEqual( @@ -80,7 +82,7 @@ def test_integration_torch_conversation(self): # When conversation_2.add_user_input("Why do you recommend it?") - result = nlp(conversation_2, max_length=49) + result = conversation_agent(conversation_2, max_length=49) # Then self.assertEqual(result, conversation_2) self.assertEqual( @@ -94,13 +96,13 @@ def test_integration_torch_conversation(self): @require_torch def test_history_cache(self): - nlp = self.get_pipeline() + conversation_agent = self.get_pipeline() conversation = Conversation( "Why do you recommend it?", past_user_inputs=["What's the last book you have read?"], generated_responses=["b"], ) - _ = nlp(conversation, 
max_length=26) + _ = conversation_agent(conversation, max_length=26) self.assertEqual(conversation._index, 1) self.assertEqual( conversation._history, From edf5a68e29721b0d60ee29bc411c814222d5ffdb Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 7 Jan 2021 15:58:20 +0100 Subject: [PATCH 16/18] Change tokenizer to a real one (still created at runtime with no external dependency) --- src/transformers/pipelines/conversational.py | 11 +- tests/test_pipelines_common.py | 85 +++--------- tests/test_pipelines_conversational.py | 132 +++++++++++-------- 3 files changed, 98 insertions(+), 130 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index d26b57457b71..e72e314e6187 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -322,9 +322,7 @@ def _parse_and_tokenize(self, inputs, **kwargs): Parse arguments and tokenize, adding an EOS token at the end of the user input """ # Parse arguments - inputs = self.tokenizer(inputs, add_special_tokens=False, padding=False).get("input_ids", []) - for input in inputs: - input.append(self.tokenizer.eos_token_id) + inputs = self.tokenizer(inputs, **kwargs).get("input_ids", []) return inputs def _clean_padding_history(self, generated_tensor) -> List[List[int]]: @@ -368,9 +366,9 @@ def _concat_inputs_history(self, inputs: List[List[int]], histories: List[Option for new_input, history in zip(inputs, histories): if history is not None: new_input = history + new_input - if len(new_input) > max_length - self.min_length_for_response: + if len(new_input) > max_length: cutoff_eos_index = 0 - while len(new_input) - cutoff_eos_index > max_length - self.min_length_for_response: + while len(new_input) - cutoff_eos_index > max_length: if cutoff_eos_index >= len(new_input): break cutoff_eos_index = new_input[cutoff_eos_index:].index(self.tokenizer.eos_token_id) @@ -378,9 +376,8 @@ def _concat_inputs_history(self, inputs: 
List[List[int]], histories: List[Option break else: logger.warning( - f"Cutting history off because it's too long ({len(new_input)} > {max_length - self.min_length_for_response}) for underlying model" + f"Cutting history off because it's too long ({len(new_input)} > {max_length}) for underlying model" ) - new_input = new_input[cutoff_eos_index + 1 :] outputs.append(new_input) padded_outputs = self.tokenizer.pad( {"input_ids": outputs}, padding="longest", return_attention_mask=True, return_tensors=self.framework diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py index fed7053e9ab8..8e585cf1acf4 100644 --- a/tests/test_pipelines_common.py +++ b/tests/test_pipelines_common.py @@ -12,21 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from string import ascii_lowercase from typing import List, Optional from unittest import mock from transformers import is_tf_available, is_torch_available, pipeline from transformers.pipelines import Pipeline from transformers.testing_utils import _run_slow_tests, is_pipeline_test, require_tf, require_torch, slow -from transformers.tokenization_utils import TruncationStrategy from transformers.tokenization_utils_base import to_py_obj -if is_torch_available(): - import torch - - VALID_INPUTS = ["A simple string", ["list of strings"]] @@ -250,65 +244,20 @@ def _test_pipeline(self, nlp: Pipeline): self.assertRaises(Exception, nlp, self.invalid_inputs) -class DummyTok: - pad_token_id = 0 - eos_token_id = 0 - - def __init__(self, **kwargs): - for name, v in kwargs.items(): - setattr(self, name, v) - self.index = 0 - - def __call__(self, inputs, **kwargs): - if kwargs.get("return_tensors", "") == "pt": - return self.encode_pt(inputs, **kwargs) - else: - return self.encode_list(inputs, **kwargs) - - def encode_list(self, inputs, **kwargs): - unwrap = False - if isinstance(inputs, str): - unwrap = True - inputs = [inputs] - - assert isinstance(inputs, 
list) - input_ids = [self.encode(input_) for input_ in inputs] - - if unwrap: - input_ids = input_ids[0] - - return {"input_ids": input_ids} - - def encode_pt(self, inputs, **kwargs): - if isinstance(inputs, str): - input_ids = torch.LongTensor(self.encode(inputs)).unsqueeze(0) - else: - input_ids = self._pad([self.encode(input_) for input_ in inputs]) - return self.finalize_pt(input_ids, **kwargs) - - def finalize_pt(self, input_ids, **kwargs): - if kwargs.get("truncation", TruncationStrategy.DO_NOT_TRUNCATE) == TruncationStrategy.ONLY_FIRST: - input_ids = input_ids[:, : self.model_max_length] - attention_mask = torch.zeros_like(input_ids).long() + 1 - return {"input_ids": input_ids, "attention_mask": attention_mask} - - def _pad(self, inputs): - return torch.nn.utils.rnn.pad_sequence( - [torch.LongTensor(input_) for input_ in inputs], - padding_value=self.pad_token_id, - ).transpose(1, 0) - - def pad(self, inputs, **kwargs): - input_ids = self._pad(inputs["input_ids"]) - return self.finalize_pt(input_ids, **kwargs) - - def encode(self, input_): - return list(input_.encode("utf-8")) - - def decode(self, sequence, **kwargs): - string = "" - for i in range(len(sequence)): - string += ascii_lowercase[self.index] - self.index += 1 - self.index %= len(ascii_lowercase) - return string +def DummyTok(): + import tempfile + + from tokenizers import ByteLevelBPETokenizer, processors + from transformers.tokenization_utils_fast import PreTrainedTokenizerFast + + tokenizer = ByteLevelBPETokenizer() + with tempfile.NamedTemporaryFile() as f: + tokenizer.train([f.name], vocab_size=256, show_progress=False) + tokenizer.add_special_tokens(["", ""]) + tokenizer._tokenizer.post_processor = processors.TemplateProcessing( + single=" $0 ", special_tokens=[("", 256), ("", 257)] + ) + with tempfile.NamedTemporaryFile() as f: + tokenizer.save(f.name) + real_tokenizer = PreTrainedTokenizerFast(tokenizer_file=f.name, eos_token="", bos_token="") + return real_tokenizer diff --git 
a/tests/test_pipelines_conversational.py b/tests/test_pipelines_conversational.py index 0c960da7d35d..c8d4648f9790 100644 --- a/tests/test_pipelines_conversational.py +++ b/tests/test_pipelines_conversational.py @@ -37,17 +37,24 @@ class SimpleConversationPipelineTests(unittest.TestCase): def get_pipeline(self): # When config = GPT2Config( - vocab_size=257, - n_ctx=64, - max_length=64, + vocab_size=258, + n_ctx=128, + max_length=128, n_embd=64, n_layer=1, n_head=8, - eos_token_id=0, - bos_token_id=0, - pad_token_id=0, + bos_token_id=256, + eos_token_id=257, ) model = GPT2LMHeadModel(config) + # Force model output to be L + import torch + + V, D = model.lm_head.weight.shape + bias = torch.zeros(V, requires_grad=True) + bias[43] = 1 + + model.lm_head.bias = torch.nn.Parameter(bias) tokenizer = DummyTok() conversation_agent = pipeline( task="conversational", device=DEFAULT_DEVICE_NUM, model=model, tokenizer=tokenizer @@ -59,12 +66,16 @@ def test_integration_torch_conversation(self): conversation_agent = self.get_pipeline() conversation_1 = Conversation("Going to the movies tonight - any suggestions?") conversation_2 = Conversation("What's the last book you have read?") - # Then self.assertEqual(len(conversation_1.past_user_inputs), 0) self.assertEqual(len(conversation_2.past_user_inputs), 0) - # When - result = conversation_agent([conversation_1, conversation_2], max_length=48) - # Then + + with self.assertLogs("transformers", level="WARNING") as log: + result = conversation_agent([conversation_1, conversation_2], max_length=49) + self.assertEqual(len(log.output), 2) + self.assertIn("You might consider trimming the early phase of the conversation", log.output[0]) + self.assertIn("Setting `pad_token_id`", log.output[1]) + + # Two conversations in one pass self.assertEqual(result, [conversation_1, conversation_2]) self.assertEqual( result, @@ -72,25 +83,29 @@ def test_integration_torch_conversation(self): Conversation( None, past_user_inputs=["Going to the movies 
tonight - any suggestions?"], - generated_responses=["a"], + generated_responses=["L"], ), Conversation( - None, past_user_inputs=["What's the last book you have read?"], generated_responses=["b"] + None, past_user_inputs=["What's the last book you have read?"], generated_responses=["L"] ), ], ) - # When + # One conversation with history conversation_2.add_user_input("Why do you recommend it?") - result = conversation_agent(conversation_2, max_length=49) - # Then + with self.assertLogs("transformers", level="WARNING") as log: + result = conversation_agent(conversation_2, max_length=67) + self.assertEqual(len(log.output), 2) + self.assertIn("You might consider trimming the early phase of the conversation", log.output[0]) + self.assertIn("Setting `pad_token_id`", log.output[1]) + self.assertEqual(result, conversation_2) self.assertEqual( result, Conversation( None, past_user_inputs=["What's the last book you have read?", "Why do you recommend it?"], - generated_responses=["b", "c"], + generated_responses=["L", "L"], ), ) @@ -102,49 +117,56 @@ def test_history_cache(self): past_user_inputs=["What's the last book you have read?"], generated_responses=["b"], ) - _ = conversation_agent(conversation, max_length=26) + with self.assertLogs("transformers", level="WARNING") as log: + _ = conversation_agent(conversation, max_length=28) + self.assertEqual(len(log.output), 3) + self.assertIn("Cutting history off because it's too long (66 > 28) for underlying model", log.output[0]) + self.assertIn("66 is bigger than 0.9 * max_length: 28", log.output[1]) + self.assertIn("Setting `pad_token_id`", log.output[2]) self.assertEqual(conversation._index, 1) self.assertEqual( conversation._history, [ - 87, - 104, - 97, - 116, - 39, - 115, - 32, - 116, - 104, - 101, - 32, - 108, - 97, - 115, - 116, - 32, - 98, - 111, - 111, - 107, - 32, - 121, - 111, - 117, - 32, - 104, - 97, - 118, - 101, - 32, - 114, - 101, - 97, - 100, - 63, - 0, - 98, - 0, + 256, # BOS + 54, + 71, + 64, + 83, + 6, + 
82, + 220, + 83, + 71, + 68, + 220, + 75, + 64, + 82, + 83, + 220, + 65, + 78, + 78, + 74, + 220, + 88, + 78, + 84, + 220, + 71, + 64, + 85, + 68, + 220, + 81, + 68, + 64, + 67, + 30, + 257, # EOS + 256, + 65, + 257, ], ) From ae9a54bcbfb33d0e0f0065ecff6e0aad8cf7ca6f Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Fri, 8 Jan 2021 10:33:20 +0100 Subject: [PATCH 17/18] Simplify DummyTok, reverse changes on tokenization. --- src/transformers/pipelines/conversational.py | 12 +-- tests/test_pipelines_common.py | 9 +- tests/test_pipelines_conversational.py | 95 ++++++++++---------- 3 files changed, 58 insertions(+), 58 deletions(-) diff --git a/src/transformers/pipelines/conversational.py b/src/transformers/pipelines/conversational.py index e72e314e6187..9dce94626d24 100644 --- a/src/transformers/pipelines/conversational.py +++ b/src/transformers/pipelines/conversational.py @@ -184,7 +184,7 @@ def __init__(self, min_length_for_response=32, *args, **kwargs): super().__init__(*args, **kwargs) # We need at least an eos_token - assert self.tokenizer.eos_token_id is not None, "DialoguePipeline tokenizer should have an EOS token set" + assert self.tokenizer.eos_token_id is not None, "ConversationalPipeline tokenizer should have an EOS token set" if self.tokenizer.pad_token_id is None: self.tokenizer.pad_token = self.tokenizer.eos_token @@ -322,7 +322,9 @@ def _parse_and_tokenize(self, inputs, **kwargs): Parse arguments and tokenize, adding an EOS token at the end of the user input """ # Parse arguments - inputs = self.tokenizer(inputs, **kwargs).get("input_ids", []) + inputs = self.tokenizer(inputs, add_special_tokens=False, padding=False).get("input_ids", []) + for input in inputs: + input.append(self.tokenizer.eos_token_id) return inputs def _clean_padding_history(self, generated_tensor) -> List[List[int]]: @@ -366,9 +368,9 @@ def _concat_inputs_history(self, inputs: List[List[int]], histories: List[Option for new_input, history in zip(inputs, histories): if history is 
not None: new_input = history + new_input - if len(new_input) > max_length: + if len(new_input) > max_length - self.min_length_for_response: cutoff_eos_index = 0 - while len(new_input) - cutoff_eos_index > max_length: + while len(new_input) - cutoff_eos_index > max_length - self.min_length_for_response: if cutoff_eos_index >= len(new_input): break cutoff_eos_index = new_input[cutoff_eos_index:].index(self.tokenizer.eos_token_id) @@ -376,7 +378,7 @@ def _concat_inputs_history(self, inputs: List[List[int]], histories: List[Option break else: logger.warning( - f"Cutting history off because it's too long ({len(new_input)} > {max_length}) for underlying model" + f"Cutting history off because it's too long ({len(new_input)} > {max_length - self.min_length_for_response}) for underlying model" ) outputs.append(new_input) padded_outputs = self.tokenizer.pad( diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py index 8e585cf1acf4..127766a2fa13 100644 --- a/tests/test_pipelines_common.py +++ b/tests/test_pipelines_common.py @@ -247,14 +247,13 @@ def _test_pipeline(self, nlp: Pipeline): def DummyTok(): import tempfile - from tokenizers import ByteLevelBPETokenizer, processors + from tokenizers import Tokenizer, models, processors from transformers.tokenization_utils_fast import PreTrainedTokenizerFast - tokenizer = ByteLevelBPETokenizer() - with tempfile.NamedTemporaryFile() as f: - tokenizer.train([f.name], vocab_size=256, show_progress=False) + vocab = [(chr(i), i) for i in range(256)] + tokenizer = Tokenizer(models.Unigram(vocab)) tokenizer.add_special_tokens(["", ""]) - tokenizer._tokenizer.post_processor = processors.TemplateProcessing( + tokenizer.post_processor = processors.TemplateProcessing( single=" $0 ", special_tokens=[("", 256), ("", 257)] ) with tempfile.NamedTemporaryFile() as f: diff --git a/tests/test_pipelines_conversational.py b/tests/test_pipelines_conversational.py index c8d4648f9790..68ccfa640cb4 100644 --- 
a/tests/test_pipelines_conversational.py +++ b/tests/test_pipelines_conversational.py @@ -52,7 +52,7 @@ def get_pipeline(self): V, D = model.lm_head.weight.shape bias = torch.zeros(V, requires_grad=True) - bias[43] = 1 + bias[76] = 1 model.lm_head.bias = torch.nn.Parameter(bias) tokenizer = DummyTok() @@ -70,7 +70,7 @@ def test_integration_torch_conversation(self): self.assertEqual(len(conversation_2.past_user_inputs), 0) with self.assertLogs("transformers", level="WARNING") as log: - result = conversation_agent([conversation_1, conversation_2], max_length=49) + result = conversation_agent([conversation_1, conversation_2], max_length=48) self.assertEqual(len(log.output), 2) self.assertIn("You might consider trimming the early phase of the conversation", log.output[0]) self.assertIn("Setting `pad_token_id`", log.output[1]) @@ -94,10 +94,11 @@ def test_integration_torch_conversation(self): # One conversation with history conversation_2.add_user_input("Why do you recommend it?") with self.assertLogs("transformers", level="WARNING") as log: - result = conversation_agent(conversation_2, max_length=67) - self.assertEqual(len(log.output), 2) - self.assertIn("You might consider trimming the early phase of the conversation", log.output[0]) - self.assertIn("Setting `pad_token_id`", log.output[1]) + result = conversation_agent(conversation_2, max_length=64) + self.assertEqual(len(log.output), 3) + self.assertIn("Cutting history off because it's too long", log.output[0]) + self.assertIn("You might consider trimming the early phase of the conversation", log.output[1]) + self.assertIn("Setting `pad_token_id`", log.output[2]) self.assertEqual(result, conversation_2) self.assertEqual( @@ -118,55 +119,53 @@ def test_history_cache(self): generated_responses=["b"], ) with self.assertLogs("transformers", level="WARNING") as log: - _ = conversation_agent(conversation, max_length=28) + _ = conversation_agent(conversation, max_length=60) self.assertEqual(len(log.output), 3) - 
self.assertIn("Cutting history off because it's too long (66 > 28) for underlying model", log.output[0]) - self.assertIn("66 is bigger than 0.9 * max_length: 28", log.output[1]) + self.assertIn("Cutting history off because it's too long (63 > 28) for underlying model", log.output[0]) + self.assertIn("63 is bigger than 0.9 * max_length: 60", log.output[1]) self.assertIn("Setting `pad_token_id`", log.output[2]) self.assertEqual(conversation._index, 1) self.assertEqual( conversation._history, [ - 256, # BOS - 54, - 71, - 64, - 83, - 6, - 82, - 220, - 83, - 71, - 68, - 220, - 75, - 64, - 82, - 83, - 220, - 65, - 78, - 78, - 74, - 220, - 88, - 78, - 84, - 220, - 71, - 64, - 85, - 68, - 220, - 81, - 68, - 64, - 67, - 30, + 87, + 104, + 97, + 116, + 39, + 115, + 32, + 116, + 104, + 101, + 32, + 108, + 97, + 115, + 116, + 32, + 98, + 111, + 111, + 107, + 32, + 121, + 111, + 117, + 32, + 104, + 97, + 118, + 101, + 32, + 114, + 101, + 97, + 100, + 63, + 257, # EOS + 98, # b 257, # EOS - 256, - 65, - 257, ], ) From 16e71d2de0cfe4bdae880c5366259083037b5926 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Fri, 8 Jan 2021 13:00:44 +0100 Subject: [PATCH 18/18] Removing DummyTok. 
--- tests/test_pipelines_common.py | 18 ---------------- tests/test_pipelines_conversational.py | 29 +++++++++++++++++++------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py index 127766a2fa13..c8a66053a307 100644 --- a/tests/test_pipelines_common.py +++ b/tests/test_pipelines_common.py @@ -242,21 +242,3 @@ def _test_pipeline(self, nlp: Pipeline): self.assertIn(key, result) self.assertRaises(Exception, nlp, self.invalid_inputs) - - -def DummyTok(): - import tempfile - - from tokenizers import Tokenizer, models, processors - from transformers.tokenization_utils_fast import PreTrainedTokenizerFast - - vocab = [(chr(i), i) for i in range(256)] - tokenizer = Tokenizer(models.Unigram(vocab)) - tokenizer.add_special_tokens(["", ""]) - tokenizer.post_processor = processors.TemplateProcessing( - single=" $0 ", special_tokens=[("", 256), ("", 257)] - ) - with tempfile.NamedTemporaryFile() as f: - tokenizer.save(f.name) - real_tokenizer = PreTrainedTokenizerFast(tokenizer_file=f.name, eos_token="", bos_token="") - return real_tokenizer diff --git a/tests/test_pipelines_conversational.py b/tests/test_pipelines_conversational.py index 68ccfa640cb4..ad00d92b3b9e 100644 --- a/tests/test_pipelines_conversational.py +++ b/tests/test_pipelines_conversational.py @@ -24,10 +24,12 @@ ) from transformers.testing_utils import require_torch, slow, torch_device -from .test_pipelines_common import DummyTok, MonoInputPipelineCommonMixin +from .test_pipelines_common import MonoInputPipelineCommonMixin if is_torch_available(): + import torch + from transformers.models.gpt2 import GPT2Config, GPT2LMHeadModel DEFAULT_DEVICE_NUM = -1 if torch_device == "cpu" else 0 @@ -37,7 +39,7 @@ class SimpleConversationPipelineTests(unittest.TestCase): def get_pipeline(self): # When config = GPT2Config( - vocab_size=258, + vocab_size=263, n_ctx=128, max_length=128, n_embd=64, @@ -48,14 +50,27 @@ def get_pipeline(self): ) model 
= GPT2LMHeadModel(config) # Force model output to be L - import torch - V, D = model.lm_head.weight.shape bias = torch.zeros(V, requires_grad=True) bias[76] = 1 model.lm_head.bias = torch.nn.Parameter(bias) - tokenizer = DummyTok() + + # # Created with: + # import tempfile + + # from tokenizers import Tokenizer, models + # from transformers.tokenization_utils_fast import PreTrainedTokenizerFast + + # vocab = [(chr(i), i) for i in range(256)] + # tokenizer = Tokenizer(models.Unigram(vocab)) + # with tempfile.NamedTemporaryFile() as f: + # tokenizer.save(f.name) + # real_tokenizer = PreTrainedTokenizerFast(tokenizer_file=f.name, eos_token="", bos_token="") + + # real_tokenizer._tokenizer.save("dummy.json") + # Special tokens are automatically added at load time. + tokenizer = AutoTokenizer.from_pretrained("Narsil/small_conversational_test") conversation_agent = pipeline( task="conversational", device=DEFAULT_DEVICE_NUM, model=model, tokenizer=tokenizer ) @@ -163,9 +178,9 @@ def test_history_cache(self): 97, 100, 63, - 257, # EOS + 259, # EOS 98, # b - 257, # EOS + 259, # EOS ], )