From 5b68fa1bbf37f9f61d5eb8abbe8e1f3981641471 Mon Sep 17 00:00:00 2001
From: cmdr2 <shashank.shekhar.global@gmail.com>
Date: Tue, 16 May 2023 16:42:38 +0530
Subject: [PATCH] Release large tensors in attention (as soon as they're no
 longer required). Reduces peak VRAM by nearly 2 GB for 1024x1024 (even after
 slicing), and the savings scale up with image size.

---
 src/diffusers/models/attention_processor.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/diffusers/models/attention_processor.py b/src/diffusers/models/attention_processor.py
index f88400da0333..a489814c4787 100644
--- a/src/diffusers/models/attention_processor.py
+++ b/src/diffusers/models/attention_processor.py
@@ -344,11 +344,14 @@ def get_attention_scores(self, query, key, attention_mask=None):
             beta=beta,
             alpha=self.scale,
         )
+        del baddbmm_input
 
         if self.upcast_softmax:
             attention_scores = attention_scores.float()
 
         attention_probs = attention_scores.softmax(dim=-1)
+        del attention_scores
+
         attention_probs = attention_probs.to(dtype)
 
         return attention_probs