diff --git a/optimizedSD/optimized_img2img.py b/optimizedSD/optimized_img2img.py
index 76e6419fd..24f3338f0 100644
--- a/optimizedSD/optimized_img2img.py
+++ b/optimizedSD/optimized_img2img.py
@@ -253,9 +253,9 @@ def load_img(path, h0, w0):
     init_latent = modelFS.get_first_stage_encoding(modelFS.encode_first_stage(init_image))  # move to latent space

     if opt.device != "cpu":
-        mem = torch.cuda.memory_allocated() / 1e6
+        mem = torch.cuda.memory_allocated(device=opt.device) / 1e6
         modelFS.to("cpu")
-        while torch.cuda.memory_allocated() / 1e6 >= mem:
+        while torch.cuda.memory_allocated(device=opt.device) / 1e6 >= mem:
             time.sleep(1)


@@ -302,9 +302,9 @@ def load_img(path, h0, w0):
                 c = modelCS.get_learned_conditioning(prompts)

                 if opt.device != "cpu":
-                    mem = torch.cuda.memory_allocated() / 1e6
+                    mem = torch.cuda.memory_allocated(device=opt.device) / 1e6
                     modelCS.to("cpu")
-                    while torch.cuda.memory_allocated() / 1e6 >= mem:
+                    while torch.cuda.memory_allocated(device=opt.device) / 1e6 >= mem:
                         time.sleep(1)

                 # encode (scaled latent)
@@ -340,13 +340,13 @@ def load_img(path, h0, w0):
                     base_count += 1

                 if opt.device != "cpu":
-                    mem = torch.cuda.memory_allocated() / 1e6
+                    mem = torch.cuda.memory_allocated(device=opt.device) / 1e6
                     modelFS.to("cpu")
-                    while torch.cuda.memory_allocated() / 1e6 >= mem:
+                    while torch.cuda.memory_allocated(device=opt.device) / 1e6 >= mem:
                         time.sleep(1)

                 del samples_ddim

-        print("memory_final = ", torch.cuda.memory_allocated() / 1e6)
+        print("memory_final = ", torch.cuda.memory_allocated(device=opt.device) / 1e6)

         toc = time.time()
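
The change is the same in all four places: `torch.cuda.memory_allocated()` called with no argument reports allocation on the *current* CUDA device, so when the script runs on a non-default GPU (e.g. `--device cuda:1`) the offload wait-loops compare against the wrong device and can hang or exit early. Passing `device=opt.device` scopes the query to the device the model actually occupies. Below is a minimal standalone sketch of that device-scoped offload pattern; the helper name `offload_and_wait` is hypothetical and not part of this PR.

import time

import torch


def offload_and_wait(module: torch.nn.Module, device: str) -> None:
    """Move `module` to CPU and block until its VRAM is released on `device`.

    Assumes `module` currently holds memory on `device` (a CUDA device
    string such as "cuda:1"), mirroring the loops patched in the diff.
    """
    if device == "cpu":
        return  # nothing to wait for on CPU
    # Snapshot the allocation on the target device only; a bare
    # torch.cuda.memory_allocated() would read the current device instead,
    # which is the bug this PR fixes.
    mem = torch.cuda.memory_allocated(device=device) / 1e6  # MB before offload
    module.to("cpu")
    # The CUDA caching allocator releases the blocks asynchronously, so
    # poll until the allocation on the *target* device drops below the snapshot.
    while torch.cuda.memory_allocated(device=device) / 1e6 >= mem:
        time.sleep(1)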