From 0789a055b542cf45e650db402fff4dd1b8cf5116 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Mon, 6 Jan 2025 22:50:39 +0900 Subject: [PATCH 01/11] WIP, unifying InpaintingGenerator and SyntheticDataGenerator --- .../synthetic_data_generation/image_utils.py | 54 ++- .../in_painting_generator.py | 342 ------------------ .../synthetic_data_generator.py | 140 +++---- bitmind/validator/config.py | 50 +-- 4 files changed, 157 insertions(+), 429 deletions(-) delete mode 100644 bitmind/synthetic_data_generation/in_painting_generator.py diff --git a/bitmind/synthetic_data_generation/image_utils.py b/bitmind/synthetic_data_generation/image_utils.py index 5c419537..ab16dbb9 100644 --- a/bitmind/synthetic_data_generation/image_utils.py +++ b/bitmind/synthetic_data_generation/image_utils.py @@ -1,6 +1,9 @@ +import numpy as np import PIL import os -import json +from PIL import Image, ImageDraw +from typing import Tuple + from bitmind.validator.config import TARGET_IMAGE_SIZE @@ -56,4 +59,51 @@ def save_images_to_disk(image_dataset, start_index, num_images, save_directory, image.save(file_path, 'JPEG') # Save the image print(f"Saved: {file_path}") except Exception as e: - print(f"Failed to save image {i}: {e}") \ No newline at end of file + print(f"Failed to save image {i}: {e}") + + +def create_random_mask(self, size: Tuple[int, int]) -> Image.Image: + """ + Create a random mask for i2i transformation. 
+ """ + w, h = size + mask = Image.new('RGB', size, 'black') + + if np.random.rand() < 0.5: + # Rectangular mask with smoother edges + width = np.random.randint(w//4, w//2) + height = np.random.randint(h//4, h//2) + + # Center the rectangle with some random offset + x1 = (w - width) // 2 + np.random.randint(-width//4, width//4) + y1 = (h - height) // 2 + np.random.randint(-height//4, height//4) + + # Create mask with PIL draw for smoother edges + draw = ImageDraw.Draw(mask) + draw.rounded_rectangle( + [x1, y1, x1 + width, y1 + height], + radius=min(width, height) // 10, # Smooth corners + fill='white' + ) + else: + # Circular mask with feathered edges + draw = ImageDraw.Draw(mask) + center_x = w//2 + center_y = h//2 + + # Make radius proportional to image size + radius = min(w, h) // 4 + + # Add small random offset to center + center_x += np.random.randint(-radius//4, radius//4) + center_y += np.random.randint(-radius//4, radius//4) + + # Draw multiple circles with decreasing opacity for feathered edge + for r in range(radius, radius-10, -1): + opacity = int(255 * (r - (radius-10)) / 10) + draw.ellipse( + [center_x-r, center_y-r, center_x+r, center_y+r], + fill=(255, 255, 255, opacity) + ) + + return mask \ No newline at end of file diff --git a/bitmind/synthetic_data_generation/in_painting_generator.py b/bitmind/synthetic_data_generation/in_painting_generator.py deleted file mode 100644 index 00f53cf5..00000000 --- a/bitmind/synthetic_data_generation/in_painting_generator.py +++ /dev/null @@ -1,342 +0,0 @@ -import gc -import time -from pathlib import Path -from typing import Dict, Optional, Any, Union, Tuple -import json - -import bittensor as bt -import numpy as np -import torch -from PIL import Image, ImageDraw - -from bitmind.validator.config import ( - HUGGINGFACE_CACHE_DIR, - TEXT_MODERATION_MODEL, - IMAGE_ANNOTATION_MODEL, - I2I_MODELS, - I2I_MODEL_NAMES, - TARGET_IMAGE_SIZE, - select_random_i2i_model -) -from bitmind.synthetic_data_generation.prompt_utils 
import truncate_prompt_if_too_long -from bitmind.synthetic_data_generation.image_annotation_generator import ImageAnnotationGenerator -from bitmind.validator.cache import ImageCache - - -class InPaintingGenerator: - """ - A class for generating image-to-image transformations using inpainting models. - - This class supports generating prompts from input images and applying - inpainting transformations using various models. - """ - - def __init__( - self, - i2i_model_name: Optional[str] = None, - use_random_i2i_model: bool = True, - output_dir: Optional[Union[str, Path]] = None, - image_cache: Optional[ImageCache] = None, - device: str = 'cuda' - ) -> None: - """ - Initialize the I2IGenerator. - - Args: - i2i_model_name: Name of the image-to-image model. - use_random_i2i_model: Whether to randomly select models for generation. - output_dir: Directory to write generated data. - image_cache: Optional image cache instance. - device: Device identifier. - """ - if not use_random_i2i_model and i2i_model_name not in I2I_MODEL_NAMES: - raise ValueError( - f"Invalid model name '{i2i_model_name}'. " - f"Options are {I2I_MODEL_NAMES}" - ) - - self.use_random_i2i_model = use_random_i2i_model - self.i2i_model_name = i2i_model_name - self.i2i_model = None - self.device = device - - if self.use_random_i2i_model and i2i_model_name is not None: - bt.logging.warning( - "i2i_model_name will be ignored (use_random_i2i_model=True)" - ) - self.i2i_model_name = None - - self.image_annotation_generator = ImageAnnotationGenerator( - model_name=IMAGE_ANNOTATION_MODEL, - text_moderation_model_name=TEXT_MODERATION_MODEL - ) - self.image_cache = image_cache - self.output_dir = Path(output_dir) if output_dir else None - if self.output_dir: - (self.output_dir / "image").mkdir(parents=True, exist_ok=True) - - def generate( - self, - image: Image.Image, - custom_prompt: Optional[str] = None - ) -> Dict[str, Any]: - """ - Generate an image-to-image transformation based on input image. 
- - Args: - image: Input image for transformation. - custom_prompt: Optional custom prompt to use instead of generating one. - - Returns: - Dictionary containing generated data information. - """ - # Resize input image to target size at the start - image = image.resize(TARGET_IMAGE_SIZE, Image.Resampling.LANCZOS) - - if custom_prompt is None: - prompt = self.generate_prompt(image, clear_gpu=True) - else: - prompt = custom_prompt - - bt.logging.info("Generating i2i transformation...") - gen_data = self.run_i2i(prompt, image) - self.clear_gpu() - return gen_data - - def generate_prompt( - self, - image: Image.Image, - clear_gpu: bool = True - ) -> str: - """Generate a prompt based on the input image.""" - bt.logging.info("Generating prompt from image") - self.image_annotation_generator.load_models() - prompt = self.image_annotation_generator.generate(image) - if clear_gpu: - self.image_annotation_generator.clear_gpu() - return prompt - - def run_i2i( - self, - prompt: str, - original_image: Image.Image, - model_name: Optional[str] = None - ) -> Dict[str, Any]: - """ - Generate image-to-image transformation based on a text prompt. - - Args: - prompt: The text prompt used to inspire the generation. - original_image: The source image to be inpainted. - model_name: Optional model name to use for generation. - - Returns: - Dictionary containing generated data and metadata. - - Raises: - RuntimeError: If generation fails. 
- """ - if model_name is not None: - self.i2i_model_name = model_name - self.load_i2i_model() - - original_image = original_image.convert('RGB') - - # Use larger image size (1024x1024 or keep original if smaller) - target_size = (1024, 1024) - if original_image.size[0] > target_size[0] or original_image.size[1] > target_size[1]: - original_image = original_image.resize(target_size, Image.Resampling.LANCZOS) - - # Create random mask at same size as image - mask = self.create_random_mask(original_image.size) - - try: - truncated_prompt = truncate_prompt_if_too_long(prompt, self.i2i_model) - generator = torch.Generator(device=self.device).manual_seed(0) - - bt.logging.info(f"Generating inpainting from prompt: {truncated_prompt}") - start_time = time.time() - gen_output = self.i2i_model( - prompt=truncated_prompt, - image=original_image, - mask_image=mask, - guidance_scale=7.5, - num_inference_steps=50, - strength=0.99, - generator=generator, - ) - gen_time = time.time() - start_time - bt.logging.info(f"Finished generation in {gen_time/60:.2f} minutes") - - # Ensure output is in RGB mode - output_image = gen_output.images[0] - output_image = output_image.convert('RGB') - gen_output.images[0] = output_image - - except Exception as e: - bt.logging.error(f"I2I generation error: {e}") - raise RuntimeError(f"Failed to generate i2i image: {e}") - - return { - 'prompt': truncated_prompt, - 'prompt_long': prompt, - 'gen_output': gen_output, - 'time': time.time(), - 'model_name': self.i2i_model_name, - 'gen_time': gen_time - } - - def create_random_mask(self, size: Tuple[int, int]) -> Image.Image: - """ - Create a random mask for i2i transformation. 
- """ - w, h = size - mask = Image.new('RGB', size, 'black') - - if np.random.rand() < 0.5: - # Rectangular mask with smoother edges - width = np.random.randint(w//4, w//2) - height = np.random.randint(h//4, h//2) - - # Center the rectangle with some random offset - x1 = (w - width) // 2 + np.random.randint(-width//4, width//4) - y1 = (h - height) // 2 + np.random.randint(-height//4, height//4) - - # Create mask with PIL draw for smoother edges - draw = ImageDraw.Draw(mask) - draw.rounded_rectangle( - [x1, y1, x1 + width, y1 + height], - radius=min(width, height) // 10, # Smooth corners - fill='white' - ) - else: - # Circular mask with feathered edges - draw = ImageDraw.Draw(mask) - center_x = w//2 - center_y = h//2 - - # Make radius proportional to image size - radius = min(w, h) // 4 - - # Add small random offset to center - center_x += np.random.randint(-radius//4, radius//4) - center_y += np.random.randint(-radius//4, radius//4) - - # Draw multiple circles with decreasing opacity for feathered edge - for r in range(radius, radius-10, -1): - opacity = int(255 * (r - (radius-10)) / 10) - draw.ellipse( - [center_x-r, center_y-r, center_x+r, center_y+r], - fill=(255, 255, 255, opacity) - ) - - return mask - - def load_i2i_model(self, model_name: Optional[str] = None) -> None: - """Load a Hugging Face image-to-image inpainting model to a specific GPU.""" - if model_name is not None: - self.i2i_model_name = model_name - elif self.use_random_i2i_model or model_name == 'random': - model_name = select_random_i2i_model() - self.i2i_model_name = model_name - - bt.logging.info(f"Loading {self.i2i_model_name}") - - pipeline_cls = I2I_MODELS[self.i2i_model_name]['pipeline_cls'] - pipeline_args = I2I_MODELS[self.i2i_model_name]['from_pretrained_args'] - - self.i2i_model = pipeline_cls.from_pretrained( - pipeline_args.get('base', self.i2i_model_name), - cache_dir=HUGGINGFACE_CACHE_DIR, - **pipeline_args, - add_watermarker=False - ) - - 
self.i2i_model.set_progress_bar_config(disable=True) - - # Load scheduler if specified - if 'scheduler' in I2I_MODELS[self.i2i_model_name]: - sched_cls = I2I_MODELS[self.i2i_model_name]['scheduler']['cls'] - sched_args = I2I_MODELS[self.i2i_model_name]['scheduler']['from_config_args'] - self.i2i_model.scheduler = sched_cls.from_config( - self.i2i_model.scheduler.config, - **sched_args - ) - - # Configure model optimizations - model_config = I2I_MODELS[self.i2i_model_name] - if model_config.get('enable_model_cpu_offload', False): - bt.logging.info(f"Enabling cpu offload for {self.i2i_model_name}") - self.i2i_model.enable_model_cpu_offload() - if model_config.get('enable_sequential_cpu_offload', False): - bt.logging.info(f"Enabling sequential cpu offload for {self.i2i_model_name}") - self.i2i_model.enable_sequential_cpu_offload() - if model_config.get('vae_enable_slicing', False): - bt.logging.info(f"Enabling vae slicing for {self.i2i_model_name}") - try: - self.i2i_model.vae.enable_slicing() - except Exception: - try: - self.i2i_model.enable_vae_slicing() - except Exception: - bt.logging.warning(f"Could not enable vae slicing for {self.i2i_model}") - if model_config.get('vae_enable_tiling', False): - bt.logging.info(f"Enabling vae tiling for {self.i2i_model_name}") - try: - self.i2i_model.vae.enable_tiling() - except Exception: - try: - self.i2i_model.enable_vae_tiling() - except Exception: - bt.logging.warning(f"Could not enable vae tiling for {self.i2i_model}") - - self.i2i_model.to(self.device) - bt.logging.info(f"Loaded {self.i2i_model_name} using {pipeline_cls.__name__}.") - - def clear_gpu(self) -> None: - """Clear GPU memory by deleting models and running garbage collection.""" - if self.i2i_model is not None: - bt.logging.info("Clearing i2i model from GPU memory") - self.i2i_model.to('cpu') - del self.i2i_model - self.i2i_model = None - gc.collect() - torch.cuda.empty_cache() - - def batch_generate(self, batch_size: int = 5) -> None: - """ - Generate 
inpainting transformations in batches. - - Args: - batch_size: Number of images to process in each batch. - """ - prompts = [] - bt.logging.info(f"Generating {batch_size} prompts") - for i in range(batch_size): - image_sample = self.image_cache.sample() - bt.logging.info(f"Sampled image {i+1}/{batch_size} for captioning: {image_sample['path']}") - prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1)) - bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}") - - # Randomly select model if enabled - if self.use_random_i2i_model: - model_name = select_random_i2i_model() - else: - model_name = self.i2i_model_name - - for i, prompt in enumerate(prompts): - bt.logging.info(f"Started generation {i+1}/{batch_size} | Model: {model_name} | Prompt: {prompt}") - - # Generate inpainted image from current prompt - start = time.time() - image_sample = self.image_cache.sample() - output = self.run_i2i(prompt, image_sample['image'], model_name) - - bt.logging.info(f'Writing to cache {self.output_dir}') - base_path = Path(self.output_dir) / 'image' / str(output['time']) - metadata = {k: v for k, v in output.items() if k != 'gen_output'} - base_path.with_suffix('.json').write_text(json.dumps(metadata)) - - out_path = base_path.with_suffix('.png') - output['gen_output'].images[0].save(out_path) - bt.logging.info(f"Wrote to {out_path}") \ No newline at end of file diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index ce38a5c3..3b5c72ab 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -17,14 +17,16 @@ HUGGINGFACE_CACHE_DIR, TEXT_MODERATION_MODEL, IMAGE_ANNOTATION_MODEL, - T2VIS_MODELS, - T2VIS_MODEL_NAMES, + MODELS, + MODEL_NAMES, T2V_MODEL_NAMES, T2I_MODEL_NAMES, TARGET_IMAGE_SIZE, - select_random_t2vis_model, + select_random_model, + get_task, 
get_modality ) +from bitmind.synthetic_data_generation.image_utils import create_random_mask from bitmind.synthetic_data_generation.prompt_utils import truncate_prompt_if_too_long from bitmind.synthetic_data_generation.prompt_generator import PromptGenerator from bitmind.validator.cache import ImageCache @@ -54,19 +56,19 @@ class SyntheticDataGenerator: various text-to-video (t2v) and text-to-image (t2i) models. Attributes: - use_random_t2vis_model: Whether to randomly select a t2v or t2i for each + use_random_model: Whether to randomly select a t2v or t2i for each generation task. prompt_type: The type of prompt generation strategy ('random', 'annotation'). prompt_generator_name: Name of the prompt generation model. - t2vis_model_name: Name of the t2v or t2i model. + model_name: Name of the t2v, t2i, or i2i model. prompt_generator: The vlm/llm pipeline for generating input prompts for t2i/t2v models output_dir: Directory to write generated data. """ def __init__( self, - t2vis_model_name: Optional[str] = None, - use_random_t2vis_model: bool = True, + model_name: Optional[str] = None, + use_random_model: bool = True, prompt_type: str = 'annotation', output_dir: Optional[Union[str, Path]] = None, image_cache: Optional[ImageCache] = None, @@ -76,34 +78,33 @@ def __init__( Initialize the SyntheticDataGenerator. Args: - t2vis_model_name: Name of the text-to-video or text-to-image model. - use_random_t2vis_model: Whether to randomly select models for generation. + model_name: Name of the generative image/video model + use_random_model: Whether to randomly select models for generation. prompt_type: The type of prompt generation strategy. output_dir: Directory to write generated data. device: Device identifier. - run_as_daemon: Whether to run generation in the background. image_cache: Optional image cache instance. Raises: ValueError: If an invalid model name is provided. NotImplementedError: If an unsupported prompt type is specified. 
""" - if not use_random_t2vis_model and t2vis_model_name not in T2VIS_MODEL_NAMES: + if not use_random_model and model_name not in MODEL_NAMES: raise ValueError( - f"Invalid model name '{t2vis_model_name}'. " - f"Options are {T2VIS_MODEL_NAMES}" + f"Invalid model name '{model_name}'. " + f"Options are {MODEL_NAMES}" ) - self.use_random_t2vis_model = use_random_t2vis_model - self.t2vis_model_name = t2vis_model_name - self.t2vis_model = None + self.use_random_model = use_random_model + self.model_name = model_name + self.model = None self.device = device - if self.use_random_t2vis_model and t2vis_model_name is not None: + if self.use_random_model and model_name is not None: bt.logging.warning( - "t2vis_model_name will be ignored (use_random_t2vis_model=True)" + "model_name will be ignored (use_random_model=True)" ) - self.t2vis_model_name = None + self.model_name = None self.prompt_type = prompt_type self.image_cache = image_cache @@ -146,8 +147,7 @@ def batch_generate(self, batch_size: int = 5) -> None: bt.logging.info(f"Started generation {i+1}/{batch_size} | Model: {model_name} | Prompt: {prompt}") # Generate image/video from current model and prompt - start = time.time() - output = self.run_t2vis(prompt, modality, t2vis_model_name=model_name) + output = self._run_generation(prompt, model_name=model_name) bt.logging.info(f'Writing to cache {self.output_dir}') base_path = self.output_dir / modality / str(output['time']) @@ -171,7 +171,7 @@ def generate( self, image: Optional[Image.Image] = None, modality: str = 'image', - t2vis_model_name: Optional[str] = None + model_name: Optional[str] = None ) -> Dict[str, Any]: """ Generate synthetic data based on input parameters. 
@@ -189,7 +189,7 @@ def generate( """ prompt = self.generate_prompt(image, clear_gpu=True) bt.logging.info("Generating synthetic data...") - gen_data = self.run_t2vis(prompt, modality, t2vis_model_name) + gen_data = self._run_generation(prompt, modality, model_name) self.clear_gpu() return gen_data @@ -213,11 +213,12 @@ def generate_prompt( raise NotImplementedError(f"Unsupported prompt type: {self.prompt_type}") return prompt - def run_t2vis( + def _run_generation( self, prompt: str, - modality: str, - t2vis_model_name: Optional[str] = None, + task: Optional[str] = None, + model_name: Optional[str] = None, + image: Optional[Image.Image] = None, generate_at_target_size: bool = False, ) -> Dict[str, Any]: @@ -226,8 +227,10 @@ def run_t2vis( Args: prompt: The text prompt used to inspire the generation. + task: The generation task type ('t2i', 't2v', 'i2i', or None). + model_name: Optional model name to use for generation. + image: Optional input image for image-to-image generation. generate_at_target_size: If True, generate at TARGET_IMAGE_SIZE dimensions. - t2vis_model_name: Optional model name to use for generation. Returns: Dictionary containing generated data and metadata. @@ -235,12 +238,23 @@ def run_t2vis( Raises: RuntimeError: If generation fails. 
""" - self.load_t2vis_model(t2vis_model_name) - model_config = T2VIS_MODELS[self.t2vis_model_name] + self.load_model(model_name) + model_config = MODELS[self.model_name] + task = get_task(model_name) if task is None else task bt.logging.info("Preparing generation arguments") gen_args = model_config.get('generate_args', {}).copy() - + + # prep inpainting-specific generation args + if task == 'i2i': + # Use larger image size for better inpainting quality + target_size = (1024, 1024) + if image.size[0] > target_size[0] or image.size[1] > target_size[1]: + image = image.resize(target_size, Image.Resampling.LANCZOS) + + gen_args['mask_image'] = create_random_mask(image.size) + gen_args['image'] = image + # Process generation arguments for k, v in gen_args.items(): if isinstance(v, dict): @@ -259,7 +273,7 @@ def run_t2vis( truncated_prompt = truncate_prompt_if_too_long( prompt, - self.t2vis_model + self.model ) bt.logging.info(f"Generating media from prompt: {truncated_prompt}") @@ -269,12 +283,12 @@ def run_t2vis( pretrained_args = model_config.get('from_pretrained_args', {}) torch_dtype = pretrained_args.get('torch_dtype', torch.bfloat16) with torch.autocast(self.device, torch_dtype, cache_enabled=False): - gen_output = self.t2vis_model( + gen_output = self.model( prompt=truncated_prompt, **gen_args ) else: - gen_output = self.t2vis_model( + gen_output = self.model( prompt=truncated_prompt, **gen_args ) @@ -287,7 +301,7 @@ def run_t2vis( f"default dimensions. 
Error: {e}" ) try: - gen_output = self.t2vis_model(prompt=truncated_prompt) + gen_output = self.model(prompt=truncated_prompt) gen_time = time.time() - start_time except Exception as fallback_error: bt.logging.error( @@ -307,80 +321,80 @@ def run_t2vis( 'prompt_long': prompt, 'gen_output': gen_output, # image or video 'time': time.time(), - 'model_name': self.t2vis_model_name, + 'model_name': self.model_name, 'gen_time': gen_time } - def load_t2vis_model(self, model_name: Optional[str] = None, modality: Optional[str] = None) -> None: + def load_model(self, model_name: Optional[str] = None, modality: Optional[str] = None) -> None: """Load a Hugging Face text-to-image or text-to-video model to a specific GPU.""" if model_name is not None: - self.t2vis_model_name = model_name - elif self.use_random_t2vis_model or model_name == 'random': - model_name = select_random_t2vis_model(modality) - self.t2vis_model_name = model_name + self.model_name = model_name + elif self.use_random_model or model_name == 'random': + model_name = select_random_model(modality) + self.model_name = model_name - bt.logging.info(f"Loading {self.t2vis_model_name}") + bt.logging.info(f"Loading {self.model_name}") - pipeline_cls = T2VIS_MODELS[model_name]['pipeline_cls'] - pipeline_args = T2VIS_MODELS[model_name]['from_pretrained_args'] + pipeline_cls = MODELS[model_name]['pipeline_cls'] + pipeline_args = MODELS[model_name]['from_pretrained_args'] - self.t2vis_model = pipeline_cls.from_pretrained( + self.model = pipeline_cls.from_pretrained( pipeline_args.get('base', model_name), cache_dir=HUGGINGFACE_CACHE_DIR, **pipeline_args, add_watermarker=False ) - self.t2vis_model.set_progress_bar_config(disable=True) + self.model.set_progress_bar_config(disable=True) # Load scheduler if specified - if 'scheduler' in T2VIS_MODELS[model_name]: - sched_cls = T2VIS_MODELS[model_name]['scheduler']['cls'] - sched_args = T2VIS_MODELS[model_name]['scheduler']['from_config_args'] - self.t2vis_model.scheduler = 
sched_cls.from_config( - self.t2vis_model.scheduler.config, + if 'scheduler' in MODELS[model_name]: + sched_cls = MODELS[model_name]['scheduler']['cls'] + sched_args = MODELS[model_name]['scheduler']['from_config_args'] + self.model.scheduler = sched_cls.from_config( + self.model.scheduler.config, **sched_args ) # Configure model optimizations - model_config = T2VIS_MODELS[model_name] + model_config = MODELS[model_name] if model_config.get('enable_model_cpu_offload', False): bt.logging.info(f"Enabling cpu offload for {model_name}") - self.t2vis_model.enable_model_cpu_offload() + self.model.enable_model_cpu_offload() if model_config.get('enable_sequential_cpu_offload', False): bt.logging.info(f"Enabling sequential cpu offload for {model_name}") - self.t2vis_model.enable_sequential_cpu_offload() + self.model.enable_sequential_cpu_offload() if model_config.get('vae_enable_slicing', False): bt.logging.info(f"Enabling vae slicing for {model_name}") try: - self.t2vis_model.vae.enable_slicing() + self.model.vae.enable_slicing() except Exception: try: - self.t2vis_model.enable_vae_slicing() + self.model.enable_vae_slicing() except Exception: - bt.logging.warning(f"Could not enable vae slicing for {self.t2vis_model}") + bt.logging.warning(f"Could not enable vae slicing for {self.model}") if model_config.get('vae_enable_tiling', False): bt.logging.info(f"Enabling vae tiling for {model_name}") try: - self.t2vis_model.vae.enable_tiling() + self.model.vae.enable_tiling() except Exception: try: - self.t2vis_model.enable_vae_tiling() + self.model.enable_vae_tiling() except Exception: - bt.logging.warning(f"Could not enable vae tiling for {self.t2vis_model}") + bt.logging.warning(f"Could not enable vae tiling for {self.model}") - self.t2vis_model.to(self.device) + self.model.to(self.device) bt.logging.info(f"Loaded {model_name} using {pipeline_cls.__name__}.") def clear_gpu(self) -> None: """Clear GPU memory by deleting models and running garbage collection.""" - if 
self.t2vis_model is not None: + if self.model is not None: bt.logging.info( "Deleting previous text-to-image or text-to-video model, " "freeing memory" ) - del self.t2vis_model - self.t2vis_model = None + del self.model + self.model = None gc.collect() torch.cuda.empty_cache() diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index 867f6609..a7d630b2 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -141,7 +141,7 @@ "from_pretrained_args": { "use_safetensors": True, "torch_dtype": torch.float16, - } + }, } } T2I_MODEL_NAMES: List[str] = list(T2I_MODELS.keys()) @@ -154,6 +154,12 @@ "use_safetensors": True, "torch_dtype": torch.float16, "variant": "fp16" + }, + "generate_args": { + "guidance_scale": 7.5, + "num_inference_steps": 50, + "strength": 0.99, + "generator": torch.Generator("cuda" if torch.cuda.is_available() else "cpu"), } } } @@ -213,27 +219,34 @@ T2V_MODEL_NAMES: List[str] = list(T2V_MODELS.keys()) # Combined model configurations -T2VIS_MODELS: Dict[str, Dict[str, Any]] = {**T2I_MODELS, **I2I_MODELS, **T2V_MODELS} -T2VIS_MODEL_NAMES: List[str] = list(T2VIS_MODELS.keys()) +MODELS: Dict[str, Dict[str, Any]] = {**T2I_MODELS, **I2I_MODELS, **T2V_MODELS} +MODEL_NAMES: List[str] = list(MODELS.keys()) def get_modality(model_name): - if model_name in T2V_MODEL_NAMES: + if model_name in T2V_MODEL_NAMES: return 'video' + elif model_name in T2I_MODEL_NAMES + I2I_MODEL_NAMES: + return 'image' + + +def get_task(model_name): + if model_name in T2V_MODEL_NAMES: + return 't2v' elif model_name in T2I_MODEL_NAMES: - return 'image' + return 't2i' elif model_name in I2I_MODEL_NAMES: return 'i2i' -def select_random_t2vis_model(modality: Optional[str] = None) -> str: +def select_random_model(task: Optional[str] = None) -> str: """ Select a random text-to-image or text-to-video model based on the specified modality. Args: - modality: The type of model to select ('image', 'video', or 'random'). 
- If None or 'random', randomly chooses between image and video. + modality: The type of model to select ('t2v', 't2i', 'i2i', or 'random'). + If None or 'random', randomly chooses between the valid options Returns: The name of the selected model. @@ -241,22 +254,15 @@ def select_random_t2vis_model(modality: Optional[str] = None) -> str: Raises: NotImplementedError: If the specified modality is not supported. """ - if modality is None or modality == 'random': - modality = np.random.choice(['image', 'video']) + if task is None or task == 'random': + task = np.random.choice(['t2i', 'i2i', 't2v']) - if modality == 'image': + if task == 't2i': return np.random.choice(T2I_MODEL_NAMES) - elif modality == 'video': + elif task == 't2v': return np.random.choice(T2V_MODEL_NAMES) + elif task == 'i2i': + return np.random.choice(I2I_MODEL_NAMES) else: - raise NotImplementedError(f"Unsupported modality: {modality}") - - -def select_random_i2i_model() -> str: - """ - Select a random image-to-image inpainting model. + raise NotImplementedError(f"Unsupported task: {task}") - Returns: - The name of the selected model. 
- """ - return np.random.choice(I2I_MODEL_NAMES) From 694535d777730cf96660adc85da121c8d5730da4 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Mon, 6 Jan 2025 23:14:40 +0900 Subject: [PATCH 02/11] minor simplification of forward flow --- bitmind/validator/forward.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/bitmind/validator/forward.py b/bitmind/validator/forward.py index 9b242b9b..d6801556 100644 --- a/bitmind/validator/forward.py +++ b/bitmind/validator/forward.py @@ -58,22 +58,21 @@ async def forward(self): challenge_metadata['modality'] = modality bt.logging.info(f"Sampling data from {modality} cache") - + cache = self.media_cache[CHALLENGE_TYPE[label]][modality] + if modality == 'video': - cache = self.media_cache[CHALLENGE_TYPE[label]][modality] num_frames = random.randint( self.config.neuron.clip_frames_min, self.config.neuron.clip_frames_max) challenge = cache.sample(num_frames, min_fps=8, max_fps=30) + elif modality == 'image': - if label == 1: # synthetic - # 20% chance to use i2i (in-painting + if label == 1: + # 20% chance to use i2i (in-painting) synthetic_type = 'i2i' if np.random.rand() < 0.2 else 't2i' - cache = self.media_cache[CHALLENGE_TYPE[label]][modality][synthetic_type] challenge_metadata['synthetic_type'] = synthetic_type - bt.logging.info(f"Using {synthetic_type} image cache") - else: # real - cache = self.media_cache[CHALLENGE_TYPE[label]][modality] + cache = cache[synthetic_type] + challenge = cache.sample() if challenge is None: From 13348ff872cfe6fa3f86564d3d572896c00362fb Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 05:57:49 +0900 Subject: [PATCH 03/11] simplifying forward flow --- bitmind/validator/forward.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/bitmind/validator/forward.py b/bitmind/validator/forward.py index d6801556..e2a3188c 100644 --- a/bitmind/validator/forward.py +++ b/bitmind/validator/forward.py @@ -31,6 +31,19 @@ 
from bitmind.validator.config import CHALLENGE_TYPE, MAINNET_UID, TARGET_IMAGE_SIZE from bitmind.validator.reward import get_rewards +def determine_challenge_type(fake_prob=0.5): + modality = 'video' if np.random.rand() > 0.5 else 'image' + label = 0 if np.random.rand() > fake_prob else 1 + task = None + if label == 1: + if modality == 'video': + task = 't2v' + elif modality == 'image': + # 20% chance to use i2i (in-painting) + task = 'i2i' if np.random.rand() < 0.2 else 't2i' + + return label, modality, task + async def forward(self): """ @@ -52,10 +65,10 @@ async def forward(self): challenge_metadata = {} # for bookkeeping challenge = {} # for querying miners - modality = 'video' if np.random.rand() > 0.5 else 'image' - label = 0 if np.random.rand() > self._fake_prob else 1 + label, modality, source_model_task = determine_challenge_type() challenge_metadata['label'] = label challenge_metadata['modality'] = modality + challenge_metadata['source_model_task'] = source_model_task bt.logging.info(f"Sampling data from {modality} cache") cache = self.media_cache[CHALLENGE_TYPE[label]][modality] @@ -67,12 +80,6 @@ async def forward(self): challenge = cache.sample(num_frames, min_fps=8, max_fps=30) elif modality == 'image': - if label == 1: - # 20% chance to use i2i (in-painting) - synthetic_type = 'i2i' if np.random.rand() < 0.2 else 't2i' - challenge_metadata['synthetic_type'] = synthetic_type - cache = cache[synthetic_type] - challenge = cache.sample() if challenge is None: From 2c4c8bdb9dca96a0d88d7dd5b269f43b97c8e92f Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 05:58:46 +0900 Subject: [PATCH 04/11] standardizing cache structures with the introduction of task type subdirs --- bitmind/validator/config.py | 14 +++++++++----- neurons/validator.py | 16 ++++++++-------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index a7d630b2..91b0d574 100644 --- a/bitmind/validator/config.py 
+++ b/bitmind/validator/config.py @@ -30,15 +30,19 @@ # Cache directories HUGGINGFACE_CACHE_DIR: Path = Path.home() / '.cache' / 'huggingface' SN34_CACHE_DIR: Path = Path.home() / '.cache' / 'sn34' +SN34_CACHE_DIR.mkdir(parents=True, exist_ok=True) + +VALIDATOR_INFO_PATH: Path = SN34_CACHE_DIR / 'validator.yaml' + REAL_CACHE_DIR: Path = SN34_CACHE_DIR / 'real' SYNTH_CACHE_DIR: Path = SN34_CACHE_DIR / 'synthetic' + REAL_VIDEO_CACHE_DIR: Path = REAL_CACHE_DIR / 'video' REAL_IMAGE_CACHE_DIR: Path = REAL_CACHE_DIR / 'image' -SYNTH_VIDEO_CACHE_DIR: Path = SYNTH_CACHE_DIR / 'video' -SYNTH_IMAGE_T2I_CACHE_DIR: Path = SYNTH_CACHE_DIR / 'image' / 't2i' -SYNTH_IMAGE_I2I_CACHE_DIR: Path = SYNTH_CACHE_DIR / 'image' / 'i2i' -VALIDATOR_INFO_PATH: Path = SN34_CACHE_DIR / 'validator.yaml' -SN34_CACHE_DIR.mkdir(parents=True, exist_ok=True) + +T2V_CACHE_DIR: Path = SYNTH_CACHE_DIR / 't2v' +T2I_CACHE_DIR: Path = SYNTH_CACHE_DIR / 't2i' +I2I_CACHE_DIR: Path = SYNTH_CACHE_DIR / 'i2i' # Update intervals in hours VIDEO_ZIP_CACHE_UPDATE_INTERVAL = 3 diff --git a/neurons/validator.py b/neurons/validator.py index ff6797d5..d144a441 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -33,14 +33,12 @@ MAINNET_UID, MAINNET_WANDB_PROJECT, TESTNET_WANDB_PROJECT, - IMAGE_DATASETS, - VIDEO_DATASETS, WANDB_ENTITY, REAL_VIDEO_CACHE_DIR, REAL_IMAGE_CACHE_DIR, - SYNTH_IMAGE_T2I_CACHE_DIR, - SYNTH_IMAGE_I2I_CACHE_DIR, - SYNTH_VIDEO_CACHE_DIR, + T2I_CACHE_DIR, + I2I_CACHE_DIR, + T2V_CACHE_DIR, VALIDATOR_INFO_PATH ) @@ -76,10 +74,12 @@ def __init__(self, config=None): # synthetic media caches are populated by the SyntheticDataGenerator process (started by start_validator.sh) self.synthetic_media_cache = { 'image': { - 't2i': ImageCache(SYNTH_IMAGE_T2I_CACHE_DIR), - 'i2i': ImageCache(SYNTH_IMAGE_I2I_CACHE_DIR) + 't2i': ImageCache(T2I_CACHE_DIR), + 'i2i': ImageCache(I2I_CACHE_DIR) }, - 'video': VideoCache(SYNTH_VIDEO_CACHE_DIR) + 'video': { + 't2v': VideoCache(T2V_CACHE_DIR) + } } 
self.media_cache = { From ca38347bd2125108295cc6490068a5d64e91e5fd Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 05:59:02 +0900 Subject: [PATCH 05/11] adding i2i models to batch generation --- .../synthetic_data_generator.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 3b5c72ab..4c487756 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -6,6 +6,7 @@ import warnings from pathlib import Path from typing import Dict, Optional, Any, Union +from itertools import zip_longest import bittensor as bt import numpy as np @@ -21,6 +22,7 @@ MODEL_NAMES, T2V_MODEL_NAMES, T2I_MODEL_NAMES, + I2I_MODEL_NAMES, TARGET_IMAGE_SIZE, select_random_model, get_task, @@ -137,12 +139,19 @@ def batch_generate(self, batch_size: int = 5) -> None: prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1)) bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}") - # shuffle and interleave models + # shuffle and interleave models to add stochasticity to initial validator challenges + i2i_model_names = random.sample(I2I_MODEL_NAMES, len(I2I_MODEL_NAMES)) t2i_model_names = random.sample(T2I_MODEL_NAMES, len(T2I_MODEL_NAMES)) t2v_model_names = random.sample(T2V_MODEL_NAMES, len(T2V_MODEL_NAMES)) - model_names = [m for pair in zip(t2v_model_names, t2i_model_names) for m in pair] - for model_name in model_names: + model_names_interleaved = [ + m for triple in zip_longest(t2v_model_names, t2i_model_names, i2i_model_names) + for m in triple if m is not None + ] + + # for each model, generate an image/video from the prompt generated for its specific tokenizer max len + for model_name in model_names_interleaved: modality = get_modality(model_name) + task = get_task(model_name) 
 for i, prompt in enumerate(prompts): bt.logging.info(f"Started generation {i+1}/{batch_size} | Model: {model_name} | Prompt: {prompt}") @@ -150,7 +159,7 @@ def batch_generate(self, batch_size: int = 5) -> None: output = self._run_generation(prompt, model_name=model_name) bt.logging.info(f'Writing to cache {self.output_dir}') - base_path = self.output_dir / modality / str(output['time']) + base_path = self.output_dir / modality / task / str(output['time']) metadata = {k: v for k, v in output.items() if k != 'gen_output'} base_path.with_suffix('.json').write_text(json.dumps(metadata)) From caabd93e066d28bcfe5403054df0995c172391db Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 05:59:41 +0900 Subject: [PATCH 06/11] removing deprecated InPaintingGenerator from run script --- bitmind/validator/scripts/run_data_generator.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/bitmind/validator/scripts/run_data_generator.py b/bitmind/validator/scripts/run_data_generator.py index 90865ec6..fd16c309 100644 --- a/bitmind/validator/scripts/run_data_generator.py +++ b/bitmind/validator/scripts/run_data_generator.py @@ -48,20 +48,12 @@ image_cache=image_cache, output_dir=args.t2i_output_dir) - ipg = InPaintingGenerator( - use_random_i2i_model=True, - device=args.device, - image_cache=image_cache, - output_dir=args.i2i_output_dir) - bt.logging.info("Starting data generator service") sdg.batch_generate(batch_size=1) - ipg.batch_generate(batch_size=1) while True: try: sdg.batch_generate(batch_size=args.batch_size) - ipg.batch_generate(batch_size=args.batch_size) except Exception as e: bt.logging.error(f"Error in batch generation: {str(e)}") time.sleep(5) From ba47e0c268b06f382f622316a15c5ff2a80e02e7 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 06:41:50 +0900 Subject: [PATCH 07/11] adding --clear-cache option for validator --- run_neuron.py | 7 +++++-- start_validator.sh | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff
--git a/run_neuron.py b/run_neuron.py index 8105cb6d..7904b256 100644 --- a/run_neuron.py +++ b/run_neuron.py @@ -61,6 +61,7 @@ def run_auto_update_self_heal(neuron_type, auto_update, self_heal): parser.add_argument("--miner", action="store_true") parser.add_argument("--no-self-heal", action="store_true", help="Disable the automatic restart of the PM2 process") parser.add_argument("--no-auto-update", action="store_true", help="Disable the automatic update of the local repository") + parser.add_argument("--clear-cache", action="store_true", help="Clear the cache before starting validator") args = parser.parse_args() if not (args.miner ^ args.validator): @@ -69,11 +70,13 @@ def run_auto_update_self_heal(neuron_type, auto_update, self_heal): neuron_type = 'miner' if args.miner else 'validator' - os.system(f"./start_{neuron_type}.sh") + if args.clear_cache and args.validator: + os.system(f"./start_{neuron_type}.sh --clear-cache") + else: + os.system(f"./start_{neuron_type}.sh") if not args.no_auto_update or not args.no_self_heal: run_auto_update_self_heal( neuron_type, auto_update=not args.no_auto_update, self_heal=not args.no_self_heal) - diff --git a/start_validator.sh b/start_validator.sh index 9ca0d22f..0b49e774 100755 --- a/start_validator.sh +++ b/start_validator.sh @@ -12,6 +12,19 @@ VALIDATOR_PROCESS_NAME="bitmind_validator" DATA_GEN_PROCESS_NAME="bitmind_data_generator" CACHE_UPDATE_PROCESS_NAME="bitmind_cache_updater" +# Clear cache if specified +while [[ $# -gt 0 ]]; do + case $1 in + --clear-cache) + rm -rf ~/.cache/sn34 + shift + ;; + *) + shift + ;; + esac +done + # Login to Weights & Biases if ! wandb login $WANDB_API_KEY; then echo "Failed to login to Weights & Biases with the provided API key." 
From 4ef2b4b46eca6598997c29185cfbb89884224ff7 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 08:47:19 +0000 Subject: [PATCH 08/11] updating SDG init params --- bitmind/synthetic_data_generation/__init__.py | 1 - bitmind/validator/scripts/run_data_generator.py | 2 +- bitmind/validator/verify_models.py | 13 +++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bitmind/synthetic_data_generation/__init__.py b/bitmind/synthetic_data_generation/__init__.py index 94c360d3..5a2682ac 100644 --- a/bitmind/synthetic_data_generation/__init__.py +++ b/bitmind/synthetic_data_generation/__init__.py @@ -1,3 +1,2 @@ from .synthetic_data_generator import SyntheticDataGenerator -from .in_painting_generator import InPaintingGenerator from .prompt_generator import PromptGenerator diff --git a/bitmind/validator/scripts/run_data_generator.py b/bitmind/validator/scripts/run_data_generator.py index fd16c309..31fda239 100644 --- a/bitmind/validator/scripts/run_data_generator.py +++ b/bitmind/validator/scripts/run_data_generator.py @@ -43,7 +43,7 @@ sdg = SyntheticDataGenerator( prompt_type='annotation', - use_random_t2vis_model=True, + use_random_model=True, device=args.device, image_cache=image_cache, output_dir=args.t2i_output_dir) diff --git a/bitmind/validator/verify_models.py b/bitmind/validator/verify_models.py index 278a0ff3..a16b7123 100644 --- a/bitmind/validator/verify_models.py +++ b/bitmind/validator/verify_models.py @@ -1,6 +1,6 @@ import os from bitmind.synthetic_data_generation import SyntheticDataGenerator -from bitmind.validator.config import T2VIS_MODEL_NAMES as MODEL_NAMES, IMAGE_ANNOTATION_MODEL, TEXT_MODERATION_MODEL +from bitmind.validator.config import MODEL_NAMES, IMAGE_ANNOTATION_MODEL, TEXT_MODERATION_MODEL import bittensor as bt @@ -40,7 +40,8 @@ def main(): bt.logging.info("Verifying validator model downloads....") synthetic_image_generator = SyntheticDataGenerator( prompt_type='annotation', - use_random_t2vis_model=True + 
image_cache='test', + use_random_model=True ) # Check and load annotation and moderation models if not cached @@ -52,11 +53,11 @@ def main(): for model_name in MODEL_NAMES: if not is_model_cached(model_name): synthetic_image_generator = SyntheticDataGenerator( - prompt_type='annotation', - use_random_t2vis_model=False, - t2vis_model_name=model_name + prompt_type=None, + use_random_model=False, + model_name=model_name ) - synthetic_image_generator.load_t2vis_model(model_name) + synthetic_image_generator.load_model(model_name) synthetic_image_generator.clear_gpu() From 935fbf6b678380156c920ac9c0f47f34401b36b8 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 09:40:12 +0000 Subject: [PATCH 09/11] fixing last imports + directory structure references --- .../synthetic_data_generator.py | 6 +++--- bitmind/utils/mock.py | 2 +- bitmind/validator/forward.py | 11 ++++++----- bitmind/validator/scripts/run_data_generator.py | 10 +++------- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 4c487756..463b3a20 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -120,9 +120,9 @@ def __init__( self.output_dir = Path(output_dir) if output_dir else None if self.output_dir: - (self.output_dir / "video").mkdir(parents=True, exist_ok=True) - (self.output_dir / "image").mkdir(parents=True, exist_ok=True) - + (self.output_dir / "t2v").mkdir(parents=True, exist_ok=True) + (self.output_dir / "t2i").mkdir(parents=True, exist_ok=True) + (self.output_dir / "i2i").mkdir(parents=True, exist_ok=True) def batch_generate(self, batch_size: int = 5) -> None: """ diff --git a/bitmind/utils/mock.py b/bitmind/utils/mock.py index bfb6639a..8fae787f 100644 --- a/bitmind/utils/mock.py +++ b/bitmind/utils/mock.py @@ -6,7 +6,7 @@ from typing import List from 
PIL import Image -from bitmind.validator.config import T2VIS_MODEL_NAMES as MODEL_NAMES +from bitmind.validator.config import MODEL_NAMES from bitmind.validator.miner_performance_tracker import MinerPerformanceTracker diff --git a/bitmind/validator/forward.py b/bitmind/validator/forward.py index e2a3188c..82b6aaf0 100644 --- a/bitmind/validator/forward.py +++ b/bitmind/validator/forward.py @@ -31,9 +31,11 @@ from bitmind.validator.config import CHALLENGE_TYPE, MAINNET_UID, TARGET_IMAGE_SIZE from bitmind.validator.reward import get_rewards -def determine_challenge_type(fake_prob=0.5): + +def determine_challenge_type(media_cache, fake_prob=0.5): modality = 'video' if np.random.rand() > 0.5 else 'image' label = 0 if np.random.rand() > fake_prob else 1 + cache = media_cache[CHALLENGE_TYPE[label]][modality] task = None if label == 1: if modality == 'video': @@ -41,8 +43,8 @@ def determine_challenge_type(fake_prob=0.5): elif modality == 'image': # 20% chance to use i2i (in-painting) task = 'i2i' if np.random.rand() < 0.2 else 't2i' - - return label, modality, task + cache = cache[task] + return label, modality, task, cache async def forward(self): @@ -65,13 +67,12 @@ async def forward(self): challenge_metadata = {} # for bookkeeping challenge = {} # for querying miners - label, modality, source_model_task = determine_challenge_type() + label, modality, source_model_task, cache = determine_challenge_type(self.media_cache) challenge_metadata['label'] = label challenge_metadata['modality'] = modality challenge_metadata['source_model_task'] = source_model_task bt.logging.info(f"Sampling data from {modality} cache") - cache = self.media_cache[CHALLENGE_TYPE[label]][modality] if modality == 'video': num_frames = random.randint( diff --git a/bitmind/validator/scripts/run_data_generator.py b/bitmind/validator/scripts/run_data_generator.py index 31fda239..f9a9b213 100644 --- a/bitmind/validator/scripts/run_data_generator.py +++ b/bitmind/validator/scripts/run_data_generator.py @@ 
-5,12 +5,10 @@ from bitmind.validator.scripts.util import load_validator_info, init_wandb_run from bitmind.synthetic_data_generation import SyntheticDataGenerator -from bitmind.synthetic_data_generation import InPaintingGenerator from bitmind.validator.cache import ImageCache from bitmind.validator.config import ( REAL_IMAGE_CACHE_DIR, - SYNTH_IMAGE_T2I_CACHE_DIR, - SYNTH_IMAGE_I2I_CACHE_DIR + SYNTH_CACHE_DIR ) @@ -19,9 +17,7 @@ parser = argparse.ArgumentParser() parser.add_argument('--image-cache-dir', type=str, default=REAL_IMAGE_CACHE_DIR, help='Directory containing real images to use as reference') - parser.add_argument('--t2i-output-dir', type=str, default=SYNTH_IMAGE_T2I_CACHE_DIR, - help='Directory to save generated synthetic data') - parser.add_argument('--i2i-output-dir', type=str, default=SYNTH_IMAGE_I2I_CACHE_DIR, + parser.add_argument('--output-dir', type=str, default=SYNTH_CACHE_DIR, help='Directory to save generated inpainting data') parser.add_argument('--device', type=str, default='cuda', help='Device to run generation on (cuda/cpu)') @@ -46,7 +42,7 @@ use_random_model=True, device=args.device, image_cache=image_cache, - output_dir=args.t2i_output_dir) + output_dir=args.output_dir) bt.logging.info("Starting data generator service") sdg.batch_generate(batch_size=1) From a97c4a1d732f3c70b09882e89af16e1e34bd0233 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 14:15:53 +0000 Subject: [PATCH 10/11] fixing images passed to generate function for i2i --- bitmind/synthetic_data_generation/image_utils.py | 4 ++-- .../synthetic_data_generator.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/bitmind/synthetic_data_generation/image_utils.py b/bitmind/synthetic_data_generation/image_utils.py index ab16dbb9..c8345d57 100644 --- a/bitmind/synthetic_data_generation/image_utils.py +++ b/bitmind/synthetic_data_generation/image_utils.py @@ -62,7 +62,7 @@ def save_images_to_disk(image_dataset, start_index, num_images, 
save_directory, print(f"Failed to save image {i}: {e}") -def create_random_mask(self, size: Tuple[int, int]) -> Image.Image: +def create_random_mask(size: Tuple[int, int]) -> Image.Image: """ Create a random mask for i2i transformation. """ @@ -106,4 +106,4 @@ def create_random_mask(self, size: Tuple[int, int]) -> Image.Image: fill=(255, 255, 255, opacity) ) - return mask \ No newline at end of file + return mask diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 463b3a20..b5d90fb0 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -132,9 +132,11 @@ def batch_generate(self, batch_size: int = 5) -> None: batch_size: Number of prompts to generate in each batch. """ prompts = [] + images = [] bt.logging.info(f"Generating {batch_size} prompts") for i in range(batch_size): image_sample = self.image_cache.sample() + images.append(image_sample['image']) bt.logging.info(f"Sampled image {i+1}/{batch_size} for captioning: {image_sample['path']}") prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1)) bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}") @@ -156,10 +158,10 @@ def batch_generate(self, batch_size: int = 5) -> None: bt.logging.info(f"Started generation {i+1}/{batch_size} | Model: {model_name} | Prompt: {prompt}") # Generate image/video from current model and prompt - output = self._run_generation(prompt, model_name=model_name) + output = self._run_generation(prompt, task=task, model_name=model_name, image=images[i]) bt.logging.info(f'Writing to cache {self.output_dir}') - base_path = self.output_dir / modality / task / str(output['time']) + base_path = self.output_dir / task / str(output['time']) metadata = {k: v for k, v in output.items() if k != 'gen_output'} 
base_path.with_suffix('.json').write_text(json.dumps(metadata)) @@ -179,7 +181,7 @@ def batch_generate(self, batch_size: int = 5) -> None: def generate( self, image: Optional[Image.Image] = None, - modality: str = 'image', + task: Optional[str] = None, model_name: Optional[str] = None ) -> Dict[str, Any]: """ @@ -198,7 +200,7 @@ def generate( """ prompt = self.generate_prompt(image, clear_gpu=True) bt.logging.info("Generating synthetic data...") - gen_data = self._run_generation(prompt, modality, model_name) + gen_data = self._run_generation(prompt, task, model_name, image) self.clear_gpu() return gen_data From 93461d2b1a7d3e20e1a972653ce6c3a7fa9b329a Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 19:25:55 +0000 Subject: [PATCH 11/11] option to log masks/original images for i2i challenges --- .../synthetic_data_generation/synthetic_data_generator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index b5d90fb0..8899a5f5 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -162,7 +162,7 @@ def batch_generate(self, batch_size: int = 5) -> None: bt.logging.info(f'Writing to cache {self.output_dir}') base_path = self.output_dir / task / str(output['time']) - metadata = {k: v for k, v in output.items() if k != 'gen_output'} + metadata = {k: v for k, v in output.items() if k != 'gen_output' and 'image' not in k} base_path.with_suffix('.json').write_text(json.dumps(metadata)) if modality == 'image': @@ -333,7 +333,9 @@ def _run_generation( 'gen_output': gen_output, # image or video 'time': time.time(), 'model_name': self.model_name, - 'gen_time': gen_time + 'gen_time': gen_time, + 'mask_image': gen_args.get('mask_image', None), + 'image': gen_args.get('image', None) } def load_model(self, model_name: Optional[str] = 
None, modality: Optional[str] = None) -> None: