diff --git a/bitmind/__init__.py b/bitmind/__init__.py index da534563..bb041705 100644 --- a/bitmind/__init__.py +++ b/bitmind/__init__.py @@ -18,7 +18,7 @@ # DEALINGS IN THE SOFTWARE. -__version__ = "2.1.3" +__version__ = "2.1.4" version_split = __version__.split(".") __spec_version__ = ( (1000 * int(version_split[0])) diff --git a/bitmind/synthetic_data_generation/image_utils.py b/bitmind/synthetic_data_generation/image_utils.py index c8345d57..4593edb4 100644 --- a/bitmind/synthetic_data_generation/image_utils.py +++ b/bitmind/synthetic_data_generation/image_utils.py @@ -75,35 +75,35 @@ def create_random_mask(size: Tuple[int, int]) -> Image.Image: height = np.random.randint(h//4, h//2) # Center the rectangle with some random offset - x1 = (w - width) // 2 + np.random.randint(-width//4, width//4) - y1 = (h - height) // 2 + np.random.randint(-height//4, height//4) + x = (w - width) // 2 + np.random.randint(-width//4, width//4) + y = (h - height) // 2 + np.random.randint(-height//4, height//4) # Create mask with PIL draw for smoother edges draw = ImageDraw.Draw(mask) draw.rounded_rectangle( - [x1, y1, x1 + width, y1 + height], + [x, y, x + width, y + height], radius=min(width, height) // 10, # Smooth corners fill='white' ) else: # Circular mask with feathered edges draw = ImageDraw.Draw(mask) - center_x = w//2 - center_y = h//2 + x = w//2 + y = h//2 # Make radius proportional to image size radius = min(w, h) // 4 # Add small random offset to center - center_x += np.random.randint(-radius//4, radius//4) - center_y += np.random.randint(-radius//4, radius//4) + x += np.random.randint(-radius//4, radius//4) + y += np.random.randint(-radius//4, radius//4) # Draw multiple circles with decreasing opacity for feathered edge for r in range(radius, radius-10, -1): opacity = int(255 * (r - (radius-10)) / 10) draw.ellipse( - [center_x-r, center_y-r, center_x+r, center_y+r], + [x-r, y-r, x+r, y+r], fill=(255, 255, 255, opacity) ) - return mask + return mask, (x, y) diff --git 
a/bitmind/synthetic_data_generation/prompt_generator.py b/bitmind/synthetic_data_generation/prompt_generator.py index a8083b87..a8b8bc09 100644 --- a/bitmind/synthetic_data_generation/prompt_generator.py +++ b/bitmind/synthetic_data_generation/prompt_generator.py @@ -185,8 +185,7 @@ def generate( description += '.' moderated_description = self.moderate(description) - enhanced_description = self.enhance(description) - return enhanced_description + return self.enhance(moderated_description) def moderate(self, description: str, max_new_tokens: int = 80) -> str: """ diff --git a/bitmind/synthetic_data_generation/prompt_utils.py b/bitmind/synthetic_data_generation/prompt_utils.py index 7c5ce81e..f2407f88 100644 --- a/bitmind/synthetic_data_generation/prompt_utils.py +++ b/bitmind/synthetic_data_generation/prompt_utils.py @@ -1,20 +1,23 @@ - - def get_tokenizer_with_min_len(model): """ - Returns the tokenizer with the smallest maximum token length from the 't2vis_model` object. - - If a second tokenizer exists, it compares both and returns the one with the smaller - maximum token length. Otherwise, it returns the available tokenizer. + Returns the tokenizer with the smallest maximum token length. + Args: + model: Single pipeline or dict of pipeline stages. + Returns: - tuple: A tuple containing the tokenizer and its maximum token length. 
+ tuple: (tokenizer, max_token_length) """ - # Check if a second tokenizer is available in the t2vis_model - if hasattr(model, 'tokenizer_2'): - if model.tokenizer.model_max_length > model.tokenizer_2.model_max_length: - return model.tokenizer_2, model.tokenizer_2.model_max_length - return model.tokenizer, model.tokenizer.model_max_length + # Get the model to check for tokenizers + pipeline = model['stage1'] if isinstance(model, dict) else model + + # If model has two tokenizers, return the one with smaller max length + if hasattr(pipeline, 'tokenizer_2'): + len_1 = pipeline.tokenizer.model_max_length + len_2 = pipeline.tokenizer_2.model_max_length + return (pipeline.tokenizer_2, len_2) if len_2 < len_1 else (pipeline.tokenizer, len_1) + + return pipeline.tokenizer, pipeline.tokenizer.model_max_length def truncate_prompt_if_too_long(prompt: str, model): diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 7bce9d7a..fd4e1cd3 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -33,6 +33,13 @@ from bitmind.synthetic_data_generation.prompt_utils import truncate_prompt_if_too_long from bitmind.synthetic_data_generation.prompt_generator import PromptGenerator from bitmind.validator.cache import ImageCache +from bitmind.validator.model_utils import ( + load_hunyuanvideo_transformer, + load_annimatediff_motion_adapter, + JanusWrapper, + create_pipeline_generator, + enable_model_optimizations +) future_warning_modules_to_ignore = [ @@ -135,24 +142,30 @@ def batch_generate(self, batch_size: int = 5) -> None: prompts = [] images = [] bt.logging.info(f"Generating {batch_size} prompts") + + # Generate all prompts first for i in range(batch_size): image_sample = self.image_cache.sample() images.append(image_sample['image']) bt.logging.info(f"Sampled image {i+1}/{batch_size} for captioning: 
{image_sample['path']}") prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1)) bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}") - - # shuffle and interleave models to add stochasticity to initial validator challenges - i2i_model_names = random.sample(I2I_MODEL_NAMES, len(I2I_MODEL_NAMES)) - t2i_model_names = random.sample(T2I_MODEL_NAMES, len(T2I_MODEL_NAMES)) - t2v_model_names = random.sample(T2V_MODEL_NAMES, len(T2V_MODEL_NAMES)) - model_names_interleaved = [ - m for triple in zip_longest(t2v_model_names, t2i_model_names, i2i_model_names) - for m in triple if m is not None - ] - - # for each model, generate an image/video from the prompt generated for its specific tokenizer max len - for model_name in model_names_interleaved: + + # If specific model is set, use only that model + if not self.use_random_model and self.model_name: + model_names = [self.model_name] + else: + # shuffle and interleave models to add stochasticity + i2i_model_names = random.sample(I2I_MODEL_NAMES, len(I2I_MODEL_NAMES)) + t2i_model_names = random.sample(T2I_MODEL_NAMES, len(T2I_MODEL_NAMES)) + t2v_model_names = random.sample(T2V_MODEL_NAMES, len(T2V_MODEL_NAMES)) + model_names = [ + m for triple in zip_longest(t2v_model_names, t2i_model_names, i2i_model_names) + for m in triple if m is not None + ] + + # Generate for each model/prompt combination + for model_name in model_names: modality = get_modality(model_name) task = get_task(model_name) for i, prompt in enumerate(prompts): @@ -232,7 +245,6 @@ def _run_generation( model_name: Optional[str] = None, image: Optional[Image.Image] = None, generate_at_target_size: bool = False, - ) -> Dict[str, Any]: """ Generate synthetic data based on a text prompt. 
@@ -256,6 +268,7 @@ def _run_generation( bt.logging.info("Preparing generation arguments") gen_args = model_config.get('generate_args', {}).copy() + mask_center = None # prep inpainting-specific generation args if task == 'i2i': @@ -264,7 +277,7 @@ def _run_generation( if image.size[0] > target_size[0] or image.size[1] > target_size[1]: image = image.resize(target_size, Image.Resampling.LANCZOS) - gen_args['mask_image'] = create_random_mask(image.size) + gen_args['mask_image'], mask_center = create_random_mask(image.size) gen_args['image'] = image # Prepare generation arguments @@ -284,28 +297,24 @@ def _run_generation( gen_args['width'] = gen_args['resolution'][1] del gen_args['resolution'] - truncated_prompt = truncate_prompt_if_too_long( - prompt, - self.model - ) - + truncated_prompt = truncate_prompt_if_too_long(prompt, self.model) bt.logging.info(f"Generating media from prompt: {truncated_prompt}") bt.logging.info(f"Generation args: {gen_args}") start_time = time.time() + + # Create pipeline-specific generator + generate = create_pipeline_generator(model_config, self.model) + + # Handle autocast if needed if model_config.get('use_autocast', True): pretrained_args = model_config.get('from_pretrained_args', {}) torch_dtype = pretrained_args.get('torch_dtype', torch.bfloat16) - with torch.autocast(self.device, torch_dtype, cache_enabled=False): - gen_output = self.model( - prompt=truncated_prompt, - **gen_args - ) + with torch.autocast(self.device, torch_dtype, cache_enabled=False): + gen_output = generate(truncated_prompt, **gen_args) else: - gen_output = self.model( - prompt=truncated_prompt, - **gen_args - ) + gen_output = generate(truncated_prompt, **gen_args) + gen_time = time.time() - start_time except Exception as e: @@ -338,78 +347,90 @@ def _run_generation( 'model_name': self.model_name, 'gen_time': gen_time, 'mask_image': gen_args.get('mask_image', None), + 'mask_center': mask_center, 'image': gen_args.get('image', None) } def load_model(self, 
model_name: Optional[str] = None, modality: Optional[str] = None) -> None: - """Load a Hugging Face text-to-image or text-to-video model to a specific GPU.""" + """Load a Hugging Face text-to-image or text-to-video model.""" if model_name is not None: self.model_name = model_name elif self.use_random_model or model_name == 'random': - model_name = select_random_model(modality) - self.model_name = model_name + self.model_name = select_random_model(modality) bt.logging.info(f"Loading {self.model_name}") + + model_config = MODELS[self.model_name] + pipeline_cls = model_config['pipeline_cls'] + pipeline_args = model_config['from_pretrained_args'].copy() - pipeline_cls = MODELS[model_name]['pipeline_cls'] - pipeline_args = MODELS[model_name]['from_pretrained_args'] + # Handle custom loading functions passed as tuples for k, v in pipeline_args.items(): if isinstance(v, tuple) and callable(v[0]): pipeline_args[k] = v[0](**v[1]) - if 'model_id' in pipeline_args: - model_id = pipeline_args['model_id'] - del pipeline_args['model_id'] - else: - model_id = model_name + # Get model_id if specified, otherwise use model_name + model_id = pipeline_args.pop('model_id', self.model_name) + + # Handle multi-stage pipeline + if isinstance(pipeline_cls, dict): + self.model = {} + for stage_name, stage_cls in pipeline_cls.items(): + stage_args = pipeline_args.get(stage_name, {}) + base_model = stage_args.get('base', model_id) + stage_args_filtered = {k:v for k,v in stage_args.items() if k != 'base'} + + bt.logging.info(f"Loading {stage_name} from {base_model}") + self.model[stage_name] = stage_cls.from_pretrained( + base_model, + cache_dir=HUGGINGFACE_CACHE_DIR, + **stage_args_filtered, + add_watermarker=False + ) + + enable_model_optimizations( + model=self.model[stage_name], + device=self.device, + enable_cpu_offload=model_config.get('enable_model_cpu_offload', False), + enable_sequential_cpu_offload=model_config.get('enable_sequential_cpu_offload', False), + 
enable_vae_slicing=model_config.get('vae_enable_slicing', False), + enable_vae_tiling=model_config.get('vae_enable_tiling', False), + stage_name=stage_name + ) - self.model = pipeline_cls.from_pretrained( - model_id, - cache_dir=HUGGINGFACE_CACHE_DIR, - **pipeline_args, - add_watermarker=False - ) + # Disable watermarker + self.model[stage_name].watermarker = None + else: + # Single-stage pipeline + self.model = pipeline_cls.from_pretrained( + model_id, + cache_dir=HUGGINGFACE_CACHE_DIR, + **pipeline_args, + add_watermarker=False + ) - self.model.set_progress_bar_config(disable=True) + # Load scheduler if specified + if 'scheduler' in model_config: + sched_cls = model_config['scheduler']['cls'] + sched_args = model_config['scheduler']['from_config_args'] + self.model.scheduler = sched_cls.from_config( + self.model.scheduler.config, + **sched_args + ) - # Load scheduler if specified - if 'scheduler' in MODELS[model_name]: - sched_cls = MODELS[model_name]['scheduler']['cls'] - sched_args = MODELS[model_name]['scheduler']['from_config_args'] - self.model.scheduler = sched_cls.from_config( - self.model.scheduler.config, - **sched_args + enable_model_optimizations( + model=self.model, + device=self.device, + enable_cpu_offload=model_config.get('enable_model_cpu_offload', False), + enable_sequential_cpu_offload=model_config.get('enable_sequential_cpu_offload', False), + enable_vae_slicing=model_config.get('vae_enable_slicing', False), + enable_vae_tiling=model_config.get('vae_enable_tiling', False) ) - # Configure model optimizations - model_config = MODELS[model_name] - if model_config.get('enable_model_cpu_offload', False): - bt.logging.info(f"Enabling cpu offload for {model_name}") - self.model.enable_model_cpu_offload() - if model_config.get('enable_sequential_cpu_offload', False): - bt.logging.info(f"Enabling sequential cpu offload for {model_name}") - self.model.enable_sequential_cpu_offload() - if model_config.get('vae_enable_slicing', False): - 
bt.logging.info(f"Enabling vae slicing for {model_name}") - try: - self.model.vae.enable_slicing() - except Exception: - try: - self.model.enable_vae_slicing() - except Exception: - bt.logging.warning(f"Could not enable vae slicing for {self.model}") - if model_config.get('vae_enable_tiling', False): - bt.logging.info(f"Enabling vae tiling for {model_name}") - try: - self.model.vae.enable_tiling() - except Exception: - try: - self.model.enable_vae_tiling() - except Exception: - bt.logging.warning(f"Could not enable vae tiling for {self.model}") + # Disable watermarker + self.model.watermarker = None - self.model.to(self.device) - bt.logging.info(f"Loaded {model_name} using {pipeline_cls.__name__}.") + bt.logging.info(f"Loaded {self.model_name}") def clear_gpu(self) -> None: """Clear GPU memory by deleting models and running garbage collection.""" diff --git a/bitmind/synthetic_data_generation/test_in_painting.py b/bitmind/synthetic_data_generation/test_in_painting.py deleted file mode 100644 index ebb1e021..00000000 --- a/bitmind/synthetic_data_generation/test_in_painting.py +++ /dev/null @@ -1,152 +0,0 @@ -import argparse -from pathlib import Path -from PIL import Image -import bittensor as bt -import numpy as np - -from bitmind.synthetic_data_generation.in_painting_generator import InPaintingGenerator - -def create_inpaint_only_image(original: Image.Image, mask: Image.Image, result: Image.Image) -> Image.Image: - """Create an image showing only the inpainted region.""" - # Ensure all images are the same size as the result - size = result.size - original = original.resize(size, Image.Resampling.LANCZOS) - mask = mask.resize(size, Image.Resampling.LANCZOS) - - # Convert mask to boolean array (True where white/inpainted) - mask_array = np.array(mask.convert('L')) > 128 - - # Create blank (black) image - inpaint_only = Image.new('RGB', size, 'black') - - # Copy only the inpainted region - inpaint_array = np.array(result) - inpaint_only_array = np.array(inpaint_only) 
- inpaint_only_array[mask_array] = inpaint_array[mask_array] - - return Image.fromarray(inpaint_only_array) - - -def create_final_image(original: Image.Image, mask: Image.Image, inpainted: Image.Image) -> Image.Image: - """ - Create the final image by combining original and inpainted regions. - Only use inpainted content where the mask is white. - """ - # Ensure all images are in RGB mode - original = original.convert('RGB') - inpainted = inpainted.convert('RGB') - - # Ensure all images are the same size - size = original.size - mask = mask.resize(size, Image.Resampling.LANCZOS) - inpainted = inpainted.resize(size, Image.Resampling.LANCZOS) - - # Convert to numpy arrays with float32 for precise calculations - original_array = np.array(original, dtype=np.float32) - inpainted_array = np.array(inpainted, dtype=np.float32) - - # Create mask array (values between 0 and 1) - mask_array = np.array(mask.convert('L'), dtype=np.float32) / 255.0 - - # Expand mask dimensions to match RGB - mask_array = np.expand_dims(mask_array, axis=-1) - - # Blend images using the mask as alpha - result_array = (original_array * (1 - mask_array) + - inpainted_array * mask_array) - - # Convert back to uint8 with proper rounding - result_array = np.clip(np.round(result_array), 0, 255).astype(np.uint8) - - return Image.fromarray(result_array, mode='RGB') - - -def main(): - parser = argparse.ArgumentParser(description='Test InPainting Generator on a single image') - parser.add_argument('--image_path', type=str, required=True, help='Path to input image') - parser.add_argument('--output_dir', type=str, default='output', help='Directory to save outputs') - parser.add_argument('--custom_prompt', type=str, help='Optional custom prompt to use') - parser.add_argument('--device', type=str, default='cuda', help='Device to run on (cuda/cpu)') - args = parser.parse_args() - - # Create output directory - output_dir = Path(args.output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - try: - # Load 
image - bt.logging.info(f"Loading image from {args.image_path}") - image = Image.open(args.image_path) - - # Initialize generator - generator = InPaintingGenerator( - use_random_i2i_model=True, - output_dir=output_dir, - device=args.device - ) - - # Generate transformation - result = generator.run_i2i( - prompt=args.custom_prompt if args.custom_prompt else generator.generate_prompt(image), - original_image=image - ) - - # Get final size from generated image - final_size = result['gen_output'].images[0].size - - # Save outputs - timestamp = str(int(result['time'])) - - # 1. Save the original image (resized to match output) - original_resized = image.resize(final_size, Image.Resampling.LANCZOS) - original_path = output_dir / f"1_original_{timestamp}.png" - original_resized.save(original_path) - bt.logging.info(f"Saved original image to {original_path}") - - # 2. Save the mask (resized to match output) - mask = generator.create_random_mask(final_size) - mask_path = output_dir / f"2_mask_{timestamp}.png" - mask.save(mask_path) - bt.logging.info(f"Saved mask to {mask_path}") - - # 3. Save the inpainted region only - inpaint_only = create_inpaint_only_image( - original_resized, - mask, - result['gen_output'].images[0] - ) - inpaint_path = output_dir / f"3_inpaint_only_{timestamp}.png" - inpaint_only.save(inpaint_path) - bt.logging.info(f"Saved inpainted region to {inpaint_path}") - - # 4. 
Save the final transformed image (properly combined) - final_image = create_final_image( - original_resized, - mask, - result['gen_output'].images[0] - ) - final_path = output_dir / f"4_final_{timestamp}.png" - final_image.save(final_path) - bt.logging.info(f"Saved final image to {final_path}") - - # Save the prompt/caption to a text file - caption_path = output_dir / f"caption_{timestamp}.txt" - with open(caption_path, 'w') as f: - f.write(f"Generated Prompt: {result['prompt']}\n") - if result['prompt'] != result['prompt_long']: - f.write(f"Full Prompt: {result['prompt_long']}\n") - f.write(f"\nGeneration Time: {result['gen_time']:.2f} seconds\n") - f.write(f"Model: {result['model_name']}\n") - bt.logging.info(f"Saved caption to {caption_path}") - - # Print generation info - bt.logging.info(f"Prompt used: {result['prompt']}") - bt.logging.info(f"Generation time: {result['gen_time']:.2f} seconds") - - except Exception as e: - bt.logging.error(f"Error during inpainting generation: {e}") - raise - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/bitmind/utils/image_transforms.py b/bitmind/utils/image_transforms.py index 7accca7c..5e79a9dd 100644 --- a/bitmind/utils/image_transforms.py +++ b/bitmind/utils/image_transforms.py @@ -18,15 +18,37 @@ def fn(img): class RandomResizedCropWithParams(transforms.RandomResizedCrop): - def __init__(self, *args, **kwargs): + def __init__(self, *args, include_point=None, **kwargs): super().__init__(*args, **kwargs) self.params = None + self.include_point = include_point + print(f"created RRC with point included: {self.include_point}") def forward(self, img, crop_params=None): + """ + Args: + img: PIL Image to be cropped and resized + crop_params: Optional pre-computed crop parameters (i, j, h, w) + """ if crop_params is None: i, j, h, w = super().get_params(img, self.scale, self.ratio) + if self.include_point is not None: + x, y = self.include_point + width, height = img.shape[1:] + + # adjust crop to 
keep mask point + if x < j: + j = max(0, x - 10) + elif x > j + w: + j = min(width - w, x - w + 10) + + if y < i: + i = max(0, y - 10) + elif y > i + h: + i = min(height - h, y - h + 10) else: i, j, h, w = crop_params + self.params = {'crop_params': (i, j, h, w)} return F.resized_crop(img, i, j, h, w, self.size, self.interpolation, antialias=self.antialias) @@ -34,29 +56,35 @@ def forward(self, img, crop_params=None): class RandomHorizontalFlipWithParams(transforms.RandomHorizontalFlip): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.params = None + self.params = {} - def forward(self, img, do_flip=False): - if do_flip or (torch.rand(1) < self.p): - self.params = {'do_flip': True} - return transforms.functional.hflip(img) + def forward(self, img, do_flip=None): + if do_flip is not None: + self.params = {'do_flip': do_flip} + return transforms.functional.hflip(img) if do_flip else img + elif not hasattr(self, 'params'): + do_flip = torch.rand(1) < self.p + self.params = {'do_flip': do_flip} + return transforms.functional.hflip(img) if do_flip else img else: - self.params = {'do_flip': False} - return img + return transforms.functional.hflip(img) if self.params.get('do_flip', False) else img class RandomVerticalFlipWithParams(transforms.RandomVerticalFlip): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.params = None + self.params = {} - def forward(self, img, do_flip=True): - if do_flip or (torch.rand(1) < self.p): - self.params = {'do_flip': True} - return transforms.functional.vflip(img) + def forward(self, img, do_flip=None): + if do_flip is not None: + self.params = {'do_flip': do_flip} + return transforms.functional.vflip(img) if do_flip else img + elif not hasattr(self, 'params'): + do_flip = torch.rand(1) < self.p + self.params = {'do_flip': do_flip} + return transforms.functional.vflip(img) if do_flip else img else: - self.params = {'do_flip': False} - return img + return 
transforms.functional.vflip(img) if self.params.get('do_flip', False) else img class RandomRotationWithParams(transforms.RandomRotation): @@ -134,6 +162,7 @@ def get_distortion_parameter(distortion_type, level): } return param_dict[distortion_type][level - 1] + def get_distortion_function(distortion_type): """Get distortion function based on type.""" func_dict = { @@ -146,6 +175,7 @@ def get_distortion_function(distortion_type): } return func_dict[distortion_type] + def rgb_to_bgr(tensor_img): """Convert a PyTorch tensor image from RGB to BGR format. @@ -156,6 +186,7 @@ def rgb_to_bgr(tensor_img): tensor_img = tensor_img[[2, 1, 0], ...] return tensor_img + def bgr_to_rgb(tensor_img): """Convert a PyTorch tensor image from BGR to RGB format. @@ -166,6 +197,7 @@ def bgr_to_rgb(tensor_img): tensor_img = tensor_img[[2, 1, 0], ...] return tensor_img + def bgr2ycbcr(img_bgr): """Convert BGR image to YCbCr color space.""" img_bgr = img_bgr.astype(np.float32) @@ -175,6 +207,7 @@ def bgr2ycbcr(img_bgr): img_ycbcr[:, :, 1:] = (img_ycbcr[:, :, 1:] * (240 - 16) + 16) / 255.0 return img_ycbcr + def ycbcr2bgr(img_ycbcr): """Convert YCbCr image to BGR color space.""" img_ycbcr = img_ycbcr.astype(np.float32) @@ -184,6 +217,7 @@ def ycbcr2bgr(img_ycbcr): img_bgr = cv2.cvtColor(img_ycrcb, cv2.COLOR_YCR_CB2BGR) return img_bgr + def color_saturation(img, param): """Apply color saturation distortion.""" ycbcr = bgr2ycbcr(img) @@ -192,11 +226,13 @@ def color_saturation(img, param): img = ycbcr2bgr(ycbcr).astype(np.uint8) return img + def color_contrast(img, param): """Apply color contrast distortion.""" img = img.astype(np.float32) * param return img.astype(np.uint8) + def block_wise(img, param): """Apply block-wise distortion.""" width = 8 @@ -208,6 +244,7 @@ def block_wise(img, param): img[r_h:r_h + width, r_w:r_w + width, :] = block return img + def gaussian_noise_color(img, param): """Apply colored Gaussian noise.""" ycbcr = bgr2ycbcr(img) / 255 @@ -216,10 +253,12 @@ def 
gaussian_noise_color(img, param): b = ycbcr2bgr(b) return np.clip(b, 0, 255).astype(np.uint8) + def gaussian_blur(img, param): """Apply Gaussian blur.""" return cv2.GaussianBlur(img, (param, param), param * 1.0 / 6) + def jpeg_compression(img, param): """Apply JPEG compression distortion.""" h, w, _ = img.shape @@ -237,8 +276,12 @@ def __init__(self, distortion_type, level_min=0, level_max=3): self.level_min = level_min self.level_max = level_max - def __call__(self, img): - self.level = random.randint(self.level_min, self.level_max) + def __call__(self, img, level=None): + if level is None: + self.level = random.randint(self.level_min, self.level_max) + else: + self.level = level + if self.level > 0: self.distortion_param = get_distortion_parameter(self.distortion_type, self.level) self.distortion_func = get_distortion_function(self.distortion_type) @@ -312,13 +355,10 @@ def __init__(self, transforms): self.transforms = transforms self.params = {} - def __call__(self, input_data): - transform_params = { - RandomResizedCropWithParams: 'RandomResizedCrop', - RandomHorizontalFlipWithParams: 'RandomHorizontalFlip', - RandomVerticalFlipWithParams: 'RandomVerticalFlip', - RandomRotationWithParams: 'RandomRotation' - } + def __call__(self, input_data, clear_params=True): + if clear_params: + self.params = {} + output_data = [] list_input = True if not isinstance(input_data, list): @@ -326,14 +366,18 @@ def __call__(self, input_data): list_input = False for img in input_data: - for t in self.transforms: - if type(t) in transform_params and transform_params[type(t)] in self.params: - params = self.params[transform_params[type(t)]] - img = t(img, **params) + for transform in self.transforms: + try: + name = transform.__name__ + except AttributeError: + name = transform.__class__.__name__ + + if name in self.params: + img = transform(img, **self.params[name]) else: - img = t(img) - if type(t) in transform_params: - self.params[transform_params[type(t)]] = t.params + img = 
transform(img) + if hasattr(transform, 'params'): + self.params[name] = transform.params output_data.append(img) if list_input: @@ -351,12 +395,13 @@ def get_base_transforms(target_image_size=TARGET_IMAGE_SIZE): ]) -def get_random_augmentations(target_image_size=TARGET_IMAGE_SIZE): +def get_random_augmentations(target_image_size=TARGET_IMAGE_SIZE, mask_point=None): return ComposeWithParams([ ConvertToRGB(), transforms.ToTensor(), RandomRotationWithParams(20, interpolation=transforms.InterpolationMode.BILINEAR), - RandomResizedCropWithParams(TARGET_IMAGE_SIZE, scale=(0.2, 1.0), ratio=(1.0, 1.0)), + RandomResizedCropWithParams( + TARGET_IMAGE_SIZE, scale=(0.2, 1.0), ratio=(1.0, 1.0), include_point=mask_point), RandomHorizontalFlipWithParams(), RandomVerticalFlipWithParams() ]) @@ -378,12 +423,13 @@ def get_tall_base_transforms(target_image_size=TARGET_IMAGE_SIZE): ]) # Medium difficulty transforms with mild distortions -def get_random_augmentations_medium(target_image_size=TARGET_IMAGE_SIZE): +def get_random_augmentations_medium(target_image_size=TARGET_IMAGE_SIZE, mask_point=None): return ComposeWithParams([ ConvertToRGB(), transforms.ToTensor(), RandomRotationWithParams(20, interpolation=transforms.InterpolationMode.BILINEAR), - RandomResizedCropWithParams(target_image_size, scale=(0.2, 1.0), ratio=(1.0, 1.0)), + RandomResizedCropWithParams( + TARGET_IMAGE_SIZE, scale=(0.2, 1.0), ratio=(1.0, 1.0), include_point=mask_point), RandomHorizontalFlipWithParams(), RandomVerticalFlipWithParams(), ApplyDeeperForensicsDistortion('CS', level_min=0, level_max=1), @@ -392,12 +438,13 @@ def get_random_augmentations_medium(target_image_size=TARGET_IMAGE_SIZE): ]) # Hard difficulty transforms with more severe distortions -def get_random_augmentations_hard(target_image_size=TARGET_IMAGE_SIZE): +def get_random_augmentations_hard(target_image_size=TARGET_IMAGE_SIZE, mask_point=None): return ComposeWithParams([ ConvertToRGB(), transforms.ToTensor(), RandomRotationWithParams(20, 
interpolation=transforms.InterpolationMode.BILINEAR), - RandomResizedCropWithParams(target_image_size, scale=(0.2, 1.0), ratio=(1.0, 1.0)), + RandomResizedCropWithParams( + TARGET_IMAGE_SIZE, scale=(0.2, 1.0), ratio=(1.0, 1.0), include_point=mask_point), RandomHorizontalFlipWithParams(), RandomVerticalFlipWithParams(), ApplyDeeperForensicsDistortion('CS', level_min=0, level_max=2), @@ -408,7 +455,11 @@ def get_random_augmentations_hard(target_image_size=TARGET_IMAGE_SIZE): ]) -def apply_augmentation_by_level(image, target_image_size, level_probs={ +def apply_augmentation_by_level( + image, + target_image_size, + mask_point=None, + level_probs={ 0: 0.25, # No augmentations (base transforms) 1: 0.25, # Basic augmentations 2: 0.25, # Medium distortions @@ -449,16 +500,16 @@ def apply_augmentation_by_level(image, target_image_size, level_probs={ if rand_val <= cum_prob: level = curr_level break - + # Apply appropriate transform if level == 0: tforms = get_base_transforms(target_image_size) elif level == 1: - tforms = get_random_augmentations(target_image_size) + tforms = get_random_augmentations(target_image_size, mask_point) elif level == 2: - tforms = get_random_augmentations_medium(target_image_size) + tforms = get_random_augmentations_medium(target_image_size, mask_point) else: # level == 3 - tforms = get_random_augmentations_hard(target_image_size) + tforms = get_random_augmentations_hard(target_image_size, mask_point) transformed = tforms(image) diff --git a/bitmind/validator/cache/image_cache.py b/bitmind/validator/cache/image_cache.py index 2d583373..7ec81340 100644 --- a/bitmind/validator/cache/image_cache.py +++ b/bitmind/validator/cache/image_cache.py @@ -126,7 +126,8 @@ def sample(self, remove_from_cache=False) -> Optional[Dict[str, Any]]: 'image': image, 'path': str(image_path), 'dataset': metadata.get('dataset', None), - 'index': metadata.get('index', None) + 'index': metadata.get('index', None), + 'mask_center': metadata.get('mask_center', None) } except 
Exception as e: diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index ee697d1a..3f225297 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -12,10 +12,16 @@ HunyuanVideoPipeline, AnimateDiffPipeline, EulerDiscreteScheduler, - AutoPipelineForInpainting + AutoPipelineForInpainting, + IFPipeline, + IFSuperResolutionPipeline ) -from .model_utils import load_annimatediff_motion_adapter, load_hunyuanvideo_transformer +from .model_utils import ( + load_annimatediff_motion_adapter, + load_hunyuanvideo_transformer, + JanusWrapper +) TARGET_IMAGE_SIZE: tuple[int, int] = (256, 256) @@ -146,7 +152,72 @@ "use_safetensors": True, "torch_dtype": torch.float16, }, - } + }, + "DeepFloyd/IF": { + "pipeline_cls": { + "stage1": IFPipeline, + "stage2": IFSuperResolutionPipeline + }, + "from_pretrained_args": { + "stage1": { + "base": "DeepFloyd/IF-I-XL-v1.0", + "torch_dtype": torch.float16, + "variant": "fp16", + "clean_caption": False, + "watermarker": None, + "requires_safety_checker": False + }, + "stage2": { + "base": "DeepFloyd/IF-II-L-v1.0", + "torch_dtype": torch.float16, + "variant": "fp16", + "text_encoder": None, + "watermarker": None, + "requires_safety_checker": False + } + }, + "pipeline_stages": [ + { + "name": "stage1", + "args": { + "output_type": "pt", + "num_images_per_prompt": 1, + "return_dict": True + }, + "output_attr": "images", + "output_transform": lambda x: x[0].unsqueeze(0), + "save_prompt_embeds": True + }, + { + "name": "stage2", + "input_key": "image", + "args": { + "output_type": "pil", + "num_images_per_prompt": 1 + }, + "output_attr": "images", + "use_prompt_embeds": True + } + ], + "clear_memory_on_stage_end": True + }, + "deepseek-ai/Janus-Pro-7B": { + "pipeline_cls": JanusWrapper, + "from_pretrained_args": { + "torch_dtype": torch.bfloat16, + "use_safetensors": True, + }, + "generate_args": { + "temperature": 1.0, + "parallel_size": 4, + "cfg_weight": 5.0, + "image_token_num_per_image": 576, + 
"img_size": 384, + "patch_size": 16 + }, + "use_autocast": False, + "enable_model_cpu_offload": False + }, } T2I_MODEL_NAMES: List[str] = list(T2I_MODELS.keys()) @@ -223,8 +294,7 @@ "guidance_scale": 2, "num_videos_per_prompt": 1, "num_inference_steps": {"min": 50, "max": 125}, - "num_frames": 48, - "resolution": [720, 480] + "num_frames": 48 }, "save_args": {"fps": 8}, "enable_model_cpu_offload": True, @@ -310,4 +380,3 @@ def select_random_model(task: Optional[str] = None) -> str: return np.random.choice(I2I_MODEL_NAMES) else: raise NotImplementedError(f"Unsupported task: {task}") - diff --git a/bitmind/validator/forward.py b/bitmind/validator/forward.py index 82b6aaf0..ba955de9 100644 --- a/bitmind/validator/forward.py +++ b/bitmind/validator/forward.py @@ -19,6 +19,7 @@ import random import time +import re import numpy as np import pandas as pd @@ -47,6 +48,27 @@ def determine_challenge_type(media_cache, fake_prob=0.5): return label, modality, task, cache +def sample_video_frames(video_cache, min_frames, max_frames, min_fps=8, max_fps=30): + if np.random.rand() > 0.2: + num_frames = random.randint(min_frames, max_frames) + challenge = video_cache.sample(num_frames, min_fps=min_fps, max_fps=max_fps) + + else: + num_frames_A = random.randint(min_frames, max_frames - 1) + sample_A = video_cache.sample(num_frames_A, min_fps=min_fps, max_fps=max_fps) + if sample_A is None: + return None + num_frames_B = random.randint(min_frames, max(max_frames - num_frames_A, min_frames + 1)) + sample_B = video_cache.sample(num_frames_B, fps=sample_A['fps']) + challenge = { + 'videos': [sample_A['video'], sample_B['video']], # for wandb logging to handle different shapes + 'video': sample_A['video'] + sample_B['video'], + 'num_frames': sample_A['num_frames'] + sample_B['num_frames'], + 'fps': sample_A['fps'] + } + return challenge + + async def forward(self): """ The forward function is called by the validator every time step. 
@@ -75,11 +97,8 @@ async def forward(self): bt.logging.info(f"Sampling data from {modality} cache") if modality == 'video': - num_frames = random.randint( - self.config.neuron.clip_frames_min, - self.config.neuron.clip_frames_max) - challenge = cache.sample(num_frames, min_fps=8, max_fps=30) - + challenge = sample_video_frames( + cache, self.config.neuron.clip_frames_min, self.config.neuron.clip_frames_max) elif modality == 'image': challenge = cache.sample() @@ -90,8 +109,15 @@ async def forward(self): # prepare metadata for logging try: if modality == 'video': - video_arr = np.stack([np.array(img) for img in challenge['video']], axis=0) - challenge_metadata['video'] = wandb.Video(video_arr, fps=1) + if 'videos' in challenge: + for i, video in enumerate(challenge['videos']): + video_arr = np.stack([np.array(img) for img in video], axis=0) + video_arr = video_arr.transpose(0, 3, 1, 2) + challenge_metadata[f'video_{i}'] = wandb.Video(video_arr, fps=1) + else: + video_arr = np.stack([np.array(img) for img in challenge['video']], axis=0) + video_arr = video_arr.transpose(0, 3, 1, 2) + challenge_metadata['video'] = wandb.Video(video_arr, fps=1) challenge_metadata['fps'] = challenge['fps'] challenge_metadata['num_frames'] = challenge['num_frames'] elif modality == 'image': @@ -102,15 +128,19 @@ async def forward(self): return # update logging dict with everything except image/video data - challenge_metadata.update({k: v for k, v in challenge.items() if k != modality}) + challenge_metadata.update({ + k: v for k, v in challenge.items() + if re.match(r'^(?!image$|video$|videos$|video_\d+$).+', k) + }) input_data = challenge[modality] # extract video or image # apply data augmentation pipeline try: - input_data, level, data_aug_params = apply_augmentation_by_level(input_data, TARGET_IMAGE_SIZE) + input_data, level, data_aug_params = apply_augmentation_by_level( + input_data, TARGET_IMAGE_SIZE, challenge.get('mask_center', None)) except Exception as e: - level, 
data_aug_params = -1, {} - bt.logging.error(f"Unable to applay augmentations: {e}") + level, data_aug_params = -1, {} + bt.logging.error(f"Unable to apply augmentations: {e}") challenge_metadata['data_aug_params'] = data_aug_params challenge_metadata['data_aug_level'] = level diff --git a/bitmind/validator/model_utils.py b/bitmind/validator/model_utils.py index b0c414b0..5159d83d 100644 --- a/bitmind/validator/model_utils.py +++ b/bitmind/validator/model_utils.py @@ -1,7 +1,13 @@ import torch -from diffusers import MotionAdapter, HunyuanVideoTransformer3DModel +import numpy as np +from diffusers import MotionAdapter, HunyuanVideoTransformer3DModel, DiffusionPipeline from huggingface_hub import hf_hub_download from safetensors.torch import load_file +from transformers import AutoModelForCausalLM +from janus.models import VLChatProcessor +import PIL.Image +from typing import Dict, Any, Optional +import bittensor as bt def load_hunyuanvideo_transformer( @@ -46,3 +52,254 @@ def load_annimatediff_motion_adapter( ) ) return adapter + + +class JanusWrapper(DiffusionPipeline): + def __init__(self, model, processor): + super().__init__() + self.model = model + self.processor = processor + self.tokenizer = self.processor.tokenizer + self.register_modules( + model=model, + processor=processor, + tokenizer=self.processor.tokenizer + ) + + @torch.inference_mode() + def __call__( + self, + prompt: str, + temperature: float = 1.0, + parallel_size: int = 4, + cfg_weight: float = 5.0, + image_token_num_per_image: int = 576, + img_size: int = 384, + patch_size: int = 16, + **kwargs + ): + conversation = [ + { + "role": "<|User|>", + "content": prompt, + }, + {"role": "<|Assistant|>", "content": ""}, + ] + + sft_format = self.processor.apply_sft_template_for_multi_turn_prompts( + conversations=conversation, + sft_format=self.processor.sft_format, + system_prompt="", + ) + prompt = sft_format + self.processor.image_start_tag + + input_ids = self.processor.tokenizer.encode(prompt)
+ input_ids = torch.LongTensor(input_ids).to(self.device) + + tokens = torch.zeros((parallel_size*2, len(input_ids)), dtype=torch.int).to(self.device) + for i in range(parallel_size*2): + tokens[i, :] = input_ids + if i % 2 != 0: + tokens[i, 1:-1] = self.processor.pad_id + + inputs_embeds = self.model.language_model.get_input_embeddings()(tokens) + generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).to(self.device) + outputs = None + + for i in range(image_token_num_per_image): + outputs = self.model.language_model.model( + inputs_embeds=inputs_embeds, + use_cache=True, + past_key_values=outputs.past_key_values if i != 0 else None + ) + hidden_states = outputs.last_hidden_state + + logits = self.model.gen_head(hidden_states[:, -1, :]) + logit_cond = logits[0::2, :] + logit_uncond = logits[1::2, :] + + logits = logit_uncond + cfg_weight * (logit_cond-logit_uncond) + probs = torch.softmax(logits / temperature, dim=-1) + + next_token = torch.multinomial(probs, num_samples=1) + generated_tokens[:, i] = next_token.squeeze(dim=-1) + + next_token = torch.cat([next_token.unsqueeze(dim=1), next_token.unsqueeze(dim=1)], dim=1).view(-1) + img_embeds = self.model.prepare_gen_img_embeds(next_token) + inputs_embeds = img_embeds.unsqueeze(dim=1) + + dec = self.model.gen_vision_model.decode_code( + generated_tokens.to(dtype=torch.int), + shape=[parallel_size, 8, img_size//patch_size, img_size//patch_size] + ) + dec = dec.to(torch.float32).cpu().numpy().transpose(0, 2, 3, 1) + dec = np.clip((dec + 1) / 2 * 255, 0, 255) + + images = [] + for i in range(parallel_size): + images.append(PIL.Image.fromarray(dec[i].astype(np.uint8))) + + # Return object with images attribute + class Output: + def __init__(self, images): + self.images = images + + return Output(images) + + @classmethod + def from_pretrained(cls, model_path, **kwargs): + model, processor = load_janus_model(model_path, **kwargs) + return cls(model=model, processor=processor) + + def 
to(self, device): + self.model = self.model.to(device) + return self + + +def load_janus_model(model_path: str, **kwargs): + processor = VLChatProcessor.from_pretrained(model_path) + + # Filter kwargs to only include what Janus expects + janus_kwargs = { + 'trust_remote_code': True, + 'torch_dtype': kwargs.get('torch_dtype', torch.bfloat16) + } + + # Let device placement be handled by diffusers like other models + model = AutoModelForCausalLM.from_pretrained( + model_path, + **janus_kwargs + ).eval() + + return model, processor + + +def create_pipeline_generator(model_config: Dict[str, Any], model: Any) -> callable: + """ + Creates a generator function based on pipeline configuration. + + Args: + model_config: Model configuration dictionary + model: Loaded model instance(s) + + Returns: + Callable that handles the generation process for the model + """ + if isinstance(model_config.get('pipeline_stages'), list): + def generate(prompt: str, **kwargs): + output = None + prompt_embeds = None + negative_embeds = None + + for stage in model_config['pipeline_stages']: + stage_args = {**kwargs} # Copy base args + + # Add stage-specific args + if stage.get('input_key') and output is not None: + stage_args[stage['input_key']] = output + + # Add any stage-specific generation args + if stage.get('args'): + stage_args.update(stage['args']) + + # Handle prompt embeddings + if stage.get('use_prompt_embeds') and prompt_embeds is not None: + stage_args['prompt_embeds'] = prompt_embeds + stage_args['negative_prompt_embeds'] = negative_embeds + stage_args.pop('prompt', None) + elif stage.get('save_prompt_embeds'): + # Get embeddings directly from encode_prompt + prompt_embeds, negative_embeds = model[stage['name']].encode_prompt( + prompt=prompt, + device=model[stage['name']].device, + num_images_per_prompt=stage_args.get('num_images_per_prompt', 1), + ) + stage_args['prompt_embeds'] = prompt_embeds + stage_args['negative_prompt_embeds'] = negative_embeds + stage_args.pop('prompt', 
None) + else: + stage_args['prompt'] = prompt + + # Run stage + result = model[stage['name']](**stage_args) + + # Extract output based on stage config + output = getattr(result, stage.get('output_attr', 'images')) + + # Clear memory if configured + if model_config.get('clear_memory_on_stage_end'): + import gc + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + return result + return generate + + # Default single-stage pipeline + return lambda prompt, **kwargs: model(prompt=prompt, **kwargs) + + +def enable_model_optimizations( + model: Any, + device: str, + enable_cpu_offload: bool = False, + enable_sequential_cpu_offload: bool = False, + enable_vae_slicing: bool = False, + enable_vae_tiling: bool = False, + disable_progress_bar: bool = True, + stage_name: Optional[str] = None, +) -> None: + """ + Enables various model optimizations for better memory usage and performance. + + Args: + model: The model to optimize + device: Device to move model to ('cuda', 'cpu', etc) + enable_cpu_offload: Whether to enable model CPU offloading + enable_sequential_cpu_offload: Whether to enable sequential CPU offloading + enable_vae_slicing: Whether to enable VAE slicing + enable_vae_tiling: Whether to enable VAE tiling + disable_progress_bar: Whether to disable the progress bar + stage_name: Optional name of pipeline stage for logging + """ + model_name = f"{stage_name} " if stage_name else "" + + if disable_progress_bar: + bt.logging.info(f"Disabling progress bar for {model_name}model") + model.set_progress_bar_config(disable=True) + + # Handle CPU offloading + if enable_cpu_offload: + bt.logging.info(f"Enabling CPU offload for {model_name}model") + model.enable_model_cpu_offload(device=device) + elif enable_sequential_cpu_offload: + bt.logging.info(f"Enabling sequential CPU offload for {model_name}model") + model.enable_sequential_cpu_offload() + else: + # Only move to device if not using CPU offload + bt.logging.info(f"Moving 
{model_name}model to {device}") + model.to(device) + + # Handle VAE optimizations if not using CPU offload + if not enable_cpu_offload: + if enable_vae_slicing: + bt.logging.info(f"Enabling VAE slicing for {model_name}model") + try: + model.vae.enable_slicing() + except Exception: + try: + model.enable_vae_slicing() + except Exception as e: + bt.logging.warning(f"Failed to enable VAE slicing for {model_name}model: {e}") + + if enable_vae_tiling: + bt.logging.info(f"Enabling VAE tiling for {model_name}model") + try: + model.vae.enable_tiling() + except Exception: + try: + model.enable_vae_tiling() + except Exception as e: + bt.logging.warning(f"Failed to enable VAE tiling for {model_name}model: {e}") diff --git a/bitmind/validator/scripts/run_cache_updater.py b/bitmind/validator/scripts/run_cache_updater.py index b2f94186..12ddc819 100644 --- a/bitmind/validator/scripts/run_cache_updater.py +++ b/bitmind/validator/scripts/run_cache_updater.py @@ -23,51 +23,62 @@ async def main(args): + caches = [] - image_cache = ImageCache( - cache_dir=args.image_cache_dir, - datasets=IMAGE_DATASETS['real'], - parquet_update_interval=args.image_parquet_interval, - image_update_interval=args.image_interval, - num_parquets_per_dataset=5, - num_images_per_source=100, - max_extracted_size_gb=MAX_EXTRACTED_GB, - max_compressed_size_gb=MAX_COMPRESSED_GB - ) - image_cache.start_updater() + if args.mode in ['all', 'image']: + bt.logging.info("Starting image cache updater") + image_cache = ImageCache( + cache_dir=args.image_cache_dir, + datasets=IMAGE_DATASETS['real'], + parquet_update_interval=args.image_parquet_interval, + image_update_interval=args.image_interval, + num_parquets_per_dataset=5, + num_images_per_source=100, + max_extracted_size_gb=MAX_EXTRACTED_GB, + max_compressed_size_gb=MAX_COMPRESSED_GB + ) + image_cache.start_updater() + caches.append(image_cache) - video_cache = VideoCache( - cache_dir=args.video_cache_dir, - datasets=VIDEO_DATASETS['real'], - 
video_update_interval=args.video_interval, - zip_update_interval=args.video_zip_interval, - num_zips_per_dataset=2, - num_videos_per_zip=50, - max_extracted_size_gb=MAX_EXTRACTED_GB, - max_compressed_size_gb=MAX_COMPRESSED_GB - ) - video_cache.start_updater() + if args.mode in ['all', 'video']: + bt.logging.info("Starting video cache updater") + video_cache = VideoCache( + cache_dir=args.video_cache_dir, + datasets=VIDEO_DATASETS['real'], + video_update_interval=args.video_interval, + zip_update_interval=args.video_zip_interval, + num_zips_per_dataset=2, + num_videos_per_zip=50, + max_extracted_size_gb=MAX_EXTRACTED_GB, + max_compressed_size_gb=MAX_COMPRESSED_GB + ) + video_cache.start_updater() + caches.append(video_cache) + + if not caches: + raise ValueError(f"Invalid mode: {args.mode}") while True: - bt.logging.info("Caches running...") + bt.logging.info(f"Running cache updaters for: {args.mode}") await asyncio.sleep(600) # Status update every 10 minutes if __name__ == "__main__": - parser = argparse.ArgumentParser() + parser.add_argument('--mode', type=str, default='all', choices=['all', 'video', 'image'], + help='Which cache updater(s) to run') parser.add_argument('--video-cache-dir', type=str, default=REAL_VIDEO_CACHE_DIR, - help='Directory to cache video data') + help='Directory to cache video data') parser.add_argument('--image-cache-dir', type=str, default=REAL_IMAGE_CACHE_DIR, - help='Directory to cache image data') + help='Directory to cache image data') parser.add_argument('--image-interval', type=int, default=IMAGE_CACHE_UPDATE_INTERVAL, - help='Update interval for images in hours') + help='Update interval for images in hours') parser.add_argument('--image-parquet-interval', type=int, default=IMAGE_PARQUET_CACHE_UPDATE_INTERVAL, - help='Update interval for image parquet files in hours') + help='Update interval for image parquet files in hours') parser.add_argument('--video-interval', type=int, default=VIDEO_CACHE_UPDATE_INTERVAL, - help='Update 
interval for videos in hours') + help='Update interval for videos in hours') parser.add_argument('--video-zip-interval', type=int, default=VIDEO_ZIP_CACHE_UPDATE_INTERVAL, - help='Update interval for video zip files in hours') + help='Update interval for video zip files in hours') args = parser.parse_args() bt.logging.set_info() diff --git a/bitmind/validator/scripts/run_data_generator.py b/bitmind/validator/scripts/run_data_generator.py index 2543384d..51839e86 100644 --- a/bitmind/validator/scripts/run_data_generator.py +++ b/bitmind/validator/scripts/run_data_generator.py @@ -8,12 +8,12 @@ from bitmind.validator.cache import ImageCache from bitmind.validator.config import ( REAL_IMAGE_CACHE_DIR, - SYNTH_CACHE_DIR + SYNTH_CACHE_DIR, + MODEL_NAMES, + get_task ) - if __name__ == '__main__': - parser = argparse.ArgumentParser() parser.add_argument('--image-cache-dir', type=str, default=REAL_IMAGE_CACHE_DIR, help='Directory containing real images to use as reference') @@ -23,8 +23,15 @@ help='Device to run generation on (cuda/cpu)') parser.add_argument('--batch-size', type=int, default=3, help='Number of images to generate per batch') + parser.add_argument('--model', type=str, default=None, choices=MODEL_NAMES, + help='Specific model to test. 
If not specified, uses random models') args = parser.parse_args() + if args.model: + bt.logging.info(f"Using model {args.model} ({get_task(args.model)})") + else: + bt.logging.info(f"No model selected.") + bt.logging.set_info() init_wandb_run(run_base_name='data-generator', **load_validator_info()) @@ -39,7 +46,8 @@ sdg = SyntheticDataGenerator( prompt_type='annotation', - use_random_model=True, + use_random_model=args.model is None, + model_name=args.model, device=args.device, image_cache=image_cache, output_dir=args.output_dir) diff --git a/requirements.txt b/requirements.txt index 7c4725f5..8a5483d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ sentencepiece==0.2.0 timm==1.0.12 einops==0.8.0 ultralytics==8.3.44 +janus @ git+https://github.com/deepseek-ai/Janus.git # Image/Video processing datasets==3.1.0 diff --git a/setup_env.sh b/setup_env.sh index 0934c966..8b1b2470 100755 --- a/setup_env.sh +++ b/setup_env.sh @@ -33,6 +33,7 @@ sudo npm install -g pm2@latest ############################ pip install -e . +pip install -r requirements.txt ############################ # Environment Files Setup # diff --git a/start_validator.sh b/start_validator.sh index 0b49e774..87604a73 100755 --- a/start_validator.sh +++ b/start_validator.sh @@ -37,12 +37,24 @@ if ! huggingface-cli login --token $HUGGING_FACE_TOKEN; then exit 1 fi -# VALIDATOR PROCESS +# STOP VALIDATOR PROCESS if pm2 list | grep -q "$VALIDATOR_PROCESS_NAME"; then echo "Process '$VALIDATOR_PROCESS_NAME' is already running. Deleting it..." pm2 delete $VALIDATOR_PROCESS_NAME fi +# STOP REAL DATA CACHE UPDATER PROCESS +if pm2 list | grep -q "$CACHE_UPDATE_PROCESS_NAME"; then + echo "Process '$CACHE_UPDATE_PROCESS_NAME' is already running. Deleting it..." + pm2 delete $CACHE_UPDATE_PROCESS_NAME +fi + +# STOP SYNTHETIC DATA GENERATOR PROCESS +if pm2 list | grep -q "$DATA_GEN_PROCESS_NAME"; then + echo "Process '$DATA_GEN_PROCESS_NAME' is already running. Deleting it..." 
+ pm2 delete $DATA_GEN_PROCESS_NAME +fi + echo "Verifying access to synthetic image generation models. This may take a few minutes." if ! python3 bitmind/validator/verify_models.py; then echo "Failed to verify diffusion models. Please check the configurations or model access permissions." @@ -59,21 +71,9 @@ pm2 start neurons/validator.py --name $VALIDATOR_PROCESS_NAME -- \ --axon.port $VALIDATOR_AXON_PORT \ --proxy.port $VALIDATOR_PROXY_PORT -# REAL DATA CACHE UPDATER PROCESS -if pm2 list | grep -q "$CACHE_UPDATE_PROCESS_NAME"; then - echo "Process '$CACHE_UPDATE_PROCESS_NAME' is already running. Deleting it..." - pm2 delete $CACHE_UPDATE_PROCESS_NAME -fi - echo "Starting real data cache updater process" pm2 start bitmind/validator/scripts/run_cache_updater.py --name $CACHE_UPDATE_PROCESS_NAME -# SYNTHETIC DATA GENERATOR PROCESS -if pm2 list | grep -q "$DATA_GEN_PROCESS_NAME"; then - echo "Process '$DATA_GEN_PROCESS_NAME' is already running. Deleting it..." - pm2 delete $DATA_GEN_PROCESS_NAME -fi - echo "Starting synthetic data generation process" pm2 start bitmind/validator/scripts/run_data_generator.py --name $DATA_GEN_PROCESS_NAME -- \ --device $DEVICE