From 0789a055b542cf45e650db402fff4dd1b8cf5116 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Mon, 6 Jan 2025 22:50:39 +0900 Subject: [PATCH 01/11] WIP, unifying InpaintingGenerator and SyntheticDataGenerator --- .../synthetic_data_generation/image_utils.py | 54 ++- .../in_painting_generator.py | 342 ------------------ .../synthetic_data_generator.py | 140 +++---- bitmind/validator/config.py | 50 +-- 4 files changed, 157 insertions(+), 429 deletions(-) delete mode 100644 bitmind/synthetic_data_generation/in_painting_generator.py diff --git a/bitmind/synthetic_data_generation/image_utils.py b/bitmind/synthetic_data_generation/image_utils.py index 5c419537..ab16dbb9 100644 --- a/bitmind/synthetic_data_generation/image_utils.py +++ b/bitmind/synthetic_data_generation/image_utils.py @@ -1,6 +1,9 @@ +import numpy as np import PIL import os -import json +from PIL import Image, ImageDraw +from typing import Tuple + from bitmind.validator.config import TARGET_IMAGE_SIZE @@ -56,4 +59,51 @@ def save_images_to_disk(image_dataset, start_index, num_images, save_directory, image.save(file_path, 'JPEG') # Save the image print(f"Saved: {file_path}") except Exception as e: - print(f"Failed to save image {i}: {e}") \ No newline at end of file + print(f"Failed to save image {i}: {e}") + + +def create_random_mask(self, size: Tuple[int, int]) -> Image.Image: + """ + Create a random mask for i2i transformation. 
+ """ + w, h = size + mask = Image.new('RGB', size, 'black') + + if np.random.rand() < 0.5: + # Rectangular mask with smoother edges + width = np.random.randint(w//4, w//2) + height = np.random.randint(h//4, h//2) + + # Center the rectangle with some random offset + x1 = (w - width) // 2 + np.random.randint(-width//4, width//4) + y1 = (h - height) // 2 + np.random.randint(-height//4, height//4) + + # Create mask with PIL draw for smoother edges + draw = ImageDraw.Draw(mask) + draw.rounded_rectangle( + [x1, y1, x1 + width, y1 + height], + radius=min(width, height) // 10, # Smooth corners + fill='white' + ) + else: + # Circular mask with feathered edges + draw = ImageDraw.Draw(mask) + center_x = w//2 + center_y = h//2 + + # Make radius proportional to image size + radius = min(w, h) // 4 + + # Add small random offset to center + center_x += np.random.randint(-radius//4, radius//4) + center_y += np.random.randint(-radius//4, radius//4) + + # Draw multiple circles with decreasing opacity for feathered edge + for r in range(radius, radius-10, -1): + opacity = int(255 * (r - (radius-10)) / 10) + draw.ellipse( + [center_x-r, center_y-r, center_x+r, center_y+r], + fill=(255, 255, 255, opacity) + ) + + return mask \ No newline at end of file diff --git a/bitmind/synthetic_data_generation/in_painting_generator.py b/bitmind/synthetic_data_generation/in_painting_generator.py deleted file mode 100644 index 00f53cf5..00000000 --- a/bitmind/synthetic_data_generation/in_painting_generator.py +++ /dev/null @@ -1,342 +0,0 @@ -import gc -import time -from pathlib import Path -from typing import Dict, Optional, Any, Union, Tuple -import json - -import bittensor as bt -import numpy as np -import torch -from PIL import Image, ImageDraw - -from bitmind.validator.config import ( - HUGGINGFACE_CACHE_DIR, - TEXT_MODERATION_MODEL, - IMAGE_ANNOTATION_MODEL, - I2I_MODELS, - I2I_MODEL_NAMES, - TARGET_IMAGE_SIZE, - select_random_i2i_model -) -from bitmind.synthetic_data_generation.prompt_utils 
import truncate_prompt_if_too_long -from bitmind.synthetic_data_generation.image_annotation_generator import ImageAnnotationGenerator -from bitmind.validator.cache import ImageCache - - -class InPaintingGenerator: - """ - A class for generating image-to-image transformations using inpainting models. - - This class supports generating prompts from input images and applying - inpainting transformations using various models. - """ - - def __init__( - self, - i2i_model_name: Optional[str] = None, - use_random_i2i_model: bool = True, - output_dir: Optional[Union[str, Path]] = None, - image_cache: Optional[ImageCache] = None, - device: str = 'cuda' - ) -> None: - """ - Initialize the I2IGenerator. - - Args: - i2i_model_name: Name of the image-to-image model. - use_random_i2i_model: Whether to randomly select models for generation. - output_dir: Directory to write generated data. - image_cache: Optional image cache instance. - device: Device identifier. - """ - if not use_random_i2i_model and i2i_model_name not in I2I_MODEL_NAMES: - raise ValueError( - f"Invalid model name '{i2i_model_name}'. " - f"Options are {I2I_MODEL_NAMES}" - ) - - self.use_random_i2i_model = use_random_i2i_model - self.i2i_model_name = i2i_model_name - self.i2i_model = None - self.device = device - - if self.use_random_i2i_model and i2i_model_name is not None: - bt.logging.warning( - "i2i_model_name will be ignored (use_random_i2i_model=True)" - ) - self.i2i_model_name = None - - self.image_annotation_generator = ImageAnnotationGenerator( - model_name=IMAGE_ANNOTATION_MODEL, - text_moderation_model_name=TEXT_MODERATION_MODEL - ) - self.image_cache = image_cache - self.output_dir = Path(output_dir) if output_dir else None - if self.output_dir: - (self.output_dir / "image").mkdir(parents=True, exist_ok=True) - - def generate( - self, - image: Image.Image, - custom_prompt: Optional[str] = None - ) -> Dict[str, Any]: - """ - Generate an image-to-image transformation based on input image. 
- - Args: - image: Input image for transformation. - custom_prompt: Optional custom prompt to use instead of generating one. - - Returns: - Dictionary containing generated data information. - """ - # Resize input image to target size at the start - image = image.resize(TARGET_IMAGE_SIZE, Image.Resampling.LANCZOS) - - if custom_prompt is None: - prompt = self.generate_prompt(image, clear_gpu=True) - else: - prompt = custom_prompt - - bt.logging.info("Generating i2i transformation...") - gen_data = self.run_i2i(prompt, image) - self.clear_gpu() - return gen_data - - def generate_prompt( - self, - image: Image.Image, - clear_gpu: bool = True - ) -> str: - """Generate a prompt based on the input image.""" - bt.logging.info("Generating prompt from image") - self.image_annotation_generator.load_models() - prompt = self.image_annotation_generator.generate(image) - if clear_gpu: - self.image_annotation_generator.clear_gpu() - return prompt - - def run_i2i( - self, - prompt: str, - original_image: Image.Image, - model_name: Optional[str] = None - ) -> Dict[str, Any]: - """ - Generate image-to-image transformation based on a text prompt. - - Args: - prompt: The text prompt used to inspire the generation. - original_image: The source image to be inpainted. - model_name: Optional model name to use for generation. - - Returns: - Dictionary containing generated data and metadata. - - Raises: - RuntimeError: If generation fails. 
- """ - if model_name is not None: - self.i2i_model_name = model_name - self.load_i2i_model() - - original_image = original_image.convert('RGB') - - # Use larger image size (1024x1024 or keep original if smaller) - target_size = (1024, 1024) - if original_image.size[0] > target_size[0] or original_image.size[1] > target_size[1]: - original_image = original_image.resize(target_size, Image.Resampling.LANCZOS) - - # Create random mask at same size as image - mask = self.create_random_mask(original_image.size) - - try: - truncated_prompt = truncate_prompt_if_too_long(prompt, self.i2i_model) - generator = torch.Generator(device=self.device).manual_seed(0) - - bt.logging.info(f"Generating inpainting from prompt: {truncated_prompt}") - start_time = time.time() - gen_output = self.i2i_model( - prompt=truncated_prompt, - image=original_image, - mask_image=mask, - guidance_scale=7.5, - num_inference_steps=50, - strength=0.99, - generator=generator, - ) - gen_time = time.time() - start_time - bt.logging.info(f"Finished generation in {gen_time/60:.2f} minutes") - - # Ensure output is in RGB mode - output_image = gen_output.images[0] - output_image = output_image.convert('RGB') - gen_output.images[0] = output_image - - except Exception as e: - bt.logging.error(f"I2I generation error: {e}") - raise RuntimeError(f"Failed to generate i2i image: {e}") - - return { - 'prompt': truncated_prompt, - 'prompt_long': prompt, - 'gen_output': gen_output, - 'time': time.time(), - 'model_name': self.i2i_model_name, - 'gen_time': gen_time - } - - def create_random_mask(self, size: Tuple[int, int]) -> Image.Image: - """ - Create a random mask for i2i transformation. 
- """ - w, h = size - mask = Image.new('RGB', size, 'black') - - if np.random.rand() < 0.5: - # Rectangular mask with smoother edges - width = np.random.randint(w//4, w//2) - height = np.random.randint(h//4, h//2) - - # Center the rectangle with some random offset - x1 = (w - width) // 2 + np.random.randint(-width//4, width//4) - y1 = (h - height) // 2 + np.random.randint(-height//4, height//4) - - # Create mask with PIL draw for smoother edges - draw = ImageDraw.Draw(mask) - draw.rounded_rectangle( - [x1, y1, x1 + width, y1 + height], - radius=min(width, height) // 10, # Smooth corners - fill='white' - ) - else: - # Circular mask with feathered edges - draw = ImageDraw.Draw(mask) - center_x = w//2 - center_y = h//2 - - # Make radius proportional to image size - radius = min(w, h) // 4 - - # Add small random offset to center - center_x += np.random.randint(-radius//4, radius//4) - center_y += np.random.randint(-radius//4, radius//4) - - # Draw multiple circles with decreasing opacity for feathered edge - for r in range(radius, radius-10, -1): - opacity = int(255 * (r - (radius-10)) / 10) - draw.ellipse( - [center_x-r, center_y-r, center_x+r, center_y+r], - fill=(255, 255, 255, opacity) - ) - - return mask - - def load_i2i_model(self, model_name: Optional[str] = None) -> None: - """Load a Hugging Face image-to-image inpainting model to a specific GPU.""" - if model_name is not None: - self.i2i_model_name = model_name - elif self.use_random_i2i_model or model_name == 'random': - model_name = select_random_i2i_model() - self.i2i_model_name = model_name - - bt.logging.info(f"Loading {self.i2i_model_name}") - - pipeline_cls = I2I_MODELS[self.i2i_model_name]['pipeline_cls'] - pipeline_args = I2I_MODELS[self.i2i_model_name]['from_pretrained_args'] - - self.i2i_model = pipeline_cls.from_pretrained( - pipeline_args.get('base', self.i2i_model_name), - cache_dir=HUGGINGFACE_CACHE_DIR, - **pipeline_args, - add_watermarker=False - ) - - 
self.i2i_model.set_progress_bar_config(disable=True) - - # Load scheduler if specified - if 'scheduler' in I2I_MODELS[self.i2i_model_name]: - sched_cls = I2I_MODELS[self.i2i_model_name]['scheduler']['cls'] - sched_args = I2I_MODELS[self.i2i_model_name]['scheduler']['from_config_args'] - self.i2i_model.scheduler = sched_cls.from_config( - self.i2i_model.scheduler.config, - **sched_args - ) - - # Configure model optimizations - model_config = I2I_MODELS[self.i2i_model_name] - if model_config.get('enable_model_cpu_offload', False): - bt.logging.info(f"Enabling cpu offload for {self.i2i_model_name}") - self.i2i_model.enable_model_cpu_offload() - if model_config.get('enable_sequential_cpu_offload', False): - bt.logging.info(f"Enabling sequential cpu offload for {self.i2i_model_name}") - self.i2i_model.enable_sequential_cpu_offload() - if model_config.get('vae_enable_slicing', False): - bt.logging.info(f"Enabling vae slicing for {self.i2i_model_name}") - try: - self.i2i_model.vae.enable_slicing() - except Exception: - try: - self.i2i_model.enable_vae_slicing() - except Exception: - bt.logging.warning(f"Could not enable vae slicing for {self.i2i_model}") - if model_config.get('vae_enable_tiling', False): - bt.logging.info(f"Enabling vae tiling for {self.i2i_model_name}") - try: - self.i2i_model.vae.enable_tiling() - except Exception: - try: - self.i2i_model.enable_vae_tiling() - except Exception: - bt.logging.warning(f"Could not enable vae tiling for {self.i2i_model}") - - self.i2i_model.to(self.device) - bt.logging.info(f"Loaded {self.i2i_model_name} using {pipeline_cls.__name__}.") - - def clear_gpu(self) -> None: - """Clear GPU memory by deleting models and running garbage collection.""" - if self.i2i_model is not None: - bt.logging.info("Clearing i2i model from GPU memory") - self.i2i_model.to('cpu') - del self.i2i_model - self.i2i_model = None - gc.collect() - torch.cuda.empty_cache() - - def batch_generate(self, batch_size: int = 5) -> None: - """ - Generate 
inpainting transformations in batches. - - Args: - batch_size: Number of images to process in each batch. - """ - prompts = [] - bt.logging.info(f"Generating {batch_size} prompts") - for i in range(batch_size): - image_sample = self.image_cache.sample() - bt.logging.info(f"Sampled image {i+1}/{batch_size} for captioning: {image_sample['path']}") - prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1)) - bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}") - - # Randomly select model if enabled - if self.use_random_i2i_model: - model_name = select_random_i2i_model() - else: - model_name = self.i2i_model_name - - for i, prompt in enumerate(prompts): - bt.logging.info(f"Started generation {i+1}/{batch_size} | Model: {model_name} | Prompt: {prompt}") - - # Generate inpainted image from current prompt - start = time.time() - image_sample = self.image_cache.sample() - output = self.run_i2i(prompt, image_sample['image'], model_name) - - bt.logging.info(f'Writing to cache {self.output_dir}') - base_path = Path(self.output_dir) / 'image' / str(output['time']) - metadata = {k: v for k, v in output.items() if k != 'gen_output'} - base_path.with_suffix('.json').write_text(json.dumps(metadata)) - - out_path = base_path.with_suffix('.png') - output['gen_output'].images[0].save(out_path) - bt.logging.info(f"Wrote to {out_path}") \ No newline at end of file diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index ce38a5c3..3b5c72ab 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -17,14 +17,16 @@ HUGGINGFACE_CACHE_DIR, TEXT_MODERATION_MODEL, IMAGE_ANNOTATION_MODEL, - T2VIS_MODELS, - T2VIS_MODEL_NAMES, + MODELS, + MODEL_NAMES, T2V_MODEL_NAMES, T2I_MODEL_NAMES, TARGET_IMAGE_SIZE, - select_random_t2vis_model, + select_random_model, + get_task, 
get_modality ) +from bitmind.synthetic_data_generation.image_utils import create_random_mask from bitmind.synthetic_data_generation.prompt_utils import truncate_prompt_if_too_long from bitmind.synthetic_data_generation.prompt_generator import PromptGenerator from bitmind.validator.cache import ImageCache @@ -54,19 +56,19 @@ class SyntheticDataGenerator: various text-to-video (t2v) and text-to-image (t2i) models. Attributes: - use_random_t2vis_model: Whether to randomly select a t2v or t2i for each + use_random_model: Whether to randomly select a t2v or t2i for each generation task. prompt_type: The type of prompt generation strategy ('random', 'annotation'). prompt_generator_name: Name of the prompt generation model. - t2vis_model_name: Name of the t2v or t2i model. + model_name: Name of the t2v, t2i, or i2i model. prompt_generator: The vlm/llm pipeline for generating input prompts for t2i/t2v models output_dir: Directory to write generated data. """ def __init__( self, - t2vis_model_name: Optional[str] = None, - use_random_t2vis_model: bool = True, + model_name: Optional[str] = None, + use_random_model: bool = True, prompt_type: str = 'annotation', output_dir: Optional[Union[str, Path]] = None, image_cache: Optional[ImageCache] = None, @@ -76,34 +78,33 @@ def __init__( Initialize the SyntheticDataGenerator. Args: - t2vis_model_name: Name of the text-to-video or text-to-image model. - use_random_t2vis_model: Whether to randomly select models for generation. + model_name: Name of the generative image/video model + use_random_model: Whether to randomly select models for generation. prompt_type: The type of prompt generation strategy. output_dir: Directory to write generated data. device: Device identifier. - run_as_daemon: Whether to run generation in the background. image_cache: Optional image cache instance. Raises: ValueError: If an invalid model name is provided. NotImplementedError: If an unsupported prompt type is specified. 
""" - if not use_random_t2vis_model and t2vis_model_name not in T2VIS_MODEL_NAMES: + if not use_random_model and model_name not in MODEL_NAMES: raise ValueError( - f"Invalid model name '{t2vis_model_name}'. " - f"Options are {T2VIS_MODEL_NAMES}" + f"Invalid model name '{model_name}'. " + f"Options are {MODEL_NAMES}" ) - self.use_random_t2vis_model = use_random_t2vis_model - self.t2vis_model_name = t2vis_model_name - self.t2vis_model = None + self.use_random_model = use_random_model + self.model_name = model_name + self.model = None self.device = device - if self.use_random_t2vis_model and t2vis_model_name is not None: + if self.use_random_model and model_name is not None: bt.logging.warning( - "t2vis_model_name will be ignored (use_random_t2vis_model=True)" + "model_name will be ignored (use_random_model=True)" ) - self.t2vis_model_name = None + self.model_name = None self.prompt_type = prompt_type self.image_cache = image_cache @@ -146,8 +147,7 @@ def batch_generate(self, batch_size: int = 5) -> None: bt.logging.info(f"Started generation {i+1}/{batch_size} | Model: {model_name} | Prompt: {prompt}") # Generate image/video from current model and prompt - start = time.time() - output = self.run_t2vis(prompt, modality, t2vis_model_name=model_name) + output = self._run_generation(prompt, model_name=model_name) bt.logging.info(f'Writing to cache {self.output_dir}') base_path = self.output_dir / modality / str(output['time']) @@ -171,7 +171,7 @@ def generate( self, image: Optional[Image.Image] = None, modality: str = 'image', - t2vis_model_name: Optional[str] = None + model_name: Optional[str] = None ) -> Dict[str, Any]: """ Generate synthetic data based on input parameters. 
@@ -189,7 +189,7 @@ def generate( """ prompt = self.generate_prompt(image, clear_gpu=True) bt.logging.info("Generating synthetic data...") - gen_data = self.run_t2vis(prompt, modality, t2vis_model_name) + gen_data = self._run_generation(prompt, modality, model_name) self.clear_gpu() return gen_data @@ -213,11 +213,12 @@ def generate_prompt( raise NotImplementedError(f"Unsupported prompt type: {self.prompt_type}") return prompt - def run_t2vis( + def _run_generation( self, prompt: str, - modality: str, - t2vis_model_name: Optional[str] = None, + task: Optional[str] = None, + model_name: Optional[str] = None, + image: Optional[Image.Image] = None, generate_at_target_size: bool = False, ) -> Dict[str, Any]: @@ -226,8 +227,10 @@ def run_t2vis( Args: prompt: The text prompt used to inspire the generation. + task: The generation task type ('t2i', 't2v', 'i2i', or None). + model_name: Optional model name to use for generation. + image: Optional input image for image-to-image generation. generate_at_target_size: If True, generate at TARGET_IMAGE_SIZE dimensions. - t2vis_model_name: Optional model name to use for generation. Returns: Dictionary containing generated data and metadata. @@ -235,12 +238,23 @@ def run_t2vis( Raises: RuntimeError: If generation fails. 
""" - self.load_t2vis_model(t2vis_model_name) - model_config = T2VIS_MODELS[self.t2vis_model_name] + self.load_model(model_name) + model_config = MODELS[self.model_name] + task = get_task(model_name) if task is None else task bt.logging.info("Preparing generation arguments") gen_args = model_config.get('generate_args', {}).copy() - + + # prep inpainting-specific generation args + if task == 'i2i': + # Use larger image size for better inpainting quality + target_size = (1024, 1024) + if image.size[0] > target_size[0] or image.size[1] > target_size[1]: + image = image.resize(target_size, Image.Resampling.LANCZOS) + + gen_args['mask_image'] = create_random_mask(image.size) + gen_args['image'] = image + # Process generation arguments for k, v in gen_args.items(): if isinstance(v, dict): @@ -259,7 +273,7 @@ def run_t2vis( truncated_prompt = truncate_prompt_if_too_long( prompt, - self.t2vis_model + self.model ) bt.logging.info(f"Generating media from prompt: {truncated_prompt}") @@ -269,12 +283,12 @@ def run_t2vis( pretrained_args = model_config.get('from_pretrained_args', {}) torch_dtype = pretrained_args.get('torch_dtype', torch.bfloat16) with torch.autocast(self.device, torch_dtype, cache_enabled=False): - gen_output = self.t2vis_model( + gen_output = self.model( prompt=truncated_prompt, **gen_args ) else: - gen_output = self.t2vis_model( + gen_output = self.model( prompt=truncated_prompt, **gen_args ) @@ -287,7 +301,7 @@ def run_t2vis( f"default dimensions. 
Error: {e}" ) try: - gen_output = self.t2vis_model(prompt=truncated_prompt) + gen_output = self.model(prompt=truncated_prompt) gen_time = time.time() - start_time except Exception as fallback_error: bt.logging.error( @@ -307,80 +321,80 @@ def run_t2vis( 'prompt_long': prompt, 'gen_output': gen_output, # image or video 'time': time.time(), - 'model_name': self.t2vis_model_name, + 'model_name': self.model_name, 'gen_time': gen_time } - def load_t2vis_model(self, model_name: Optional[str] = None, modality: Optional[str] = None) -> None: + def load_model(self, model_name: Optional[str] = None, modality: Optional[str] = None) -> None: """Load a Hugging Face text-to-image or text-to-video model to a specific GPU.""" if model_name is not None: - self.t2vis_model_name = model_name - elif self.use_random_t2vis_model or model_name == 'random': - model_name = select_random_t2vis_model(modality) - self.t2vis_model_name = model_name + self.model_name = model_name + elif self.use_random_model or model_name == 'random': + model_name = select_random_model(modality) + self.model_name = model_name - bt.logging.info(f"Loading {self.t2vis_model_name}") + bt.logging.info(f"Loading {self.model_name}") - pipeline_cls = T2VIS_MODELS[model_name]['pipeline_cls'] - pipeline_args = T2VIS_MODELS[model_name]['from_pretrained_args'] + pipeline_cls = MODELS[model_name]['pipeline_cls'] + pipeline_args = MODELS[model_name]['from_pretrained_args'] - self.t2vis_model = pipeline_cls.from_pretrained( + self.model = pipeline_cls.from_pretrained( pipeline_args.get('base', model_name), cache_dir=HUGGINGFACE_CACHE_DIR, **pipeline_args, add_watermarker=False ) - self.t2vis_model.set_progress_bar_config(disable=True) + self.model.set_progress_bar_config(disable=True) # Load scheduler if specified - if 'scheduler' in T2VIS_MODELS[model_name]: - sched_cls = T2VIS_MODELS[model_name]['scheduler']['cls'] - sched_args = T2VIS_MODELS[model_name]['scheduler']['from_config_args'] - self.t2vis_model.scheduler = 
sched_cls.from_config( - self.t2vis_model.scheduler.config, + if 'scheduler' in MODELS[model_name]: + sched_cls = MODELS[model_name]['scheduler']['cls'] + sched_args = MODELS[model_name]['scheduler']['from_config_args'] + self.model.scheduler = sched_cls.from_config( + self.model.scheduler.config, **sched_args ) # Configure model optimizations - model_config = T2VIS_MODELS[model_name] + model_config = MODELS[model_name] if model_config.get('enable_model_cpu_offload', False): bt.logging.info(f"Enabling cpu offload for {model_name}") - self.t2vis_model.enable_model_cpu_offload() + self.model.enable_model_cpu_offload() if model_config.get('enable_sequential_cpu_offload', False): bt.logging.info(f"Enabling sequential cpu offload for {model_name}") - self.t2vis_model.enable_sequential_cpu_offload() + self.model.enable_sequential_cpu_offload() if model_config.get('vae_enable_slicing', False): bt.logging.info(f"Enabling vae slicing for {model_name}") try: - self.t2vis_model.vae.enable_slicing() + self.model.vae.enable_slicing() except Exception: try: - self.t2vis_model.enable_vae_slicing() + self.model.enable_vae_slicing() except Exception: - bt.logging.warning(f"Could not enable vae slicing for {self.t2vis_model}") + bt.logging.warning(f"Could not enable vae slicing for {self.model}") if model_config.get('vae_enable_tiling', False): bt.logging.info(f"Enabling vae tiling for {model_name}") try: - self.t2vis_model.vae.enable_tiling() + self.model.vae.enable_tiling() except Exception: try: - self.t2vis_model.enable_vae_tiling() + self.model.enable_vae_tiling() except Exception: - bt.logging.warning(f"Could not enable vae tiling for {self.t2vis_model}") + bt.logging.warning(f"Could not enable vae tiling for {self.model}") - self.t2vis_model.to(self.device) + self.model.to(self.device) bt.logging.info(f"Loaded {model_name} using {pipeline_cls.__name__}.") def clear_gpu(self) -> None: """Clear GPU memory by deleting models and running garbage collection.""" - if 
self.t2vis_model is not None: + if self.model is not None: bt.logging.info( "Deleting previous text-to-image or text-to-video model, " "freeing memory" ) - del self.t2vis_model - self.t2vis_model = None + del self.model + self.model = None gc.collect() torch.cuda.empty_cache() diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index 867f6609..a7d630b2 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -141,7 +141,7 @@ "from_pretrained_args": { "use_safetensors": True, "torch_dtype": torch.float16, - } + }, } } T2I_MODEL_NAMES: List[str] = list(T2I_MODELS.keys()) @@ -154,6 +154,12 @@ "use_safetensors": True, "torch_dtype": torch.float16, "variant": "fp16" + }, + "generate_args": { + "guidance_scale": 7.5, + "num_inference_steps": 50, + "strength": 0.99, + "generator": torch.Generator("cuda" if torch.cuda.is_available() else "cpu"), } } } @@ -213,27 +219,34 @@ T2V_MODEL_NAMES: List[str] = list(T2V_MODELS.keys()) # Combined model configurations -T2VIS_MODELS: Dict[str, Dict[str, Any]] = {**T2I_MODELS, **I2I_MODELS, **T2V_MODELS} -T2VIS_MODEL_NAMES: List[str] = list(T2VIS_MODELS.keys()) +MODELS: Dict[str, Dict[str, Any]] = {**T2I_MODELS, **I2I_MODELS, **T2V_MODELS} +MODEL_NAMES: List[str] = list(MODELS.keys()) def get_modality(model_name): - if model_name in T2V_MODEL_NAMES: + if model_name in T2V_MODEL_NAMES: return 'video' + elif model_name in T2I_MODEL_NAMES + I2I_MODEL_NAMES: + return 'image' + + +def get_task(model_name): + if model_name in T2V_MODEL_NAMES: + return 't2v' elif model_name in T2I_MODEL_NAMES: - return 'image' + return 't2i' elif model_name in I2I_MODEL_NAMES: return 'i2i' -def select_random_t2vis_model(modality: Optional[str] = None) -> str: +def select_random_model(task: Optional[str] = None) -> str: """ Select a random text-to-image or text-to-video model based on the specified modality. Args: - modality: The type of model to select ('image', 'video', or 'random'). 
- If None or 'random', randomly chooses between image and video. + modality: The type of model to select ('t2v', 't2i', 'i2i', or 'random'). + If None or 'random', randomly chooses between the valid options Returns: The name of the selected model. @@ -241,22 +254,15 @@ def select_random_t2vis_model(modality: Optional[str] = None) -> str: Raises: NotImplementedError: If the specified modality is not supported. """ - if modality is None or modality == 'random': - modality = np.random.choice(['image', 'video']) + if task is None or task == 'random': + task = np.random.choice(['t2i', 'i2i', 't2v']) - if modality == 'image': + if task == 't2i': return np.random.choice(T2I_MODEL_NAMES) - elif modality == 'video': + elif task == 't2v': return np.random.choice(T2V_MODEL_NAMES) + elif task == 'i2i': + return np.random.choice(I2I_MODEL_NAMES) else: - raise NotImplementedError(f"Unsupported modality: {modality}") - - -def select_random_i2i_model() -> str: - """ - Select a random image-to-image inpainting model. + raise NotImplementedError(f"Unsupported task: {task}") - Returns: - The name of the selected model. 
- """ - return np.random.choice(I2I_MODEL_NAMES) From 694535d777730cf96660adc85da121c8d5730da4 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Mon, 6 Jan 2025 23:14:40 +0900 Subject: [PATCH 02/11] minor simplification of forward flow --- bitmind/validator/forward.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/bitmind/validator/forward.py b/bitmind/validator/forward.py index 9b242b9b..d6801556 100644 --- a/bitmind/validator/forward.py +++ b/bitmind/validator/forward.py @@ -58,22 +58,21 @@ async def forward(self): challenge_metadata['modality'] = modality bt.logging.info(f"Sampling data from {modality} cache") - + cache = self.media_cache[CHALLENGE_TYPE[label]][modality] + if modality == 'video': - cache = self.media_cache[CHALLENGE_TYPE[label]][modality] num_frames = random.randint( self.config.neuron.clip_frames_min, self.config.neuron.clip_frames_max) challenge = cache.sample(num_frames, min_fps=8, max_fps=30) + elif modality == 'image': - if label == 1: # synthetic - # 20% chance to use i2i (in-painting + if label == 1: + # 20% chance to use i2i (in-painting) synthetic_type = 'i2i' if np.random.rand() < 0.2 else 't2i' - cache = self.media_cache[CHALLENGE_TYPE[label]][modality][synthetic_type] challenge_metadata['synthetic_type'] = synthetic_type - bt.logging.info(f"Using {synthetic_type} image cache") - else: # real - cache = self.media_cache[CHALLENGE_TYPE[label]][modality] + cache = cache[synthetic_type] + challenge = cache.sample() if challenge is None: From 13348ff872cfe6fa3f86564d3d572896c00362fb Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 05:57:49 +0900 Subject: [PATCH 03/11] simplifying forward flow --- bitmind/validator/forward.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/bitmind/validator/forward.py b/bitmind/validator/forward.py index d6801556..e2a3188c 100644 --- a/bitmind/validator/forward.py +++ b/bitmind/validator/forward.py @@ -31,6 +31,19 @@ 
from bitmind.validator.config import CHALLENGE_TYPE, MAINNET_UID, TARGET_IMAGE_SIZE from bitmind.validator.reward import get_rewards +def determine_challenge_type(fake_prob=0.5): + modality = 'video' if np.random.rand() > 0.5 else 'image' + label = 0 if np.random.rand() > fake_prob else 1 + task = None + if label == 1: + if modality == 'video': + task = 't2v' + elif modality == 'image': + # 20% chance to use i2i (in-painting) + task = 'i2i' if np.random.rand() < 0.2 else 't2i' + + return label, modality, task + async def forward(self): """ @@ -52,10 +65,10 @@ async def forward(self): challenge_metadata = {} # for bookkeeping challenge = {} # for querying miners - modality = 'video' if np.random.rand() > 0.5 else 'image' - label = 0 if np.random.rand() > self._fake_prob else 1 + label, modality, source_model_task = determine_challenge_type() challenge_metadata['label'] = label challenge_metadata['modality'] = modality + challenge_metadata['source_model_task'] = source_model_task bt.logging.info(f"Sampling data from {modality} cache") cache = self.media_cache[CHALLENGE_TYPE[label]][modality] @@ -67,12 +80,6 @@ async def forward(self): challenge = cache.sample(num_frames, min_fps=8, max_fps=30) elif modality == 'image': - if label == 1: - # 20% chance to use i2i (in-painting) - synthetic_type = 'i2i' if np.random.rand() < 0.2 else 't2i' - challenge_metadata['synthetic_type'] = synthetic_type - cache = cache[synthetic_type] - challenge = cache.sample() if challenge is None: From 2c4c8bdb9dca96a0d88d7dd5b269f43b97c8e92f Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 05:58:46 +0900 Subject: [PATCH 04/11] standardizing cache structures with the introduction of task type subdirs --- bitmind/validator/config.py | 14 +++++++++----- neurons/validator.py | 16 ++++++++-------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index a7d630b2..91b0d574 100644 --- a/bitmind/validator/config.py 
+++ b/bitmind/validator/config.py @@ -30,15 +30,19 @@ # Cache directories HUGGINGFACE_CACHE_DIR: Path = Path.home() / '.cache' / 'huggingface' SN34_CACHE_DIR: Path = Path.home() / '.cache' / 'sn34' +SN34_CACHE_DIR.mkdir(parents=True, exist_ok=True) + +VALIDATOR_INFO_PATH: Path = SN34_CACHE_DIR / 'validator.yaml' + REAL_CACHE_DIR: Path = SN34_CACHE_DIR / 'real' SYNTH_CACHE_DIR: Path = SN34_CACHE_DIR / 'synthetic' + REAL_VIDEO_CACHE_DIR: Path = REAL_CACHE_DIR / 'video' REAL_IMAGE_CACHE_DIR: Path = REAL_CACHE_DIR / 'image' -SYNTH_VIDEO_CACHE_DIR: Path = SYNTH_CACHE_DIR / 'video' -SYNTH_IMAGE_T2I_CACHE_DIR: Path = SYNTH_CACHE_DIR / 'image' / 't2i' -SYNTH_IMAGE_I2I_CACHE_DIR: Path = SYNTH_CACHE_DIR / 'image' / 'i2i' -VALIDATOR_INFO_PATH: Path = SN34_CACHE_DIR / 'validator.yaml' -SN34_CACHE_DIR.mkdir(parents=True, exist_ok=True) + +T2V_CACHE_DIR: Path = SYNTH_CACHE_DIR / 't2v' +T2I_CACHE_DIR: Path = SYNTH_CACHE_DIR / 't2i' +I2I_CACHE_DIR: Path = SYNTH_CACHE_DIR / 'i2i' # Update intervals in hours VIDEO_ZIP_CACHE_UPDATE_INTERVAL = 3 diff --git a/neurons/validator.py b/neurons/validator.py index ff6797d5..d144a441 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -33,14 +33,12 @@ MAINNET_UID, MAINNET_WANDB_PROJECT, TESTNET_WANDB_PROJECT, - IMAGE_DATASETS, - VIDEO_DATASETS, WANDB_ENTITY, REAL_VIDEO_CACHE_DIR, REAL_IMAGE_CACHE_DIR, - SYNTH_IMAGE_T2I_CACHE_DIR, - SYNTH_IMAGE_I2I_CACHE_DIR, - SYNTH_VIDEO_CACHE_DIR, + T2I_CACHE_DIR, + I2I_CACHE_DIR, + T2V_CACHE_DIR, VALIDATOR_INFO_PATH ) @@ -76,10 +74,12 @@ def __init__(self, config=None): # synthetic media caches are populated by the SyntheticDataGenerator process (started by start_validator.sh) self.synthetic_media_cache = { 'image': { - 't2i': ImageCache(SYNTH_IMAGE_T2I_CACHE_DIR), - 'i2i': ImageCache(SYNTH_IMAGE_I2I_CACHE_DIR) + 't2i': ImageCache(T2I_CACHE_DIR), + 'i2i': ImageCache(I2I_CACHE_DIR) }, - 'video': VideoCache(SYNTH_VIDEO_CACHE_DIR) + 'video': { + 't2v': VideoCache(T2V_CACHE_DIR) + } } 
self.media_cache = { From ca38347bd2125108295cc6490068a5d64e91e5fd Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 05:59:02 +0900 Subject: [PATCH 05/11] adding i2i models to batch generation --- .../synthetic_data_generator.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 3b5c72ab..4c487756 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -6,6 +6,7 @@ import warnings from pathlib import Path from typing import Dict, Optional, Any, Union +from itertools import zip_longest import bittensor as bt import numpy as np @@ -21,6 +22,7 @@ MODEL_NAMES, T2V_MODEL_NAMES, T2I_MODEL_NAMES, + I2I_MODEL_NAMES, TARGET_IMAGE_SIZE, select_random_model, get_task, @@ -137,12 +139,19 @@ def batch_generate(self, batch_size: int = 5) -> None: prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1)) bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}") - # shuffle and interleave models + # shuffle and interleave models to add stochasticity to initial validator challenges + i2i_model_names = random.sample(I2I_MODEL_NAMES, len(I2I_MODEL_NAMES)) t2i_model_names = random.sample(T2I_MODEL_NAMES, len(T2I_MODEL_NAMES)) t2v_model_names = random.sample(T2V_MODEL_NAMES, len(T2V_MODEL_NAMES)) - model_names = [m for pair in zip(t2v_model_names, t2i_model_names) for m in pair] - for model_name in model_names: + model_names_interleaved = [ + m for triple in zip_longest(t2v_model_names, t2i_model_names, i2i_model_names) + for m in triple if m is not None + ] + + # for each model, generate an image/video from the prompt generated for its specific tokenizer max len + for model_name in model_names_interleaved: modality = get_modality(model_name) + task = get_task(model_name) 
 for i, prompt in enumerate(prompts): bt.logging.info(f"Started generation {i+1}/{batch_size} | Model: {model_name} | Prompt: {prompt}") @@ -150,7 +159,7 @@ def batch_generate(self, batch_size: int = 5) -> None: output = self._run_generation(prompt, model_name=model_name) bt.logging.info(f'Writing to cache {self.output_dir}') - base_path = self.output_dir / modality / str(output['time']) + base_path = self.output_dir / modality / task / str(output['time']) metadata = {k: v for k, v in output.items() if k != 'gen_output'} base_path.with_suffix('.json').write_text(json.dumps(metadata)) From caabd93e066d28bcfe5403054df0995c172391db Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 05:59:41 +0900 Subject: [PATCH 06/11] removing deprecated InPaintingGenerator from run script --- bitmind/validator/scripts/run_data_generator.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/bitmind/validator/scripts/run_data_generator.py b/bitmind/validator/scripts/run_data_generator.py index 90865ec6..fd16c309 100644 --- a/bitmind/validator/scripts/run_data_generator.py +++ b/bitmind/validator/scripts/run_data_generator.py @@ -48,20 +48,12 @@ image_cache=image_cache, output_dir=args.t2i_output_dir) - ipg = InPaintingGenerator( - use_random_i2i_model=True, - device=args.device, - image_cache=image_cache, - output_dir=args.i2i_output_dir) - bt.logging.info("Starting data generator service") sdg.batch_generate(batch_size=1) - ipg.batch_generate(batch_size=1) while True: try: sdg.batch_generate(batch_size=args.batch_size) - ipg.batch_generate(batch_size=args.batch_size) except Exception as e: bt.logging.error(f"Error in batch generation: {str(e)}") time.sleep(5) From ba47e0c268b06f382f622316a15c5ff2a80e02e7 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 06:41:50 +0900 Subject: [PATCH 07/11] adding --clear-cache option for validator --- run_neuron.py | 7 +++++-- start_validator.sh | 13 +++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff
--git a/run_neuron.py b/run_neuron.py index 8105cb6d..7904b256 100644 --- a/run_neuron.py +++ b/run_neuron.py @@ -61,6 +61,7 @@ def run_auto_update_self_heal(neuron_type, auto_update, self_heal): parser.add_argument("--miner", action="store_true") parser.add_argument("--no-self-heal", action="store_true", help="Disable the automatic restart of the PM2 process") parser.add_argument("--no-auto-update", action="store_true", help="Disable the automatic update of the local repository") + parser.add_argument("--clear-cache", action="store_true", help="Clear the cache before starting validator") args = parser.parse_args() if not (args.miner ^ args.validator): @@ -69,11 +70,13 @@ def run_auto_update_self_heal(neuron_type, auto_update, self_heal): neuron_type = 'miner' if args.miner else 'validator' - os.system(f"./start_{neuron_type}.sh") + if args.clear_cache and args.validator: + os.system(f"./start_{neuron_type}.sh --clear-cache") + else: + os.system(f"./start_{neuron_type}.sh") if not args.no_auto_update or not args.no_self_heal: run_auto_update_self_heal( neuron_type, auto_update=not args.no_auto_update, self_heal=not args.no_self_heal) - diff --git a/start_validator.sh b/start_validator.sh index 9ca0d22f..0b49e774 100755 --- a/start_validator.sh +++ b/start_validator.sh @@ -12,6 +12,19 @@ VALIDATOR_PROCESS_NAME="bitmind_validator" DATA_GEN_PROCESS_NAME="bitmind_data_generator" CACHE_UPDATE_PROCESS_NAME="bitmind_cache_updater" +# Clear cache if specified +while [[ $# -gt 0 ]]; do + case $1 in + --clear-cache) + rm -rf ~/.cache/sn34 + shift + ;; + *) + shift + ;; + esac +done + # Login to Weights & Biases if ! wandb login $WANDB_API_KEY; then echo "Failed to login to Weights & Biases with the provided API key." 
From 4ef2b4b46eca6598997c29185cfbb89884224ff7 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 08:47:19 +0000 Subject: [PATCH 08/11] updating SDG init params --- bitmind/synthetic_data_generation/__init__.py | 1 - bitmind/validator/scripts/run_data_generator.py | 2 +- bitmind/validator/verify_models.py | 13 +++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bitmind/synthetic_data_generation/__init__.py b/bitmind/synthetic_data_generation/__init__.py index 94c360d3..5a2682ac 100644 --- a/bitmind/synthetic_data_generation/__init__.py +++ b/bitmind/synthetic_data_generation/__init__.py @@ -1,3 +1,2 @@ from .synthetic_data_generator import SyntheticDataGenerator -from .in_painting_generator import InPaintingGenerator from .prompt_generator import PromptGenerator diff --git a/bitmind/validator/scripts/run_data_generator.py b/bitmind/validator/scripts/run_data_generator.py index fd16c309..31fda239 100644 --- a/bitmind/validator/scripts/run_data_generator.py +++ b/bitmind/validator/scripts/run_data_generator.py @@ -43,7 +43,7 @@ sdg = SyntheticDataGenerator( prompt_type='annotation', - use_random_t2vis_model=True, + use_random_model=True, device=args.device, image_cache=image_cache, output_dir=args.t2i_output_dir) diff --git a/bitmind/validator/verify_models.py b/bitmind/validator/verify_models.py index 278a0ff3..a16b7123 100644 --- a/bitmind/validator/verify_models.py +++ b/bitmind/validator/verify_models.py @@ -1,6 +1,6 @@ import os from bitmind.synthetic_data_generation import SyntheticDataGenerator -from bitmind.validator.config import T2VIS_MODEL_NAMES as MODEL_NAMES, IMAGE_ANNOTATION_MODEL, TEXT_MODERATION_MODEL +from bitmind.validator.config import MODEL_NAMES, IMAGE_ANNOTATION_MODEL, TEXT_MODERATION_MODEL import bittensor as bt @@ -40,7 +40,8 @@ def main(): bt.logging.info("Verifying validator model downloads....") synthetic_image_generator = SyntheticDataGenerator( prompt_type='annotation', - use_random_t2vis_model=True + 
image_cache='test', + use_random_model=True ) # Check and load annotation and moderation models if not cached @@ -52,11 +53,11 @@ def main(): for model_name in MODEL_NAMES: if not is_model_cached(model_name): synthetic_image_generator = SyntheticDataGenerator( - prompt_type='annotation', - use_random_t2vis_model=False, - t2vis_model_name=model_name + prompt_type=None, + use_random_model=False, + model_name=model_name ) - synthetic_image_generator.load_t2vis_model(model_name) + synthetic_image_generator.load_model(model_name) synthetic_image_generator.clear_gpu() From 935fbf6b678380156c920ac9c0f47f34401b36b8 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 09:40:12 +0000 Subject: [PATCH 09/11] fixing last imports + directory structure references --- .../synthetic_data_generator.py | 6 +++--- bitmind/utils/mock.py | 2 +- bitmind/validator/forward.py | 11 ++++++----- bitmind/validator/scripts/run_data_generator.py | 10 +++------- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 4c487756..463b3a20 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -120,9 +120,9 @@ def __init__( self.output_dir = Path(output_dir) if output_dir else None if self.output_dir: - (self.output_dir / "video").mkdir(parents=True, exist_ok=True) - (self.output_dir / "image").mkdir(parents=True, exist_ok=True) - + (self.output_dir / "t2v").mkdir(parents=True, exist_ok=True) + (self.output_dir / "t2i").mkdir(parents=True, exist_ok=True) + (self.output_dir / "i2i").mkdir(parents=True, exist_ok=True) def batch_generate(self, batch_size: int = 5) -> None: """ diff --git a/bitmind/utils/mock.py b/bitmind/utils/mock.py index bfb6639a..8fae787f 100644 --- a/bitmind/utils/mock.py +++ b/bitmind/utils/mock.py @@ -6,7 +6,7 @@ from typing import List from 
PIL import Image -from bitmind.validator.config import T2VIS_MODEL_NAMES as MODEL_NAMES +from bitmind.validator.config import MODEL_NAMES from bitmind.validator.miner_performance_tracker import MinerPerformanceTracker diff --git a/bitmind/validator/forward.py b/bitmind/validator/forward.py index e2a3188c..82b6aaf0 100644 --- a/bitmind/validator/forward.py +++ b/bitmind/validator/forward.py @@ -31,9 +31,11 @@ from bitmind.validator.config import CHALLENGE_TYPE, MAINNET_UID, TARGET_IMAGE_SIZE from bitmind.validator.reward import get_rewards -def determine_challenge_type(fake_prob=0.5): + +def determine_challenge_type(media_cache, fake_prob=0.5): modality = 'video' if np.random.rand() > 0.5 else 'image' label = 0 if np.random.rand() > fake_prob else 1 + cache = media_cache[CHALLENGE_TYPE[label]][modality] task = None if label == 1: if modality == 'video': @@ -41,8 +43,8 @@ def determine_challenge_type(fake_prob=0.5): elif modality == 'image': # 20% chance to use i2i (in-painting) task = 'i2i' if np.random.rand() < 0.2 else 't2i' - - return label, modality, task + cache = cache[task] + return label, modality, task, cache async def forward(self): @@ -65,13 +67,12 @@ async def forward(self): challenge_metadata = {} # for bookkeeping challenge = {} # for querying miners - label, modality, source_model_task = determine_challenge_type() + label, modality, source_model_task, cache = determine_challenge_type(self.media_cache) challenge_metadata['label'] = label challenge_metadata['modality'] = modality challenge_metadata['source_model_task'] = source_model_task bt.logging.info(f"Sampling data from {modality} cache") - cache = self.media_cache[CHALLENGE_TYPE[label]][modality] if modality == 'video': num_frames = random.randint( diff --git a/bitmind/validator/scripts/run_data_generator.py b/bitmind/validator/scripts/run_data_generator.py index 31fda239..f9a9b213 100644 --- a/bitmind/validator/scripts/run_data_generator.py +++ b/bitmind/validator/scripts/run_data_generator.py @@ 
-5,12 +5,10 @@ from bitmind.validator.scripts.util import load_validator_info, init_wandb_run from bitmind.synthetic_data_generation import SyntheticDataGenerator -from bitmind.synthetic_data_generation import InPaintingGenerator from bitmind.validator.cache import ImageCache from bitmind.validator.config import ( REAL_IMAGE_CACHE_DIR, - SYNTH_IMAGE_T2I_CACHE_DIR, - SYNTH_IMAGE_I2I_CACHE_DIR + SYNTH_CACHE_DIR ) @@ -19,9 +17,7 @@ parser = argparse.ArgumentParser() parser.add_argument('--image-cache-dir', type=str, default=REAL_IMAGE_CACHE_DIR, help='Directory containing real images to use as reference') - parser.add_argument('--t2i-output-dir', type=str, default=SYNTH_IMAGE_T2I_CACHE_DIR, - help='Directory to save generated synthetic data') - parser.add_argument('--i2i-output-dir', type=str, default=SYNTH_IMAGE_I2I_CACHE_DIR, + parser.add_argument('--output-dir', type=str, default=SYNTH_CACHE_DIR, help='Directory to save generated inpainting data') parser.add_argument('--device', type=str, default='cuda', help='Device to run generation on (cuda/cpu)') @@ -46,7 +42,7 @@ use_random_model=True, device=args.device, image_cache=image_cache, - output_dir=args.t2i_output_dir) + output_dir=args.output_dir) bt.logging.info("Starting data generator service") sdg.batch_generate(batch_size=1) From a97c4a1d732f3c70b09882e89af16e1e34bd0233 Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 14:15:53 +0000 Subject: [PATCH 10/11] fixing images passed to generate function for i2i --- bitmind/synthetic_data_generation/image_utils.py | 4 ++-- .../synthetic_data_generator.py | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/bitmind/synthetic_data_generation/image_utils.py b/bitmind/synthetic_data_generation/image_utils.py index ab16dbb9..c8345d57 100644 --- a/bitmind/synthetic_data_generation/image_utils.py +++ b/bitmind/synthetic_data_generation/image_utils.py @@ -62,7 +62,7 @@ def save_images_to_disk(image_dataset, start_index, num_images, 
save_directory, print(f"Failed to save image {i}: {e}") -def create_random_mask(self, size: Tuple[int, int]) -> Image.Image: +def create_random_mask(size: Tuple[int, int]) -> Image.Image: """ Create a random mask for i2i transformation. """ @@ -106,4 +106,4 @@ def create_random_mask(self, size: Tuple[int, int]) -> Image.Image: fill=(255, 255, 255, opacity) ) - return mask \ No newline at end of file + return mask diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 463b3a20..b5d90fb0 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -132,9 +132,11 @@ def batch_generate(self, batch_size: int = 5) -> None: batch_size: Number of prompts to generate in each batch. """ prompts = [] + images = [] bt.logging.info(f"Generating {batch_size} prompts") for i in range(batch_size): image_sample = self.image_cache.sample() + images.append(image_sample['image']) bt.logging.info(f"Sampled image {i+1}/{batch_size} for captioning: {image_sample['path']}") prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1)) bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}") @@ -156,10 +158,10 @@ def batch_generate(self, batch_size: int = 5) -> None: bt.logging.info(f"Started generation {i+1}/{batch_size} | Model: {model_name} | Prompt: {prompt}") # Generate image/video from current model and prompt - output = self._run_generation(prompt, model_name=model_name) + output = self._run_generation(prompt, task=task, model_name=model_name, image=images[i]) bt.logging.info(f'Writing to cache {self.output_dir}') - base_path = self.output_dir / modality / task / str(output['time']) + base_path = self.output_dir / task / str(output['time']) metadata = {k: v for k, v in output.items() if k != 'gen_output'} 
base_path.with_suffix('.json').write_text(json.dumps(metadata)) @@ -179,7 +181,7 @@ def batch_generate(self, batch_size: int = 5) -> None: def generate( self, image: Optional[Image.Image] = None, - modality: str = 'image', + task: Optional[str] = None, model_name: Optional[str] = None ) -> Dict[str, Any]: """ @@ -198,7 +200,7 @@ def generate( """ prompt = self.generate_prompt(image, clear_gpu=True) bt.logging.info("Generating synthetic data...") - gen_data = self._run_generation(prompt, modality, model_name) + gen_data = self._run_generation(prompt, task, model_name, image) self.clear_gpu() return gen_data From 93461d2b1a7d3e20e1a972653ce6c3a7fa9b329a Mon Sep 17 00:00:00 2001 From: Dylan Uys Date: Tue, 7 Jan 2025 19:25:55 +0000 Subject: [PATCH 11/11] option to log masks/original images for i2i challenges --- .../synthetic_data_generation/synthetic_data_generator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index b5d90fb0..8899a5f5 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -162,7 +162,7 @@ def batch_generate(self, batch_size: int = 5) -> None: bt.logging.info(f'Writing to cache {self.output_dir}') base_path = self.output_dir / task / str(output['time']) - metadata = {k: v for k, v in output.items() if k != 'gen_output'} + metadata = {k: v for k, v in output.items() if k != 'gen_output' and 'image' not in k} base_path.with_suffix('.json').write_text(json.dumps(metadata)) if modality == 'image': @@ -333,7 +333,9 @@ def _run_generation( 'gen_output': gen_output, # image or video 'time': time.time(), 'model_name': self.model_name, - 'gen_time': gen_time + 'gen_time': gen_time, + 'mask_image': gen_args.get('mask_image', None), + 'image': gen_args.get('image', None) } def load_model(self, model_name: Optional[str] = 
None, modality: Optional[str] = None) -> None: