diff --git a/bitmind/__init__.py b/bitmind/__init__.py index da534563..bb041705 100644 --- a/bitmind/__init__.py +++ b/bitmind/__init__.py @@ -18,7 +18,7 @@ # DEALINGS IN THE SOFTWARE. -__version__ = "2.1.3" +__version__ = "2.1.4" version_split = __version__.split(".") __spec_version__ = ( (1000 * int(version_split[0])) diff --git a/bitmind/synthetic_data_generation/image_utils.py b/bitmind/synthetic_data_generation/image_utils.py index c8345d57..4593edb4 100644 --- a/bitmind/synthetic_data_generation/image_utils.py +++ b/bitmind/synthetic_data_generation/image_utils.py @@ -75,35 +75,35 @@ def create_random_mask(size: Tuple[int, int]) -> Image.Image: height = np.random.randint(h//4, h//2) # Center the rectangle with some random offset - x1 = (w - width) // 2 + np.random.randint(-width//4, width//4) - y1 = (h - height) // 2 + np.random.randint(-height//4, height//4) + x = (w - width) // 2 + np.random.randint(-width//4, width//4) + y = (h - height) // 2 + np.random.randint(-height//4, height//4) # Create mask with PIL draw for smoother edges draw = ImageDraw.Draw(mask) draw.rounded_rectangle( - [x1, y1, x1 + width, y1 + height], + [x, y, x + width, y + height], radius=min(width, height) // 10, # Smooth corners fill='white' ) else: # Circular mask with feathered edges draw = ImageDraw.Draw(mask) - center_x = w//2 - center_y = h//2 + x = w//2 + y = h//2 # Make radius proportional to image size radius = min(w, h) // 4 # Add small random offset to center - center_x += np.random.randint(-radius//4, radius//4) - center_y += np.random.randint(-radius//4, radius//4) + x += np.random.randint(-radius//4, radius//4) + y += np.random.randint(-radius//4, radius//4) # Draw multiple circles with decreasing opacity for feathered edge for r in range(radius, radius-10, -1): opacity = int(255 * (r - (radius-10)) / 10) draw.ellipse( - [center_x-r, center_y-r, center_x+r, center_y+r], + [x-r, y-r, x+r, y+r], fill=(255, 255, 255, opacity) ) - return mask + return mask, (x, y) diff --git 
a/bitmind/synthetic_data_generation/prompt_generator.py b/bitmind/synthetic_data_generation/prompt_generator.py index a8083b87..a8b8bc09 100644 --- a/bitmind/synthetic_data_generation/prompt_generator.py +++ b/bitmind/synthetic_data_generation/prompt_generator.py @@ -185,8 +185,7 @@ def generate( description += '.' moderated_description = self.moderate(description) - enhanced_description = self.enhance(description) - return enhanced_description + return self.enhance(moderated_description) def moderate(self, description: str, max_new_tokens: int = 80) -> str: """ diff --git a/bitmind/synthetic_data_generation/prompt_utils.py b/bitmind/synthetic_data_generation/prompt_utils.py index 7c5ce81e..f2407f88 100644 --- a/bitmind/synthetic_data_generation/prompt_utils.py +++ b/bitmind/synthetic_data_generation/prompt_utils.py @@ -1,20 +1,23 @@ - - def get_tokenizer_with_min_len(model): """ - Returns the tokenizer with the smallest maximum token length from the 't2vis_model` object. - - If a second tokenizer exists, it compares both and returns the one with the smaller - maximum token length. Otherwise, it returns the available tokenizer. + Returns the tokenizer with the smallest maximum token length. + Args: + model: Single pipeline or dict of pipeline stages. + Returns: - tuple: A tuple containing the tokenizer and its maximum token length. 
+ tuple: (tokenizer, max_token_length) """ - # Check if a second tokenizer is available in the t2vis_model - if hasattr(model, 'tokenizer_2'): - if model.tokenizer.model_max_length > model.tokenizer_2.model_max_length: - return model.tokenizer_2, model.tokenizer_2.model_max_length - return model.tokenizer, model.tokenizer.model_max_length + # Get the model to check for tokenizers + pipeline = model['stage1'] if isinstance(model, dict) else model + + # If model has two tokenizers, return the one with smaller max length + if hasattr(pipeline, 'tokenizer_2'): + len_1 = pipeline.tokenizer.model_max_length + len_2 = pipeline.tokenizer_2.model_max_length + return (pipeline.tokenizer_2, len_2) if len_2 < len_1 else (pipeline.tokenizer, len_1) + + return pipeline.tokenizer, pipeline.tokenizer.model_max_length def truncate_prompt_if_too_long(prompt: str, model): diff --git a/bitmind/synthetic_data_generation/synthetic_data_generator.py b/bitmind/synthetic_data_generation/synthetic_data_generator.py index 7bce9d7a..fd4e1cd3 100644 --- a/bitmind/synthetic_data_generation/synthetic_data_generator.py +++ b/bitmind/synthetic_data_generation/synthetic_data_generator.py @@ -33,6 +33,13 @@ from bitmind.synthetic_data_generation.prompt_utils import truncate_prompt_if_too_long from bitmind.synthetic_data_generation.prompt_generator import PromptGenerator from bitmind.validator.cache import ImageCache +from bitmind.validator.model_utils import ( + load_hunyuanvideo_transformer, + load_annimatediff_motion_adapter, + JanusWrapper, + create_pipeline_generator, + enable_model_optimizations +) future_warning_modules_to_ignore = [ @@ -135,24 +142,30 @@ def batch_generate(self, batch_size: int = 5) -> None: prompts = [] images = [] bt.logging.info(f"Generating {batch_size} prompts") + + # Generate all prompts first for i in range(batch_size): image_sample = self.image_cache.sample() images.append(image_sample['image']) bt.logging.info(f"Sampled image {i+1}/{batch_size} for captioning: 
{image_sample['path']}") prompts.append(self.generate_prompt(image=image_sample['image'], clear_gpu=i==batch_size-1)) bt.logging.info(f"Caption {i+1}/{batch_size} generated: {prompts[-1]}") - - # shuffle and interleave models to add stochasticity to initial validator challenges - i2i_model_names = random.sample(I2I_MODEL_NAMES, len(I2I_MODEL_NAMES)) - t2i_model_names = random.sample(T2I_MODEL_NAMES, len(T2I_MODEL_NAMES)) - t2v_model_names = random.sample(T2V_MODEL_NAMES, len(T2V_MODEL_NAMES)) - model_names_interleaved = [ - m for triple in zip_longest(t2v_model_names, t2i_model_names, i2i_model_names) - for m in triple if m is not None - ] - - # for each model, generate an image/video from the prompt generated for its specific tokenizer max len - for model_name in model_names_interleaved: + + # If specific model is set, use only that model + if not self.use_random_model and self.model_name: + model_names = [self.model_name] + else: + # shuffle and interleave models to add stochasticity + i2i_model_names = random.sample(I2I_MODEL_NAMES, len(I2I_MODEL_NAMES)) + t2i_model_names = random.sample(T2I_MODEL_NAMES, len(T2I_MODEL_NAMES)) + t2v_model_names = random.sample(T2V_MODEL_NAMES, len(T2V_MODEL_NAMES)) + model_names = [ + m for triple in zip_longest(t2v_model_names, t2i_model_names, i2i_model_names) + for m in triple if m is not None + ] + + # Generate for each model/prompt combination + for model_name in model_names: modality = get_modality(model_name) task = get_task(model_name) for i, prompt in enumerate(prompts): @@ -232,7 +245,6 @@ def _run_generation( model_name: Optional[str] = None, image: Optional[Image.Image] = None, generate_at_target_size: bool = False, - ) -> Dict[str, Any]: """ Generate synthetic data based on a text prompt. 
@@ -256,6 +268,7 @@ def _run_generation( bt.logging.info("Preparing generation arguments") gen_args = model_config.get('generate_args', {}).copy() + mask_center = None # prep inpainting-specific generation args if task == 'i2i': @@ -264,7 +277,7 @@ def _run_generation( if image.size[0] > target_size[0] or image.size[1] > target_size[1]: image = image.resize(target_size, Image.Resampling.LANCZOS) - gen_args['mask_image'] = create_random_mask(image.size) + gen_args['mask_image'], mask_center = create_random_mask(image.size) gen_args['image'] = image # Prepare generation arguments @@ -284,28 +297,24 @@ def _run_generation( gen_args['width'] = gen_args['resolution'][1] del gen_args['resolution'] - truncated_prompt = truncate_prompt_if_too_long( - prompt, - self.model - ) - + truncated_prompt = truncate_prompt_if_too_long(prompt, self.model) bt.logging.info(f"Generating media from prompt: {truncated_prompt}") bt.logging.info(f"Generation args: {gen_args}") start_time = time.time() + + # Create pipeline-specific generator + generate = create_pipeline_generator(model_config, self.model) + + # Handle autocast if needed if model_config.get('use_autocast', True): pretrained_args = model_config.get('from_pretrained_args', {}) torch_dtype = pretrained_args.get('torch_dtype', torch.bfloat16) - with torch.autocast(self.device, torch_dtype, cache_enabled=False): - gen_output = self.model( - prompt=truncated_prompt, - **gen_args - ) + with torch.autocast(self.device, torch_dtype, cache_enabled=False): + gen_output = generate(truncated_prompt, **gen_args) else: - gen_output = self.model( - prompt=truncated_prompt, - **gen_args - ) + gen_output = generate(truncated_prompt, **gen_args) + gen_time = time.time() - start_time except Exception as e: @@ -338,78 +347,90 @@ def _run_generation( 'model_name': self.model_name, 'gen_time': gen_time, 'mask_image': gen_args.get('mask_image', None), + 'mask_center': mask_center, 'image': gen_args.get('image', None) } def load_model(self, 
model_name: Optional[str] = None, modality: Optional[str] = None) -> None: - """Load a Hugging Face text-to-image or text-to-video model to a specific GPU.""" + """Load a Hugging Face text-to-image or text-to-video model.""" if model_name is not None: self.model_name = model_name elif self.use_random_model or model_name == 'random': - model_name = select_random_model(modality) - self.model_name = model_name + self.model_name = select_random_model(modality) bt.logging.info(f"Loading {self.model_name}") + + model_config = MODELS[self.model_name] + pipeline_cls = model_config['pipeline_cls'] + pipeline_args = model_config['from_pretrained_args'].copy() - pipeline_cls = MODELS[model_name]['pipeline_cls'] - pipeline_args = MODELS[model_name]['from_pretrained_args'] + # Handle custom loading functions passed as tuples for k, v in pipeline_args.items(): if isinstance(v, tuple) and callable(v[0]): pipeline_args[k] = v[0](**v[1]) - if 'model_id' in pipeline_args: - model_id = pipeline_args['model_id'] - del pipeline_args['model_id'] - else: - model_id = model_name + # Get model_id if specified, otherwise use model_name + model_id = pipeline_args.pop('model_id', self.model_name) + + # Handle multi-stage pipeline + if isinstance(pipeline_cls, dict): + self.model = {} + for stage_name, stage_cls in pipeline_cls.items(): + stage_args = pipeline_args.get(stage_name, {}) + base_model = stage_args.get('base', model_id) + stage_args_filtered = {k:v for k,v in stage_args.items() if k != 'base'} + + bt.logging.info(f"Loading {stage_name} from {base_model}") + self.model[stage_name] = stage_cls.from_pretrained( + base_model, + cache_dir=HUGGINGFACE_CACHE_DIR, + **stage_args_filtered, + add_watermarker=False + ) + + enable_model_optimizations( + model=self.model[stage_name], + device=self.device, + enable_cpu_offload=model_config.get('enable_model_cpu_offload', False), + enable_sequential_cpu_offload=model_config.get('enable_sequential_cpu_offload', False), + 
enable_vae_slicing=model_config.get('vae_enable_slicing', False), + enable_vae_tiling=model_config.get('vae_enable_tiling', False), + stage_name=stage_name + ) - self.model = pipeline_cls.from_pretrained( - model_id, - cache_dir=HUGGINGFACE_CACHE_DIR, - **pipeline_args, - add_watermarker=False - ) + # Disable watermarker + self.model[stage_name].watermarker = None + else: + # Single-stage pipeline + self.model = pipeline_cls.from_pretrained( + model_id, + cache_dir=HUGGINGFACE_CACHE_DIR, + **pipeline_args, + add_watermarker=False + ) - self.model.set_progress_bar_config(disable=True) + # Load scheduler if specified + if 'scheduler' in model_config: + sched_cls = model_config['scheduler']['cls'] + sched_args = model_config['scheduler']['from_config_args'] + self.model.scheduler = sched_cls.from_config( + self.model.scheduler.config, + **sched_args + ) - # Load scheduler if specified - if 'scheduler' in MODELS[model_name]: - sched_cls = MODELS[model_name]['scheduler']['cls'] - sched_args = MODELS[model_name]['scheduler']['from_config_args'] - self.model.scheduler = sched_cls.from_config( - self.model.scheduler.config, - **sched_args + enable_model_optimizations( + model=self.model, + device=self.device, + enable_cpu_offload=model_config.get('enable_model_cpu_offload', False), + enable_sequential_cpu_offload=model_config.get('enable_sequential_cpu_offload', False), + enable_vae_slicing=model_config.get('vae_enable_slicing', False), + enable_vae_tiling=model_config.get('vae_enable_tiling', False) ) - # Configure model optimizations - model_config = MODELS[model_name] - if model_config.get('enable_model_cpu_offload', False): - bt.logging.info(f"Enabling cpu offload for {model_name}") - self.model.enable_model_cpu_offload() - if model_config.get('enable_sequential_cpu_offload', False): - bt.logging.info(f"Enabling sequential cpu offload for {model_name}") - self.model.enable_sequential_cpu_offload() - if model_config.get('vae_enable_slicing', False): - 
bt.logging.info(f"Enabling vae slicing for {model_name}") - try: - self.model.vae.enable_slicing() - except Exception: - try: - self.model.enable_vae_slicing() - except Exception: - bt.logging.warning(f"Could not enable vae slicing for {self.model}") - if model_config.get('vae_enable_tiling', False): - bt.logging.info(f"Enabling vae tiling for {model_name}") - try: - self.model.vae.enable_tiling() - except Exception: - try: - self.model.enable_vae_tiling() - except Exception: - bt.logging.warning(f"Could not enable vae tiling for {self.model}") + # Disable watermarker + self.model.watermarker = None - self.model.to(self.device) - bt.logging.info(f"Loaded {model_name} using {pipeline_cls.__name__}.") + bt.logging.info(f"Loaded {self.model_name}") def clear_gpu(self) -> None: """Clear GPU memory by deleting models and running garbage collection.""" diff --git a/bitmind/synthetic_data_generation/test_in_painting.py b/bitmind/synthetic_data_generation/test_in_painting.py deleted file mode 100644 index ebb1e021..00000000 --- a/bitmind/synthetic_data_generation/test_in_painting.py +++ /dev/null @@ -1,152 +0,0 @@ -import argparse -from pathlib import Path -from PIL import Image -import bittensor as bt -import numpy as np - -from bitmind.synthetic_data_generation.in_painting_generator import InPaintingGenerator - -def create_inpaint_only_image(original: Image.Image, mask: Image.Image, result: Image.Image) -> Image.Image: - """Create an image showing only the inpainted region.""" - # Ensure all images are the same size as the result - size = result.size - original = original.resize(size, Image.Resampling.LANCZOS) - mask = mask.resize(size, Image.Resampling.LANCZOS) - - # Convert mask to boolean array (True where white/inpainted) - mask_array = np.array(mask.convert('L')) > 128 - - # Create blank (black) image - inpaint_only = Image.new('RGB', size, 'black') - - # Copy only the inpainted region - inpaint_array = np.array(result) - inpaint_only_array = np.array(inpaint_only) 
- inpaint_only_array[mask_array] = inpaint_array[mask_array] - - return Image.fromarray(inpaint_only_array) - - -def create_final_image(original: Image.Image, mask: Image.Image, inpainted: Image.Image) -> Image.Image: - """ - Create the final image by combining original and inpainted regions. - Only use inpainted content where the mask is white. - """ - # Ensure all images are in RGB mode - original = original.convert('RGB') - inpainted = inpainted.convert('RGB') - - # Ensure all images are the same size - size = original.size - mask = mask.resize(size, Image.Resampling.LANCZOS) - inpainted = inpainted.resize(size, Image.Resampling.LANCZOS) - - # Convert to numpy arrays with float32 for precise calculations - original_array = np.array(original, dtype=np.float32) - inpainted_array = np.array(inpainted, dtype=np.float32) - - # Create mask array (values between 0 and 1) - mask_array = np.array(mask.convert('L'), dtype=np.float32) / 255.0 - - # Expand mask dimensions to match RGB - mask_array = np.expand_dims(mask_array, axis=-1) - - # Blend images using the mask as alpha - result_array = (original_array * (1 - mask_array) + - inpainted_array * mask_array) - - # Convert back to uint8 with proper rounding - result_array = np.clip(np.round(result_array), 0, 255).astype(np.uint8) - - return Image.fromarray(result_array, mode='RGB') - - -def main(): - parser = argparse.ArgumentParser(description='Test InPainting Generator on a single image') - parser.add_argument('--image_path', type=str, required=True, help='Path to input image') - parser.add_argument('--output_dir', type=str, default='output', help='Directory to save outputs') - parser.add_argument('--custom_prompt', type=str, help='Optional custom prompt to use') - parser.add_argument('--device', type=str, default='cuda', help='Device to run on (cuda/cpu)') - args = parser.parse_args() - - # Create output directory - output_dir = Path(args.output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - - try: - # Load 
image - bt.logging.info(f"Loading image from {args.image_path}") - image = Image.open(args.image_path) - - # Initialize generator - generator = InPaintingGenerator( - use_random_i2i_model=True, - output_dir=output_dir, - device=args.device - ) - - # Generate transformation - result = generator.run_i2i( - prompt=args.custom_prompt if args.custom_prompt else generator.generate_prompt(image), - original_image=image - ) - - # Get final size from generated image - final_size = result['gen_output'].images[0].size - - # Save outputs - timestamp = str(int(result['time'])) - - # 1. Save the original image (resized to match output) - original_resized = image.resize(final_size, Image.Resampling.LANCZOS) - original_path = output_dir / f"1_original_{timestamp}.png" - original_resized.save(original_path) - bt.logging.info(f"Saved original image to {original_path}") - - # 2. Save the mask (resized to match output) - mask = generator.create_random_mask(final_size) - mask_path = output_dir / f"2_mask_{timestamp}.png" - mask.save(mask_path) - bt.logging.info(f"Saved mask to {mask_path}") - - # 3. Save the inpainted region only - inpaint_only = create_inpaint_only_image( - original_resized, - mask, - result['gen_output'].images[0] - ) - inpaint_path = output_dir / f"3_inpaint_only_{timestamp}.png" - inpaint_only.save(inpaint_path) - bt.logging.info(f"Saved inpainted region to {inpaint_path}") - - # 4. 
Save the final transformed image (properly combined) - final_image = create_final_image( - original_resized, - mask, - result['gen_output'].images[0] - ) - final_path = output_dir / f"4_final_{timestamp}.png" - final_image.save(final_path) - bt.logging.info(f"Saved final image to {final_path}") - - # Save the prompt/caption to a text file - caption_path = output_dir / f"caption_{timestamp}.txt" - with open(caption_path, 'w') as f: - f.write(f"Generated Prompt: {result['prompt']}\n") - if result['prompt'] != result['prompt_long']: - f.write(f"Full Prompt: {result['prompt_long']}\n") - f.write(f"\nGeneration Time: {result['gen_time']:.2f} seconds\n") - f.write(f"Model: {result['model_name']}\n") - bt.logging.info(f"Saved caption to {caption_path}") - - # Print generation info - bt.logging.info(f"Prompt used: {result['prompt']}") - bt.logging.info(f"Generation time: {result['gen_time']:.2f} seconds") - - except Exception as e: - bt.logging.error(f"Error during inpainting generation: {e}") - raise - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/bitmind/utils/image_transforms.py b/bitmind/utils/image_transforms.py index 7accca7c..5e79a9dd 100644 --- a/bitmind/utils/image_transforms.py +++ b/bitmind/utils/image_transforms.py @@ -18,15 +18,37 @@ def fn(img): class RandomResizedCropWithParams(transforms.RandomResizedCrop): - def __init__(self, *args, **kwargs): + def __init__(self, *args, include_point=None, **kwargs): super().__init__(*args, **kwargs) self.params = None + self.include_point = include_point + print(f"created RRC with point included: {self.include_point}") def forward(self, img, crop_params=None): + """ + Args: + img: PIL Image to be cropped and resized + crop_params: Optional pre-computed crop parameters (i, j, h, w) + """ if crop_params is None: i, j, h, w = super().get_params(img, self.scale, self.ratio) + if self.include_point is not None: + x, y = self.include_point + width, height = img.shape[1:] + + # adjust crop to 
keep mask point + if x < j: + j = max(0, x - 10) + elif x > j + w: + j = min(width - w, x - w + 10) + + if y < i: + i = max(0, y - 10) + elif y > i + h: + i = min(height - h, y - h + 10) else: i, j, h, w = crop_params + self.params = {'crop_params': (i, j, h, w)} return F.resized_crop(img, i, j, h, w, self.size, self.interpolation, antialias=self.antialias) @@ -34,29 +56,35 @@ def forward(self, img, crop_params=None): class RandomHorizontalFlipWithParams(transforms.RandomHorizontalFlip): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.params = None + self.params = {} - def forward(self, img, do_flip=False): - if do_flip or (torch.rand(1) < self.p): - self.params = {'do_flip': True} - return transforms.functional.hflip(img) + def forward(self, img, do_flip=None): + if do_flip is not None: + self.params = {'do_flip': do_flip} + return transforms.functional.hflip(img) if do_flip else img + elif not hasattr(self, 'params'): + do_flip = torch.rand(1) < self.p + self.params = {'do_flip': do_flip} + return transforms.functional.hflip(img) if do_flip else img else: - self.params = {'do_flip': False} - return img + return transforms.functional.hflip(img) if self.params.get('do_flip', False) else img class RandomVerticalFlipWithParams(transforms.RandomVerticalFlip): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.params = None + self.params = {} - def forward(self, img, do_flip=True): - if do_flip or (torch.rand(1) < self.p): - self.params = {'do_flip': True} - return transforms.functional.vflip(img) + def forward(self, img, do_flip=None): + if do_flip is not None: + self.params = {'do_flip': do_flip} + return transforms.functional.vflip(img) if do_flip else img + elif not hasattr(self, 'params'): + do_flip = torch.rand(1) < self.p + self.params = {'do_flip': do_flip} + return transforms.functional.vflip(img) if do_flip else img else: - self.params = {'do_flip': False} - return img + return 
transforms.functional.vflip(img) if self.params.get('do_flip', False) else img class RandomRotationWithParams(transforms.RandomRotation): @@ -134,6 +162,7 @@ def get_distortion_parameter(distortion_type, level): } return param_dict[distortion_type][level - 1] + def get_distortion_function(distortion_type): """Get distortion function based on type.""" func_dict = { @@ -146,6 +175,7 @@ def get_distortion_function(distortion_type): } return func_dict[distortion_type] + def rgb_to_bgr(tensor_img): """Convert a PyTorch tensor image from RGB to BGR format. @@ -156,6 +186,7 @@ def rgb_to_bgr(tensor_img): tensor_img = tensor_img[[2, 1, 0], ...] return tensor_img + def bgr_to_rgb(tensor_img): """Convert a PyTorch tensor image from BGR to RGB format. @@ -166,6 +197,7 @@ def bgr_to_rgb(tensor_img): tensor_img = tensor_img[[2, 1, 0], ...] return tensor_img + def bgr2ycbcr(img_bgr): """Convert BGR image to YCbCr color space.""" img_bgr = img_bgr.astype(np.float32) @@ -175,6 +207,7 @@ def bgr2ycbcr(img_bgr): img_ycbcr[:, :, 1:] = (img_ycbcr[:, :, 1:] * (240 - 16) + 16) / 255.0 return img_ycbcr + def ycbcr2bgr(img_ycbcr): """Convert YCbCr image to BGR color space.""" img_ycbcr = img_ycbcr.astype(np.float32) @@ -184,6 +217,7 @@ def ycbcr2bgr(img_ycbcr): img_bgr = cv2.cvtColor(img_ycrcb, cv2.COLOR_YCR_CB2BGR) return img_bgr + def color_saturation(img, param): """Apply color saturation distortion.""" ycbcr = bgr2ycbcr(img) @@ -192,11 +226,13 @@ def color_saturation(img, param): img = ycbcr2bgr(ycbcr).astype(np.uint8) return img + def color_contrast(img, param): """Apply color contrast distortion.""" img = img.astype(np.float32) * param return img.astype(np.uint8) + def block_wise(img, param): """Apply block-wise distortion.""" width = 8 @@ -208,6 +244,7 @@ def block_wise(img, param): img[r_h:r_h + width, r_w:r_w + width, :] = block return img + def gaussian_noise_color(img, param): """Apply colored Gaussian noise.""" ycbcr = bgr2ycbcr(img) / 255 @@ -216,10 +253,12 @@ def 
gaussian_noise_color(img, param): b = ycbcr2bgr(b) return np.clip(b, 0, 255).astype(np.uint8) + def gaussian_blur(img, param): """Apply Gaussian blur.""" return cv2.GaussianBlur(img, (param, param), param * 1.0 / 6) + def jpeg_compression(img, param): """Apply JPEG compression distortion.""" h, w, _ = img.shape @@ -237,8 +276,12 @@ def __init__(self, distortion_type, level_min=0, level_max=3): self.level_min = level_min self.level_max = level_max - def __call__(self, img): - self.level = random.randint(self.level_min, self.level_max) + def __call__(self, img, level=None): + if level is None: + self.level = random.randint(self.level_min, self.level_max) + else: + self.level = level + if self.level > 0: self.distortion_param = get_distortion_parameter(self.distortion_type, self.level) self.distortion_func = get_distortion_function(self.distortion_type) @@ -312,13 +355,10 @@ def __init__(self, transforms): self.transforms = transforms self.params = {} - def __call__(self, input_data): - transform_params = { - RandomResizedCropWithParams: 'RandomResizedCrop', - RandomHorizontalFlipWithParams: 'RandomHorizontalFlip', - RandomVerticalFlipWithParams: 'RandomVerticalFlip', - RandomRotationWithParams: 'RandomRotation' - } + def __call__(self, input_data, clear_params=True): + if clear_params: + self.params = {} + output_data = [] list_input = True if not isinstance(input_data, list): @@ -326,14 +366,18 @@ def __call__(self, input_data): list_input = False for img in input_data: - for t in self.transforms: - if type(t) in transform_params and transform_params[type(t)] in self.params: - params = self.params[transform_params[type(t)]] - img = t(img, **params) + for transform in self.transforms: + try: + name = transform.__name__ + except AttributeError: + name = transform.__class__.__name__ + + if name in self.params: + img = transform(img, **self.params[name]) else: - img = t(img) - if type(t) in transform_params: - self.params[transform_params[type(t)]] = t.params + img = 
transform(img) + if hasattr(transform, 'params'): + self.params[name] = transform.params output_data.append(img) if list_input: @@ -351,12 +395,13 @@ def get_base_transforms(target_image_size=TARGET_IMAGE_SIZE): ]) -def get_random_augmentations(target_image_size=TARGET_IMAGE_SIZE): +def get_random_augmentations(target_image_size=TARGET_IMAGE_SIZE, mask_point=None): return ComposeWithParams([ ConvertToRGB(), transforms.ToTensor(), RandomRotationWithParams(20, interpolation=transforms.InterpolationMode.BILINEAR), - RandomResizedCropWithParams(TARGET_IMAGE_SIZE, scale=(0.2, 1.0), ratio=(1.0, 1.0)), + RandomResizedCropWithParams( + TARGET_IMAGE_SIZE, scale=(0.2, 1.0), ratio=(1.0, 1.0), include_point=mask_point), RandomHorizontalFlipWithParams(), RandomVerticalFlipWithParams() ]) @@ -378,12 +423,13 @@ def get_tall_base_transforms(target_image_size=TARGET_IMAGE_SIZE): ]) # Medium difficulty transforms with mild distortions -def get_random_augmentations_medium(target_image_size=TARGET_IMAGE_SIZE): +def get_random_augmentations_medium(target_image_size=TARGET_IMAGE_SIZE, mask_point=None): return ComposeWithParams([ ConvertToRGB(), transforms.ToTensor(), RandomRotationWithParams(20, interpolation=transforms.InterpolationMode.BILINEAR), - RandomResizedCropWithParams(target_image_size, scale=(0.2, 1.0), ratio=(1.0, 1.0)), + RandomResizedCropWithParams( + TARGET_IMAGE_SIZE, scale=(0.2, 1.0), ratio=(1.0, 1.0), include_point=mask_point), RandomHorizontalFlipWithParams(), RandomVerticalFlipWithParams(), ApplyDeeperForensicsDistortion('CS', level_min=0, level_max=1), @@ -392,12 +438,13 @@ def get_random_augmentations_medium(target_image_size=TARGET_IMAGE_SIZE): ]) # Hard difficulty transforms with more severe distortions -def get_random_augmentations_hard(target_image_size=TARGET_IMAGE_SIZE): +def get_random_augmentations_hard(target_image_size=TARGET_IMAGE_SIZE, mask_point=None): return ComposeWithParams([ ConvertToRGB(), transforms.ToTensor(), RandomRotationWithParams(20, 
interpolation=transforms.InterpolationMode.BILINEAR), - RandomResizedCropWithParams(target_image_size, scale=(0.2, 1.0), ratio=(1.0, 1.0)), + RandomResizedCropWithParams( + TARGET_IMAGE_SIZE, scale=(0.2, 1.0), ratio=(1.0, 1.0), include_point=mask_point), RandomHorizontalFlipWithParams(), RandomVerticalFlipWithParams(), ApplyDeeperForensicsDistortion('CS', level_min=0, level_max=2), @@ -408,7 +455,11 @@ def get_random_augmentations_hard(target_image_size=TARGET_IMAGE_SIZE): ]) -def apply_augmentation_by_level(image, target_image_size, level_probs={ +def apply_augmentation_by_level( + image, + target_image_size, + mask_point=None, + level_probs={ 0: 0.25, # No augmentations (base transforms) 1: 0.25, # Basic augmentations 2: 0.25, # Medium distortions @@ -449,16 +500,16 @@ def apply_augmentation_by_level(image, target_image_size, level_probs={ if rand_val <= cum_prob: level = curr_level break - + # Apply appropriate transform if level == 0: tforms = get_base_transforms(target_image_size) elif level == 1: - tforms = get_random_augmentations(target_image_size) + tforms = get_random_augmentations(target_image_size, mask_point) elif level == 2: - tforms = get_random_augmentations_medium(target_image_size) + tforms = get_random_augmentations_medium(target_image_size, mask_point) else: # level == 3 - tforms = get_random_augmentations_hard(target_image_size) + tforms = get_random_augmentations_hard(target_image_size, mask_point) transformed = tforms(image) diff --git a/bitmind/validator/cache/image_cache.py b/bitmind/validator/cache/image_cache.py index 2d583373..7ec81340 100644 --- a/bitmind/validator/cache/image_cache.py +++ b/bitmind/validator/cache/image_cache.py @@ -126,7 +126,8 @@ def sample(self, remove_from_cache=False) -> Optional[Dict[str, Any]]: 'image': image, 'path': str(image_path), 'dataset': metadata.get('dataset', None), - 'index': metadata.get('index', None) + 'index': metadata.get('index', None), + 'mask_center': metadata.get('mask_center', None) } except 
Exception as e: diff --git a/bitmind/validator/config.py b/bitmind/validator/config.py index ee697d1a..3f225297 100644 --- a/bitmind/validator/config.py +++ b/bitmind/validator/config.py @@ -12,10 +12,16 @@ HunyuanVideoPipeline, AnimateDiffPipeline, EulerDiscreteScheduler, - AutoPipelineForInpainting + AutoPipelineForInpainting, + IFPipeline, + IFSuperResolutionPipeline ) -from .model_utils import load_annimatediff_motion_adapter, load_hunyuanvideo_transformer +from .model_utils import ( + load_annimatediff_motion_adapter, + load_hunyuanvideo_transformer, + JanusWrapper +) TARGET_IMAGE_SIZE: tuple[int, int] = (256, 256) @@ -146,7 +152,72 @@ "use_safetensors": True, "torch_dtype": torch.float16, }, - } + }, + "DeepFloyd/IF": { + "pipeline_cls": { + "stage1": IFPipeline, + "stage2": IFSuperResolutionPipeline + }, + "from_pretrained_args": { + "stage1": { + "base": "DeepFloyd/IF-I-XL-v1.0", + "torch_dtype": torch.float16, + "variant": "fp16", + "clean_caption": False, + "watermarker": None, + "requires_safety_checker": False + }, + "stage2": { + "base": "DeepFloyd/IF-II-L-v1.0", + "torch_dtype": torch.float16, + "variant": "fp16", + "text_encoder": None, + "watermarker": None, + "requires_safety_checker": False + } + }, + "pipeline_stages": [ + { + "name": "stage1", + "args": { + "output_type": "pt", + "num_images_per_prompt": 1, + "return_dict": True + }, + "output_attr": "images", + "output_transform": lambda x: x[0].unsqueeze(0), + "save_prompt_embeds": True + }, + { + "name": "stage2", + "input_key": "image", + "args": { + "output_type": "pil", + "num_images_per_prompt": 1 + }, + "output_attr": "images", + "use_prompt_embeds": True + } + ], + "clear_memory_on_stage_end": True + }, + "deepseek-ai/Janus-Pro-7B": { + "pipeline_cls": JanusWrapper, + "from_pretrained_args": { + "torch_dtype": torch.bfloat16, + "use_safetensors": True, + }, + "generate_args": { + "temperature": 1.0, + "parallel_size": 4, + "cfg_weight": 5.0, + "image_token_num_per_image": 576, + 
"img_size": 384, + "patch_size": 16 + }, + "use_autocast": False, + "enable_model_cpu_offload": False + }, } T2I_MODEL_NAMES: List[str] = list(T2I_MODELS.keys()) @@ -223,8 +294,7 @@ "guidance_scale": 2, "num_videos_per_prompt": 1, "num_inference_steps": {"min": 50, "max": 125}, - "num_frames": 48, - "resolution": [720, 480] + "num_frames": 48 }, "save_args": {"fps": 8}, "enable_model_cpu_offload": True, @@ -310,4 +380,3 @@ def select_random_model(task: Optional[str] = None) -> str: return np.random.choice(I2I_MODEL_NAMES) else: raise NotImplementedError(f"Unsupported task: {task}") - diff --git a/bitmind/validator/forward.py b/bitmind/validator/forward.py index 82b6aaf0..ba955de9 100644 --- a/bitmind/validator/forward.py +++ b/bitmind/validator/forward.py @@ -19,6 +19,7 @@ import random import time +import re import numpy as np import pandas as pd @@ -47,6 +48,27 @@ def determine_challenge_type(media_cache, fake_prob=0.5): return label, modality, task, cache +def sample_video_frames(video_cache, min_frames, max_frames, min_fps=8, max_fps=30): + if np.random.rand() > 0.2: + num_frames = random.randint(min_frames, max_frames) + challenge = video_cache.sample(num_frames, min_fps=min_fps, max_fps=max_fps) + + else: + num_frames_A = random.randint(min_frames, max_frames - 1) + sample_A = video_cache.sample(num_frames_A, min_fps=min_fps, max_fps=max_fps) + if sample_A is None: + return None + num_frames_B = random.randint(min_frames, max(max_frames - num_frames_A, min_frames + 1)) + sample_B = video_cache.sample(num_frames_B, fps=sample_A['fps']) + challenge = { + 'videos': [sample_A['video'], sample_B['video']], # for wandb logging to handle different shapes + 'video': sample_A['video'] + sample_B['video'], + 'num_frames': sample_A['num_frames'] + sample_B['num_frames'], + 'fps': sample_A['fps'] + } + return challenge + + async def forward(self): """ The forward function is called by the validator every time step. 
@@ -75,11 +97,8 @@ async def forward(self): bt.logging.info(f"Sampling data from {modality} cache") if modality == 'video': - num_frames = random.randint( - self.config.neuron.clip_frames_min, - self.config.neuron.clip_frames_max) - challenge = cache.sample(num_frames, min_fps=8, max_fps=30) - + challenge = sample_video_frames( + cache, self.config.neuron.clip_frames_min, self.config.neuron.clip_frames_max) elif modality == 'image': challenge = cache.sample() @@ -90,8 +109,15 @@ async def forward(self): # prepare metadata for logging try: if modality == 'video': - video_arr = np.stack([np.array(img) for img in challenge['video']], axis=0) - challenge_metadata['video'] = wandb.Video(video_arr, fps=1) + if 'videos' in challenge: + for i, video in enumerate(challenge['videos']): + video_arr = np.stack([np.array(img) for img in video], axis=0) + video_arr = video_arr.transpose(0, 3, 1, 2) + challenge_metadata[f'video_{i}'] = wandb.Video(video_arr, fps=1) + else: + video_arr = np.stack([np.array(img) for img in challenge['video']], axis=0) + video_arr = video_arr.transpose(0, 3, 1, 2) + challenge_metadata['video'] = wandb.Video(video_arr, fps=1) challenge_metadata['fps'] = challenge['fps'] challenge_metadata['num_frames'] = challenge['num_frames'] elif modality == 'image': @@ -102,15 +128,19 @@ async def forward(self): return # update logging dict with everything except image/video data - challenge_metadata.update({k: v for k, v in challenge.items() if k != modality}) + challenge_metadata.update({ + k: v for k, v in challenge.items() + if re.match(r'^(?!image$|video$|videos$|video_\d+$).+', k) + }) input_data = challenge[modality] # extract video or image # apply data augmentation pipeline try: - input_data, level, data_aug_params = apply_augmentation_by_level(input_data, TARGET_IMAGE_SIZE) + input_data, level, data_aug_params = apply_augmentation_by_level( + input_data, TARGET_IMAGE_SIZE, challenge.get('mask_center', None)) except Exception as e: - level, 
data_aug_params = -1, {} - bt.logging.error(f"Unable to applay augmentations: {e}") + level, data_aug_params = -1, {} + bt.logging.error(f"Unable to apply augmentations: {e}") challenge_metadata['data_aug_params'] = data_aug_params challenge_metadata['data_aug_level'] = level diff --git a/bitmind/validator/model_utils.py b/bitmind/validator/model_utils.py index b0c414b0..5159d83d 100644 --- a/bitmind/validator/model_utils.py +++ b/bitmind/validator/model_utils.py @@ -1,7 +1,13 @@ import torch -from diffusers import MotionAdapter, HunyuanVideoTransformer3DModel +import numpy as np +from diffusers import MotionAdapter, HunyuanVideoTransformer3DModel, DiffusionPipeline from huggingface_hub import hf_hub_download from safetensors.torch import load_file +from transformers import AutoModelForCausalLM +from janus.models import VLChatProcessor +import PIL.Image +from typing import Dict, Any, Optional +import bittensor as bt def load_hunyuanvideo_transformer( @@ -46,3 +52,254 @@ def load_annimatediff_motion_adapter( ) ) return adapter + + +class JanusWrapper(DiffusionPipeline): + def __init__(self, model, processor): + super().__init__() + self.model = model + self.processor = processor + self.tokenizer = self.processor.tokenizer + self.register_modules( + model=model, + processor=processor, + tokenizer=self.processor.tokenizer + ) + + @torch.inference_mode() + def __call__( + self, + prompt: str, + temperature: float = 1.0, + parallel_size: int = 4, + cfg_weight: float = 5.0, + image_token_num_per_image: int = 576, + img_size: int = 384, + patch_size: int = 16, + **kwargs + ): + conversation = [ + { + "role": "<|User|>", + "content": prompt, + }, + {"role": "<|Assistant|>", "content": ""}, + ] + + sft_format = self.processor.apply_sft_template_for_multi_turn_prompts( + conversations=conversation, + sft_format=self.processor.sft_format, + system_prompt="", + ) + prompt = sft_format + self.processor.image_start_tag + + input_ids = self.processor.tokenizer.encode(prompt)
+ input_ids = torch.LongTensor(input_ids).to(self.device) + + tokens = torch.zeros((parallel_size*2, len(input_ids)), dtype=torch.int).to(self.device) + for i in range(parallel_size*2): + tokens[i, :] = input_ids + if i % 2 != 0: + tokens[i, 1:-1] = self.processor.pad_id + + inputs_embeds = self.model.language_model.get_input_embeddings()(tokens) + generated_tokens = torch.zeros((parallel_size, image_token_num_per_image), dtype=torch.int).to(self.device) + outputs = None + + for i in range(image_token_num_per_image): + outputs = self.model.language_model.model( + inputs_embeds=inputs_embeds, + use_cache=True, + past_key_values=outputs.past_key_values if i != 0 else None + ) + hidden_states = outputs.last_hidden_state + + logits = self.model.gen_head(hidden_states[:, -1, :]) + logit_cond = logits[0::2, :] + logit_uncond = logits[1::2, :] + + logits = logit_uncond + cfg_weight * (logit_cond-logit_uncond) + probs = torch.softmax(logits / temperature, dim=-1) + + next_token = torch.multinomial(probs, num_samples=1) + generated_tokens[:, i] = next_token.squeeze(dim=-1) + + next_token = torch.cat([next_token.unsqueeze(dim=1), next_token.unsqueeze(dim=1)], dim=1).view(-1) + img_embeds = self.model.prepare_gen_img_embeds(next_token) + inputs_embeds = img_embeds.unsqueeze(dim=1) + + dec = self.model.gen_vision_model.decode_code( + generated_tokens.to(dtype=torch.int), + shape=[parallel_size, 8, img_size//patch_size, img_size//patch_size] + ) + dec = dec.to(torch.float32).cpu().numpy().transpose(0, 2, 3, 1) + dec = np.clip((dec + 1) / 2 * 255, 0, 255) + + images = [] + for i in range(parallel_size): + images.append(PIL.Image.fromarray(dec[i].astype(np.uint8))) + + # Return object with images attribute + class Output: + def __init__(self, images): + self.images = images + + return Output(images) + + @classmethod + def from_pretrained(cls, model_path, **kwargs): + model, processor = load_janus_model(model_path, **kwargs) + return cls(model=model, processor=processor) + + def 
to(self, device): + self.model = self.model.to(device) + return self + + +def load_janus_model(model_path: str, **kwargs): + processor = VLChatProcessor.from_pretrained(model_path) + + # Filter kwargs to only include what Janus expects + janus_kwargs = { + 'trust_remote_code': True, + 'torch_dtype': kwargs.get('torch_dtype', torch.bfloat16) + } + + # Let device placement be handled by diffusers like other models + model = AutoModelForCausalLM.from_pretrained( + model_path, + **janus_kwargs + ).eval() + + return model, processor + + +def create_pipeline_generator(model_config: Dict[str, Any], model: Any) -> callable: + """ + Creates a generator function based on pipeline configuration. + + Args: + model_config: Model configuration dictionary + model: Loaded model instance(s) + + Returns: + Callable that handles the generation process for the model + """ + if isinstance(model_config.get('pipeline_stages'), list): + def generate(prompt: str, **kwargs): + output = None + prompt_embeds = None + negative_embeds = None + + for stage in model_config['pipeline_stages']: + stage_args = {**kwargs} # Copy base args + + # Add stage-specific args + if stage.get('input_key') and output is not None: + stage_args[stage['input_key']] = output + + # Add any stage-specific generation args + if stage.get('args'): + stage_args.update(stage['args']) + + # Handle prompt embeddings + if stage.get('use_prompt_embeds') and prompt_embeds is not None: + stage_args['prompt_embeds'] = prompt_embeds + stage_args['negative_prompt_embeds'] = negative_embeds + stage_args.pop('prompt', None) + elif stage.get('save_prompt_embeds'): + # Get embeddings directly from encode_prompt + prompt_embeds, negative_embeds = model[stage['name']].encode_prompt( + prompt=prompt, + device=model[stage['name']].device, + num_images_per_prompt=stage_args.get('num_images_per_prompt', 1), + ) + stage_args['prompt_embeds'] = prompt_embeds + stage_args['negative_prompt_embeds'] = negative_embeds + stage_args.pop('prompt', 
None) + else: + stage_args['prompt'] = prompt + + # Run stage + result = model[stage['name']](**stage_args) + + # Extract output based on stage config + output = getattr(result, stage.get('output_attr', 'images')) + + # Clear memory if configured + if model_config.get('clear_memory_on_stage_end'): + import gc + import torch + if torch.cuda.is_available(): + torch.cuda.empty_cache() + gc.collect() + + return result + return generate + + # Default single-stage pipeline + return lambda prompt, **kwargs: model(prompt=prompt, **kwargs) + + +def enable_model_optimizations( + model: Any, + device: str, + enable_cpu_offload: bool = False, + enable_sequential_cpu_offload: bool = False, + enable_vae_slicing: bool = False, + enable_vae_tiling: bool = False, + disable_progress_bar: bool = True, + stage_name: Optional[str] = None, +) -> None: + """ + Enables various model optimizations for better memory usage and performance. + + Args: + model: The model to optimize + device: Device to move model to ('cuda', 'cpu', etc) + enable_cpu_offload: Whether to enable model CPU offloading + enable_sequential_cpu_offload: Whether to enable sequential CPU offloading + enable_vae_slicing: Whether to enable VAE slicing + enable_vae_tiling: Whether to enable VAE tiling + disable_progress_bar: Whether to disable the progress bar + stage_name: Optional name of pipeline stage for logging + """ + model_name = f"{stage_name} " if stage_name else "" + + if disable_progress_bar: + bt.logging.info(f"Disabling progress bar for {model_name}model") + model.set_progress_bar_config(disable=True) + + # Handle CPU offloading + if enable_cpu_offload: + bt.logging.info(f"Enabling CPU offload for {model_name}model") + model.enable_model_cpu_offload(device=device) + elif enable_sequential_cpu_offload: + bt.logging.info(f"Enabling sequential CPU offload for {model_name}model") + model.enable_sequential_cpu_offload() + else: + # Only move to device if not using CPU offload + bt.logging.info(f"Moving 
{model_name}model to {device}") + model.to(device) + + # Handle VAE optimizations if not using CPU offload + if not enable_cpu_offload: + if enable_vae_slicing: + bt.logging.info(f"Enabling VAE slicing for {model_name}model") + try: + model.vae.enable_slicing() + except Exception: + try: + model.enable_vae_slicing() + except Exception as e: + bt.logging.warning(f"Failed to enable VAE slicing for {model_name}model: {e}") + + if enable_vae_tiling: + bt.logging.info(f"Enabling VAE tiling for {model_name}model") + try: + model.vae.enable_tiling() + except Exception: + try: + model.enable_vae_tiling() + except Exception as e: + bt.logging.warning(f"Failed to enable VAE tiling for {model_name}model: {e}") diff --git a/bitmind/validator/scripts/run_cache_updater.py b/bitmind/validator/scripts/run_cache_updater.py index b2f94186..12ddc819 100644 --- a/bitmind/validator/scripts/run_cache_updater.py +++ b/bitmind/validator/scripts/run_cache_updater.py @@ -23,51 +23,62 @@ async def main(args): + caches = [] - image_cache = ImageCache( - cache_dir=args.image_cache_dir, - datasets=IMAGE_DATASETS['real'], - parquet_update_interval=args.image_parquet_interval, - image_update_interval=args.image_interval, - num_parquets_per_dataset=5, - num_images_per_source=100, - max_extracted_size_gb=MAX_EXTRACTED_GB, - max_compressed_size_gb=MAX_COMPRESSED_GB - ) - image_cache.start_updater() + if args.mode in ['all', 'image']: + bt.logging.info("Starting image cache updater") + image_cache = ImageCache( + cache_dir=args.image_cache_dir, + datasets=IMAGE_DATASETS['real'], + parquet_update_interval=args.image_parquet_interval, + image_update_interval=args.image_interval, + num_parquets_per_dataset=5, + num_images_per_source=100, + max_extracted_size_gb=MAX_EXTRACTED_GB, + max_compressed_size_gb=MAX_COMPRESSED_GB + ) + image_cache.start_updater() + caches.append(image_cache) - video_cache = VideoCache( - cache_dir=args.video_cache_dir, - datasets=VIDEO_DATASETS['real'], - 
video_update_interval=args.video_interval, - zip_update_interval=args.video_zip_interval, - num_zips_per_dataset=2, - num_videos_per_zip=50, - max_extracted_size_gb=MAX_EXTRACTED_GB, - max_compressed_size_gb=MAX_COMPRESSED_GB - ) - video_cache.start_updater() + if args.mode in ['all', 'video']: + bt.logging.info("Starting video cache updater") + video_cache = VideoCache( + cache_dir=args.video_cache_dir, + datasets=VIDEO_DATASETS['real'], + video_update_interval=args.video_interval, + zip_update_interval=args.video_zip_interval, + num_zips_per_dataset=2, + num_videos_per_zip=50, + max_extracted_size_gb=MAX_EXTRACTED_GB, + max_compressed_size_gb=MAX_COMPRESSED_GB + ) + video_cache.start_updater() + caches.append(video_cache) + + if not caches: + raise ValueError(f"Invalid mode: {args.mode}") while True: - bt.logging.info("Caches running...") + bt.logging.info(f"Running cache updaters for: {args.mode}") await asyncio.sleep(600) # Status update every 10 minutes if __name__ == "__main__": - parser = argparse.ArgumentParser() + parser.add_argument('--mode', type=str, default='all', choices=['all', 'video', 'image'], + help='Which cache updater(s) to run') parser.add_argument('--video-cache-dir', type=str, default=REAL_VIDEO_CACHE_DIR, - help='Directory to cache video data') + help='Directory to cache video data') parser.add_argument('--image-cache-dir', type=str, default=REAL_IMAGE_CACHE_DIR, - help='Directory to cache image data') + help='Directory to cache image data') parser.add_argument('--image-interval', type=int, default=IMAGE_CACHE_UPDATE_INTERVAL, - help='Update interval for images in hours') + help='Update interval for images in hours') parser.add_argument('--image-parquet-interval', type=int, default=IMAGE_PARQUET_CACHE_UPDATE_INTERVAL, - help='Update interval for image parquet files in hours') + help='Update interval for image parquet files in hours') parser.add_argument('--video-interval', type=int, default=VIDEO_CACHE_UPDATE_INTERVAL, - help='Update 
interval for videos in hours') + help='Update interval for videos in hours') parser.add_argument('--video-zip-interval', type=int, default=VIDEO_ZIP_CACHE_UPDATE_INTERVAL, - help='Update interval for video zip files in hours') + help='Update interval for video zip files in hours') args = parser.parse_args() bt.logging.set_info() diff --git a/bitmind/validator/scripts/run_data_generator.py b/bitmind/validator/scripts/run_data_generator.py index 2543384d..51839e86 100644 --- a/bitmind/validator/scripts/run_data_generator.py +++ b/bitmind/validator/scripts/run_data_generator.py @@ -8,12 +8,12 @@ from bitmind.validator.cache import ImageCache from bitmind.validator.config import ( REAL_IMAGE_CACHE_DIR, - SYNTH_CACHE_DIR + SYNTH_CACHE_DIR, + MODEL_NAMES, + get_task ) - if __name__ == '__main__': - parser = argparse.ArgumentParser() parser.add_argument('--image-cache-dir', type=str, default=REAL_IMAGE_CACHE_DIR, help='Directory containing real images to use as reference') @@ -23,8 +23,15 @@ help='Device to run generation on (cuda/cpu)') parser.add_argument('--batch-size', type=int, default=3, help='Number of images to generate per batch') + parser.add_argument('--model', type=str, default=None, choices=MODEL_NAMES, + help='Specific model to test. 
If not specified, uses random models') args = parser.parse_args() + if args.model: + bt.logging.info(f"Using model {args.model} ({get_task(args.model)})") + else: + bt.logging.info(f"No model selected.") + bt.logging.set_info() init_wandb_run(run_base_name='data-generator', **load_validator_info()) @@ -39,7 +46,8 @@ sdg = SyntheticDataGenerator( prompt_type='annotation', - use_random_model=True, + use_random_model=args.model is None, + model_name=args.model, device=args.device, image_cache=image_cache, output_dir=args.output_dir) diff --git a/requirements.txt b/requirements.txt index 7c4725f5..8a5483d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ sentencepiece==0.2.0 timm==1.0.12 einops==0.8.0 ultralytics==8.3.44 +janus @ git+https://github.com/deepseek-ai/Janus.git # Image/Video processing datasets==3.1.0 diff --git a/setup_env.sh b/setup_env.sh index 0934c966..8b1b2470 100755 --- a/setup_env.sh +++ b/setup_env.sh @@ -33,6 +33,7 @@ sudo npm install -g pm2@latest ############################ pip install -e . +pip install -r requirements.txt ############################ # Environment Files Setup # diff --git a/start_validator.sh b/start_validator.sh index 0b49e774..87604a73 100755 --- a/start_validator.sh +++ b/start_validator.sh @@ -37,12 +37,24 @@ if ! huggingface-cli login --token $HUGGING_FACE_TOKEN; then exit 1 fi -# VALIDATOR PROCESS +# STOP VALIDATOR PROCESS if pm2 list | grep -q "$VALIDATOR_PROCESS_NAME"; then echo "Process '$VALIDATOR_PROCESS_NAME' is already running. Deleting it..." pm2 delete $VALIDATOR_PROCESS_NAME fi +# STOP REAL DATA CACHE UPDATER PROCESS +if pm2 list | grep -q "$CACHE_UPDATE_PROCESS_NAME"; then + echo "Process '$CACHE_UPDATE_PROCESS_NAME' is already running. Deleting it..." + pm2 delete $CACHE_UPDATE_PROCESS_NAME +fi + +# STOP SYNTHETIC DATA GENERATOR PROCESS +if pm2 list | grep -q "$DATA_GEN_PROCESS_NAME"; then + echo "Process '$DATA_GEN_PROCESS_NAME' is already running. Deleting it..." 
+ pm2 delete $DATA_GEN_PROCESS_NAME +fi + echo "Verifying access to synthetic image generation models. This may take a few minutes." if ! python3 bitmind/validator/verify_models.py; then echo "Failed to verify diffusion models. Please check the configurations or model access permissions." @@ -59,21 +71,9 @@ pm2 start neurons/validator.py --name $VALIDATOR_PROCESS_NAME -- \ --axon.port $VALIDATOR_AXON_PORT \ --proxy.port $VALIDATOR_PROXY_PORT -# REAL DATA CACHE UPDATER PROCESS -if pm2 list | grep -q "$CACHE_UPDATE_PROCESS_NAME"; then - echo "Process '$CACHE_UPDATE_PROCESS_NAME' is already running. Deleting it..." - pm2 delete $CACHE_UPDATE_PROCESS_NAME -fi - echo "Starting real data cache updater process" pm2 start bitmind/validator/scripts/run_cache_updater.py --name $CACHE_UPDATE_PROCESS_NAME -# SYNTHETIC DATA GENERATOR PROCESS -if pm2 list | grep -q "$DATA_GEN_PROCESS_NAME"; then - echo "Process '$DATA_GEN_PROCESS_NAME' is already running. Deleting it..." - pm2 delete $DATA_GEN_PROCESS_NAME -fi - echo "Starting synthetic data generation process" pm2 start bitmind/validator/scripts/run_data_generator.py --name $DATA_GEN_PROCESS_NAME -- \ --device $DEVICE