Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions bitmind/synthetic_data_generation/synthetic_data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import bittensor as bt
import numpy as np
import random
import torch
from diffusers.utils import export_to_video
from PIL import Image
Expand Down Expand Up @@ -266,21 +267,22 @@ def _run_generation(
gen_args['mask_image'] = create_random_mask(image.size)
gen_args['image'] = image

# Process generation arguments
# Prepare generation arguments
for k, v in gen_args.items():
if isinstance(v, dict):
gen_args[k] = np.random.randint(
gen_args[k]['min'],
gen_args[k]['max']
)
for dim in ('height', 'width'):
if isinstance(gen_args.get(dim), list):
gen_args[dim] = np.random.choice(gen_args[dim])
if "min" in v and "max" in v:
gen_args[k] = np.random.randint(v['min'], v['max'])
if "options" in v:
gen_args[k] = random.choice(v['options'])

try:
if generate_at_target_size:
gen_args['height'] = TARGET_IMAGE_SIZE[0]
gen_args['width'] = TARGET_IMAGE_SIZE[1]
elif 'resolution' in gen_args:
gen_args['height'] = gen_args['resolution'][0]
gen_args['width'] = gen_args['resolution'][1]
del gen_args['resolution']

truncated_prompt = truncate_prompt_if_too_long(
prompt,
Expand All @@ -289,6 +291,7 @@ def _run_generation(

bt.logging.info(f"Generating media from prompt: {truncated_prompt}")
bt.logging.info(f"Generation args: {gen_args}")

start_time = time.time()
if model_config.get('use_autocast', True):
pretrained_args = model_config.get('from_pretrained_args', {})
Expand Down Expand Up @@ -347,12 +350,21 @@ def load_model(self, model_name: Optional[str] = None, modality: Optional[str] =
self.model_name = model_name

bt.logging.info(f"Loading {self.model_name}")

pipeline_cls = MODELS[model_name]['pipeline_cls']
pipeline_args = MODELS[model_name]['from_pretrained_args']
for k, v in pipeline_args.items():
if isinstance(v, tuple) and callable(v[0]):
pipeline_args[k] = v[0](**v[1])

if 'model_id' in pipeline_args:
model_id = pipeline_args['model_id']
del pipeline_args['model_id']
else:
model_id = model_name

self.model = pipeline_cls.from_pretrained(
pipeline_args.get('base', model_name),
model_id,
cache_dir=HUGGINGFACE_CACHE_DIR,
**pipeline_args,
add_watermarker=False
Expand Down
54 changes: 48 additions & 6 deletions bitmind/validator/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
FluxPipeline,
CogVideoXPipeline,
MochiPipeline,
HunyuanVideoPipeline,
AnimateDiffPipeline,
EulerDiscreteScheduler,
AutoPipelineForInpainting
AutoPipelineForInpainting,
)

from .model_utils import load_annimatediff_motion_adapter
from .model_utils import load_annimatediff_motion_adapter, load_hunyuanvideo_transformer


TARGET_IMAGE_SIZE: tuple[int, int] = (256, 256)
Expand Down Expand Up @@ -171,16 +172,46 @@

# Text-to-video model configurations
T2V_MODELS: Dict[str, Dict[str, Any]] = {
"tencent/HunyuanVideo": {
"pipeline_cls": HunyuanVideoPipeline,
"from_pretrained_args": {
"model_id": "tencent/HunyuanVideo",
"transformer": ( # custom functions supplied as tuple of (fn, args)
load_hunyuanvideo_transformer,
{
"model_id": "tencent/HunyuanVideo",
"subfolder": "transformer",
"torch_dtype": torch.bfloat16,
"revision": 'refs/pr/18'
}
),
"revision": 'refs/pr/18',
"torch_dtype": torch.bfloat16
},
"generate_args": {
"num_frames": {"min": 61, "max": 129},
"resolution": {"options": [
[720, 1280], [1280, 720], [1104, 832], [832,1104], [960,960],
[544, 960], [960, 544], [624, 832], [832, 624], [720, 720]
]},
"num_inference_steps": {"min": 30, "max": 50},
},
"save_args": {"fps": 30},
"use_autocast": False,
"vae_enable_tiling": True
},
"genmo/mochi-1-preview": {
"pipeline_cls": MochiPipeline,
"from_pretrained_args": {
"variant": "bf16",
"torch_dtype": torch.bfloat16
},
"generate_args": {
"num_frames": 84
"num_frames": 84,
"num_inference_steps": {"min": 30, "max": 65},
"resolution": [480, 848]
},
#"enable_model_cpu_offload": True,
"save_args": {"fps": 30},
"vae_enable_tiling": True
},
'THUDM/CogVideoX-5b': {
Expand All @@ -194,7 +225,9 @@
"num_videos_per_prompt": 1,
"num_inference_steps": {"min": 50, "max": 125},
"num_frames": 48,
"resolution": [720, 480]
},
"save_args": {"fps": 8},
"enable_model_cpu_offload": True,
#"enable_sequential_cpu_offload": True,
"vae_enable_slicing": True,
Expand All @@ -203,14 +236,23 @@
'ByteDance/AnimateDiff-Lightning': {
"pipeline_cls": AnimateDiffPipeline,
"from_pretrained_args": {
"base": "emilianJR/epiCRealism",
"model_id": "emilianJR/epiCRealism",
"torch_dtype": torch.bfloat16,
"motion_adapter": load_annimatediff_motion_adapter()
"motion_adapter": (
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Store a tuple of (fn, kwargs) so that the loader is invoked when the model is loaded, not when this file is imported.

load_annimatediff_motion_adapter,
{"step": 4}
)
},
"generate_args": {
"guidance_scale": 2,
"num_inference_steps": {"min": 50, "max": 125},
"resolution": {"options": [
[512, 512], [512, 768], [512, 1024],
[768, 512], [768, 768], [768, 1024],
[1024, 512], [1024, 768], [1024, 1024]
]}
},
"save_args": {"fps": 15},
"scheduler": {
"cls": EulerDiscreteScheduler,
"from_config_args": {
Expand Down
13 changes: 12 additions & 1 deletion bitmind/validator/model_utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
import torch
from diffusers import MotionAdapter
from diffusers import MotionAdapter, HunyuanVideoTransformer3DModel
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file


def load_hunyuanvideo_transformer(
    model_id: str = "tencent/HunyuanVideo",
    subfolder: str = "transformer",
    torch_dtype: torch.dtype = torch.bfloat16,
    revision: str = 'refs/pr/18'
):
    """Load a HunyuanVideo transformer via ``from_pretrained``.

    Args:
        model_id: Identifier passed positionally to
            ``HunyuanVideoTransformer3DModel.from_pretrained``.
        subfolder: Forwarded as the ``subfolder`` keyword argument.
        torch_dtype: Forwarded as the ``torch_dtype`` keyword argument.
        revision: Forwarded as the ``revision`` keyword argument.

    Returns:
        Whatever ``HunyuanVideoTransformer3DModel.from_pretrained`` returns
        for the given arguments.
    """
    # Collect the keyword arguments first so the call site stays short.
    load_kwargs = {
        "subfolder": subfolder,
        "torch_dtype": torch_dtype,
        "revision": revision,
    }
    transformer = HunyuanVideoTransformer3DModel.from_pretrained(
        model_id, **load_kwargs
    )
    return transformer


def load_annimatediff_motion_adapter(
step: int = 4
) -> MotionAdapter:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ scikit-learn==1.5.2

# Deep learning tools
transformers==4.47.0
#git+https://github.com/huggingface/diffusers.git@6a51427b6a226591ccc40249721c486855f53e1c#egg=diffusers
diffusers==0.32.2
accelerate==1.2.0
bitsandbytes==0.45.0
sentencepiece==0.2.0
Expand Down
1 change: 0 additions & 1 deletion setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ sudo npm install -g pm2@latest
# Python Package Installation
############################

pip install git+https://github.com/huggingface/diffusers.git@6a51427b6a226591ccc40249721c486855f53e1c
pip install -e .

############################
Expand Down
Loading