Skip to content

Fix Helios Context Parallelism#13223

Merged
yiyixuxu merged 3 commits into huggingface:main from
SHYuanBest:fix-context-parallelism-for-Helios
Mar 8, 2026
Merged

Fix Helios Context Parallelism#13223
yiyixuxu merged 3 commits into huggingface:main from
SHYuanBest:fix-context-parallelism-for-Helios

Conversation

@SHYuanBest
Copy link
Contributor

cc @dg845 @sayakpaul @yiyixuxu

Helios-Distilled

# CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --master_port 12311 --nproc_per_node 2 infer_distilled.py

import os
import argparse

import torch
import torch.distributed as dist
from diffusers import AutoModel, ContextParallelConfig, HeliosPyramidPipeline
from diffusers.utils import export_to_video, load_video, load_image

def parse_args():
    """Parse command-line options for this generation script.

    Returns:
        argparse.Namespace with a single ``cp_backend`` field selecting
        the context-parallel backend.
    """
    backends = ["ring", "ulysses", "unified", "ulysses_anything"]
    cli = argparse.ArgumentParser(description="Generate video with model")
    cli.add_argument(
        "--cp_backend",
        type=str,
        default="ulysses",
        choices=backends,
        help="Context parallel backend to use.",
    )
    return cli.parse_args()

def main():
    """Run Helios-Distilled text-to-video generation, optionally under
    context parallelism.

    Intended to be launched with ``torchrun`` (one process per rank); falls
    back to single-device execution when no distributed environment
    variables are present.
    """
    args = parse_args()

    if dist.is_available() and "RANK" in os.environ:
        # ulysses_anything additionally needs a CPU gloo group alongside
        # NCCL; the other backends use NCCL only.
        if args.cp_backend == "ulysses_anything":
            dist.init_process_group(backend="cpu:gloo,cuda:nccl")
        else:
            dist.init_process_group(backend="nccl")
        rank = dist.get_rank()
        device = torch.device("cuda", rank % torch.cuda.device_count())
        world_size = dist.get_world_size()
        torch.cuda.set_device(device)
    else:
        rank = 0
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        world_size = 1

    # Keep the VAE in fp32 while the rest of the pipeline runs in bf16.
    vae = AutoModel.from_pretrained("BestWishYsh/Helios-Distilled", subfolder="vae", torch_dtype=torch.float32)

    pipeline = HeliosPyramidPipeline.from_pretrained(
        "BestWishYsh/Helios-Distilled",
        vae=vae,
        torch_dtype=torch.bfloat16,
    )
    # Fix: move the pipeline to the computed per-rank device instead of the
    # hard-coded "cuda" string, which crashed on the CPU fallback path and
    # silently relied on torch.cuda.set_device for multi-GPU placement.
    pipeline.to(device)

    if args.cp_backend == "ring":
        cp_config = ContextParallelConfig(ring_degree=world_size)
    elif args.cp_backend == "unified":
        # NOTE(review): with world_size=2 this yields ring=1, ulysses=1
        # (total degree 1, not 2) — confirm intended semantics upstream.
        cp_config = ContextParallelConfig(ring_degree=world_size // 2, ulysses_degree=world_size // 2)
    elif args.cp_backend == "ulysses":
        cp_config = ContextParallelConfig(ulysses_degree=world_size)
    elif args.cp_backend == "ulysses_anything":
        cp_config = ContextParallelConfig(ulysses_degree=world_size, ulysses_anything=True)
    else:
        raise ValueError(f"Unsupported cp_backend: {args.cp_backend}")

    pipeline.transformer.enable_parallelism(config=cp_config)

    negative_prompt = """
    Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality,
    low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured,
    misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards
    """

    # --- T2V ---
    prompt = """
    A vibrant tropical fish swimming gracefully among colorful coral reefs in a clear, turquoise ocean. The fish has bright blue 
    and yellow scales with a small, distinctive orange spot on its side, its fins moving fluidly. The coral reefs are alive with 
    a variety of marine life, including small schools of colorful fish and sea turtles gliding by. The water is crystal clear, 
    allowing for a view of the sandy ocean floor below. The reef itself is adorned with a mix of hard and soft corals in shades 
    of red, orange, and green. The photo captures the fish from a slightly elevated angle, emphasizing its lively movements and 
    the vivid colors of its surroundings. A close-up shot with dynamic movement.
    """

    output = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_frames=240,
        pyramid_num_inference_steps_list=[2, 2, 2],
        guidance_scale=1.0,
        is_amplify_first_chunk=True,
        # Fix: seed the generator on the selected device, not literal "cuda",
        # so the single-device CPU fallback also works.
        generator=torch.Generator(device).manual_seed(42),
    ).frames[0]

    # Fix: only rank 0 writes the video — previously every rank raced to
    # write the same output file.
    if rank == 0:
        export_to_video(output, "helios_distilled_t2v_output.mp4", fps=24)

    # Fix: tear down the process group cleanly instead of leaking it.
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()

# Script entry point: torchrun invokes this module once per rank.
if __name__ == "__main__":
    main()

Helios-Base

# CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --master_port 12311 --nproc_per_node 2 infer_base.py

import os
import argparse

import torch
import torch.distributed as dist
from diffusers import AutoModel, ContextParallelConfig, HeliosPipeline
from diffusers.utils import export_to_video, load_video, load_image

def parse_args():
    """Build and evaluate the command-line parser for this script.

    Returns:
        argparse.Namespace carrying the chosen ``cp_backend``.
    """
    parser = argparse.ArgumentParser(description="Generate video with model")
    parser.add_argument(
        "--cp_backend",
        type=str,
        choices=("ring", "ulysses", "unified", "ulysses_anything"),
        default="ulysses",
        help="Context parallel backend to use.",
    )
    parsed = parser.parse_args()
    return parsed

def main():
    """Run Helios-Base text-to-video generation, optionally under context
    parallelism.

    Intended to be launched with ``torchrun`` (one process per rank); falls
    back to single-device execution when no distributed environment
    variables are present.
    """
    args = parse_args()

    if dist.is_available() and "RANK" in os.environ:
        # ulysses_anything additionally needs a CPU gloo group alongside
        # NCCL; the other backends use NCCL only.
        if args.cp_backend == "ulysses_anything":
            dist.init_process_group(backend="cpu:gloo,cuda:nccl")
        else:
            dist.init_process_group(backend="nccl")
        rank = dist.get_rank()
        device = torch.device("cuda", rank % torch.cuda.device_count())
        world_size = dist.get_world_size()
        torch.cuda.set_device(device)
    else:
        rank = 0
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        world_size = 1

    # Keep the VAE in fp32 while the rest of the pipeline runs in bf16.
    vae = AutoModel.from_pretrained("BestWishYsh/Helios-Base", subfolder="vae", torch_dtype=torch.float32)

    pipeline = HeliosPipeline.from_pretrained(
        "BestWishYsh/Helios-Base",
        vae=vae,
        torch_dtype=torch.bfloat16,
    )
    # Fix: move the pipeline to the computed per-rank device instead of the
    # hard-coded "cuda" string, which crashed on the CPU fallback path and
    # silently relied on torch.cuda.set_device for multi-GPU placement.
    pipeline.to(device)

    if args.cp_backend == "ring":
        cp_config = ContextParallelConfig(ring_degree=world_size)
    elif args.cp_backend == "unified":
        # NOTE(review): with world_size=2 this yields ring=1, ulysses=1
        # (total degree 1, not 2) — confirm intended semantics upstream.
        cp_config = ContextParallelConfig(ring_degree=world_size // 2, ulysses_degree=world_size // 2)
    elif args.cp_backend == "ulysses":
        cp_config = ContextParallelConfig(ulysses_degree=world_size)
    elif args.cp_backend == "ulysses_anything":
        cp_config = ContextParallelConfig(ulysses_degree=world_size, ulysses_anything=True)
    else:
        raise ValueError(f"Unsupported cp_backend: {args.cp_backend}")

    pipeline.transformer.enable_parallelism(config=cp_config)

    negative_prompt = """
    Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality,
    low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured,
    misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards
    """

    # --- T2V ---
    prompt = """
    A vibrant tropical fish swimming gracefully among colorful coral reefs in a clear, turquoise ocean. The fish has bright blue 
    and yellow scales with a small, distinctive orange spot on its side, its fins moving fluidly. The coral reefs are alive with 
    a variety of marine life, including small schools of colorful fish and sea turtles gliding by. The water is crystal clear, 
    allowing for a view of the sandy ocean floor below. The reef itself is adorned with a mix of hard and soft corals in shades 
    of red, orange, and green. The photo captures the fish from a slightly elevated angle, emphasizing its lively movements and 
    the vivid colors of its surroundings. A close-up shot with dynamic movement.
    """

    output = pipeline(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_frames=99,
        num_inference_steps=50,
        guidance_scale=5.0,
        # Fix: seed the generator on the selected device, not literal "cuda",
        # so the single-device CPU fallback also works.
        generator=torch.Generator(device).manual_seed(42),
    ).frames[0]

    # Fix: only rank 0 writes the video — previously every rank raced to
    # write the same output file.
    if rank == 0:
        export_to_video(output, "helios_base_t2v_output.mp4", fps=24)

    # Fix: tear down the process group cleanly instead of leaking it.
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()

# Script entry point: torchrun invokes this module once per rank.
if __name__ == "__main__":
    main()

@SHYuanBest
Copy link
Contributor Author

Helios-Base with 2 GPUs

helios_base_t2v_output.mp4

Helios-Distilled with 2 GPUs

helios_distilled_t2v_output.mp4

@SHYuanBest SHYuanBest changed the title from "fix Helios Context Parallelism" to "Fix Helios Context Parallelism" on Mar 8, 2026
Copy link
Collaborator

@yiyixuxu yiyixuxu left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks!

@yiyixuxu yiyixuxu merged commit 9254417 into huggingface:main Mar 8, 2026
11 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants