[modular]support klein #13002

Conversation
```python
def inputs(self) -> List[InputParam]:
    return [
        InputParam(name="prompt_embeds", required=True),
        InputParam(name="latent_ids"),
```
removed because latent_ids are not used in this block I think
```python
def inputs(self) -> List[InputParam]:
    return [
        InputParam("prompt"),
        InputParam("prompt_embeds", type_hint=torch.Tensor, required=False),
```
In modular, we can just pop the text_encoder block out and use it in a standalone manner if we want to compute the embeddings separately. So I'm trying to remove these arguments everywhere to simplify the code a bit (same with image_latents too; it gets really complicated for some inpainting pipelines).
```python
blocks = ....
text_node = blocks.pop("text_encoder").init_pipeline(repo_id)
pipe = blocks.init_pipeline(repo_id)
prompt_embeds = text_node(prompt=...)
out = pipe(prompt_embeds=...)
```
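A stdlib-only toy sketch of the pattern in the snippet above: pop the text-encoder block out, run it standalone, then feed its output to the remaining pipeline. `Block`, the lambdas, and the repo id here are made-up stand-ins for illustration, not the actual diffusers API.

```python
from collections import OrderedDict

# Toy stand-in for a pipeline block; NOT the diffusers API,
# just the control flow the snippet above describes.
class Block:
    def __init__(self, fn):
        self.fn = fn

    def init_pipeline(self, repo_id):
        # in diffusers this would load weights/components from repo_id
        return self.fn

blocks = OrderedDict(
    text_encoder=Block(lambda prompt: f"embeds({prompt})"),
    denoise=Block(lambda prompt_embeds: f"image from {prompt_embeds}"),
)

# pop the text encoder out and run it standalone
text_node = blocks.pop("text_encoder").init_pipeline("some/repo")
prompt_embeds = text_node("a cat")

# the remaining blocks form the pipeline, fed precomputed embeddings
pipe = blocks["denoise"].init_pipeline("some/repo")
out = pipe(prompt_embeds)
print(out)  # image from embeds(a cat)
```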
The docs for this PR live here. All of your documentation changes will be reflected on that endpoint. The docs are available until 30 days after the last update.
sayakpaul left a comment
Not a requirement but should we add a small test for it as well? 👀
```python
return [
    InputParam(name="prompt_embeds", required=True),
    InputParam(name="latent_ids"),
    InputParam(name="negative_prompt_embeds", required=False),
```
No strong opinions, but WDYT of creating a separate block for Klein altogether? I think this way it will be a bit easier to debug and also separates concerns.
My suggestion mainly comes from the fact that Flux.2-Dev doesn't use negative_prompt_embeds while Flux.2-Klein does. So maybe that warrants creating separate blocks.
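A hypothetical sketch of what the separate-blocks suggestion could look like: two text-input blocks declaring different `InputParam` lists. The class names and the minimal `InputParam` stand-in are illustrative, not the actual diffusers classes; only the fact that Dev skips negative_prompt_embeds while Klein takes it comes from the discussion above.

```python
from dataclasses import dataclass
from typing import List

# minimal stand-in for diffusers' InputParam, just enough to show the idea
@dataclass
class InputParam:
    name: str
    required: bool = False

class Flux2DevTextInputStep:  # hypothetical name
    def inputs(self) -> List[InputParam]:
        # Dev does not use negative prompt embeddings
        return [InputParam("prompt_embeds", required=True)]

class Flux2KleinTextInputStep:  # hypothetical name
    def inputs(self) -> List[InputParam]:
        # Klein additionally accepts negative_prompt_embeds
        return [
            InputParam("prompt_embeds", required=True),
            InputParam("negative_prompt_embeds", required=False),
        ]

dev_names = [p.name for p in Flux2DevTextInputStep().inputs()]
klein_names = [p.name for p in Flux2KleinTextInputStep().inputs()]
print(dev_names)    # ['prompt_embeds']
print(klein_names)  # ['prompt_embeds', 'negative_prompt_embeds']
```

With separate blocks, each model's auto docstring only advertises the inputs it actually supports.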
It's a fair point, but on the other hand, I've personally found that having too many blocks can become overwhelming: each time you need to add something, you still need to go through all of them and understand which ones to use.
I think it makes sense to just add the code in the same blocks here; it is so small and fits in. But this is really a matter of preference, not right or wrong. Maybe we'll know better in the future, after building more pipelines :)
Actually, I changed my mind: I agree it's better to separate them out. Otherwise negative_prompt_embeds will show up as an optional argument in the auto docstring for both Klein and Dev, which is confusing.
Note that in Qwen (https://github.com/huggingface/diffusers/blob/main/src/diffusers/modular_pipelines/qwenimage/inputs.py#L232), I'm experimenting with more composable blocks for situations like this that you can just reuse. But it also makes the blocks more complex, and I'm not sure if I'm over-engineering. So let's keep them simple here and see how it goes.
Thank you!
```python
def get_default_blocks_name(self, config_dict: Optional[Dict[str, Any]]) -> Optional[str]:
    if config_dict is not None and "is_distilled" in config_dict and config_dict["is_distilled"]:
        return "Flux2KleinAutoBlocks"
    else:
        return "Flux2KleinBaseAutoBlocks"
```
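A self-contained stand-in for the selection logic above, written as a plain function so it can be run in isolation (the method body and block names are from the diff; the sample config dicts are made up):

```python
from typing import Any, Dict, Optional

def get_default_blocks_name(config_dict: Optional[Dict[str, Any]]) -> Optional[str]:
    # checkpoints flagged is_distilled get the default auto blocks;
    # everything else falls back to the base variant
    if config_dict is not None and config_dict.get("is_distilled"):
        return "Flux2KleinAutoBlocks"
    return "Flux2KleinBaseAutoBlocks"

print(get_default_blocks_name({"is_distilled": True}))   # Flux2KleinAutoBlocks
print(get_default_blocks_name({"is_distilled": False}))  # Flux2KleinBaseAutoBlocks
print(get_default_blocks_name(None))                     # Flux2KleinBaseAutoBlocks
```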
Maybe naming them as Flux2KleinDistilledBlocks and Flux2KleinBaseAutoBlocks is slightly better?
src/diffusers/modular_pipelines/flux2/modular_blocks_flux2_klein.py (outdated; resolved)
Co-authored-by: Sayak Paul <[email protected]>
Co-authored-by: Álvaro Somoza <[email protected]>
```python
block_state.timesteps = timesteps
block_state.num_inference_steps = num_inference_steps

batch_size = block_state.batch_size * block_state.num_images_per_prompt
```
separated this into a prepare_guidance block
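A hypothetical sketch of what a standalone prepare_guidance block could compute: expand per-prompt settings to the effective batch size before denoising. Only the `batch_size * num_images_per_prompt` arithmetic comes from the diff above; the guidance handling itself is an assumption.

```python
def prepare_guidance(block_state: dict) -> dict:
    # effective batch size, as in the diff above
    batch_size = block_state["batch_size"] * block_state["num_images_per_prompt"]
    # assumed: broadcast a scalar guidance scale to one value per sample
    block_state["guidance"] = [block_state.get("guidance_scale", 1.0)] * batch_size
    return block_state

state = prepare_guidance(
    {"batch_size": 2, "num_images_per_prompt": 3, "guidance_scale": 4.0}
)
print(len(state["guidance"]))  # 6
```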
```python
class Flux2DecodeStep(ModularPipelineBlocks):
class Flux2UnpackLatentsStep(ModularPipelineBlocks):
```
moved the "unpack latents" logic out of the decode step -> this just makes it easier for the decode step to work in a standalone manner, i.e. the user only needs to pass latents (not latent_ids)
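A pure-Python sketch of the split described above: unpacking (the only part that needs latent_ids) becomes its own step, so the decode step consumes plain `latents` and can be run standalone. The state dict and toy values stand in for the real tensor operations.

```python
def unpack_latents_step(state: dict) -> dict:
    # consumes latent_ids here, so downstream steps never see them
    packed, ids = state.pop("packed_latents"), state.pop("latent_ids")
    state["latents"] = [packed[i] for i in ids]
    return state

def decode_step(state: dict) -> dict:
    # standalone-friendly: only needs state["latents"]
    state["image"] = sum(state["latents"])
    return state

state = {"packed_latents": [10, 20, 30], "latent_ids": [2, 0, 1]}
out = decode_step(unpack_latents_step(state))
print(out["image"])  # 60
```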
```python
AUTO_BLOCKS = InsertableDict(
    [
        ("text_encoder", Flux2TextEncoderStep()),
        ("text_input", Flux2TextInputStep()),
```
rearranged a bit so this works with mellon
testing klein
testing klein with modular setting
testing flux2-dev
modular setting