Skip to content

Commit 1b26554

Browse files
authored
Merge branch 'main' into ideogram4-lora-training
2 parents c2b68ad + 7104cb4 commit 1b26554

18 files changed

Lines changed: 1834 additions & 2 deletions

File tree

docs/source/en/_toctree.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,8 @@
355355
title: Ideogram4Transformer2DModel
356356
- local: api/models/transformer_joyimage
357357
title: JoyImageEditTransformer3DModel
358+
- local: api/models/krea2_transformer2d
359+
title: Krea2Transformer2DModel
358360
- local: api/models/latte_transformer3d
359361
title: LatteTransformer3DModel
360362
- local: api/models/longcat_image_transformer2d
@@ -563,6 +565,8 @@
563565
title: Kandinsky 5.0 Image
564566
- local: api/pipelines/kolors
565567
title: Kolors
568+
- local: api/pipelines/krea2
569+
title: Krea 2
566570
- local: api/pipelines/latent_consistency_models
567571
title: Latent Consistency Models
568572
- local: api/pipelines/latent_diffusion
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<!--Copyright 2026 Krea AI and The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
9+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
specific language governing permissions and limitations under the License.
11+
-->
12+
13+
# Krea2Transformer2DModel
14+
15+
The single-stream MMDiT flow-matching transformer used by [Krea 2](https://github.com/krea-ai/krea-2).
16+
17+
## Krea2Transformer2DModel
18+
19+
[[autodoc]] Krea2Transformer2DModel
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
<!--Copyright 2026 Krea AI and The HuggingFace Team. All rights reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
4+
the License. You may obtain a copy of the License at
5+
6+
http://www.apache.org/licenses/LICENSE-2.0
7+
8+
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
9+
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
specific language governing permissions and limitations under the License.
11+
-->
12+
13+
# Krea 2
14+
15+
Krea 2 (K2) is a flow-matching text-to-image model built around a single-stream MMDiT with grouped-query attention. A
16+
Qwen3-VL text encoder provides the conditioning: instead of the last hidden state, hidden states from twelve decoder
17+
layers are tapped per token and fused inside the transformer by a small text-fusion stage. Images are decoded with the
18+
Qwen-Image VAE.
19+
20+
Two checkpoints are released, sharing the same architecture but with different recommended sampler settings:
21+
22+
- **Base (midtrain)** — use the full sampler with classifier-free guidance: `num_inference_steps=28`,
23+
`guidance_scale=4.5`.
24+
- **TDM (distilled)** — distilled for few-step sampling; run with `num_inference_steps=8` and guidance disabled
25+
(`guidance_scale=0.0`).
26+
27+
`guidance_scale` follows the Krea 2 convention: the velocity is computed as `cond + guidance_scale * (cond - uncond)`
28+
and guidance is enabled whenever `guidance_scale > 0` (this equals the usual CFG formulation with scale
29+
`1 + guidance_scale`).
30+
31+
## Text-to-image
32+
33+
```python
34+
import torch
35+
from diffusers import Krea2Pipeline
36+
37+
# Load from a local directory containing the converted Krea 2 checkpoint (no Hub repo is available yet).
38+
pipe = Krea2Pipeline.from_pretrained("path/to/krea2-diffusers", torch_dtype=torch.bfloat16)
39+
pipe.to("cuda")
40+
41+
prompt = "a fox in the snow"
42+
image = pipe(
43+
prompt,
44+
height=1024,
45+
width=1024,
46+
num_inference_steps=28,
47+
guidance_scale=4.5,
48+
generator=torch.Generator("cuda").manual_seed(0),
49+
).images[0]
50+
image.save("krea2.png")
51+
```
52+
53+
## Krea2Pipeline
54+
55+
[[autodoc]] Krea2Pipeline
56+
- all
57+
- __call__
58+
59+
## Krea2PipelineOutput
60+
61+
[[autodoc]] pipelines.krea2.pipeline_output.Krea2PipelineOutput

src/diffusers/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@
277277
"JoyImageEditTransformer3DModel",
278278
"Kandinsky3UNet",
279279
"Kandinsky5Transformer3DModel",
280+
"Krea2Transformer2DModel",
280281
"LatteTransformer3DModel",
281282
"LongCatAudioDiTTransformer",
282283
"LongCatAudioDiTVae",
@@ -646,6 +647,7 @@
646647
"KandinskyV22Pipeline",
647648
"KandinskyV22PriorEmb2EmbPipeline",
648649
"KandinskyV22PriorPipeline",
650+
"Krea2Pipeline",
649651
"LatentConsistencyModelImg2ImgPipeline",
650652
"LatentConsistencyModelPipeline",
651653
"LattePipeline",
@@ -1137,6 +1139,7 @@
11371139
JoyImageEditTransformer3DModel,
11381140
Kandinsky3UNet,
11391141
Kandinsky5Transformer3DModel,
1142+
Krea2Transformer2DModel,
11401143
LatteTransformer3DModel,
11411144
LongCatAudioDiTTransformer,
11421145
LongCatAudioDiTVae,
@@ -1481,6 +1484,7 @@
14811484
KandinskyV22Pipeline,
14821485
KandinskyV22PriorEmb2EmbPipeline,
14831486
KandinskyV22PriorPipeline,
1487+
Krea2Pipeline,
14841488
LatentConsistencyModelImg2ImgPipeline,
14851489
LatentConsistencyModelPipeline,
14861490
LattePipeline,

src/diffusers/loaders/lora_conversion_utils.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -551,11 +551,19 @@ def assign_remaining_weights(assignments, source):
551551
for target_fmt, source_fmt, transform in assignments:
552552
target_key = target_fmt.format(lora_key=lora_key)
553553
source_key = source_fmt.format(orig_lora_key=orig_lora_key)
554-
value = source.pop(source_key)
555-
if transform:
554+
value = source.pop(source_key, None)
555+
if value is None:
556+
continue
557+
if transform and lora_key == "lora_B":
556558
value = transform(value)
557559
ait_sd[target_key] = value
558560

561+
# Consume any leftover final_layer alpha keys so they don't
562+
# reach the remaining_keys guard and cause a false "Incompatible keys" error.
563+
for key in list(source.keys()):
564+
if "final_layer" in key and key.endswith(".alpha"):
565+
source.pop(key)
566+
559567
if any("guidance_in" in k for k in sds_sd):
560568
_convert_to_ai_toolkit(
561569
sds_sd,

src/diffusers/models/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122
_import_structure["transformers.transformer_ideogram4"] = ["Ideogram4Transformer2DModel"]
123123
_import_structure["transformers.transformer_joyimage"] = ["JoyImageEditTransformer3DModel"]
124124
_import_structure["transformers.transformer_kandinsky"] = ["Kandinsky5Transformer3DModel"]
125+
_import_structure["transformers.transformer_krea2"] = ["Krea2Transformer2DModel"]
125126
_import_structure["transformers.transformer_longcat_audio_dit"] = ["LongCatAudioDiTTransformer"]
126127
_import_structure["transformers.transformer_longcat_image"] = ["LongCatImageTransformer2DModel"]
127128
_import_structure["transformers.transformer_ltx"] = ["LTXVideoTransformer3DModel"]
@@ -255,6 +256,7 @@
255256
Ideogram4Transformer2DModel,
256257
JoyImageEditTransformer3DModel,
257258
Kandinsky5Transformer3DModel,
259+
Krea2Transformer2DModel,
258260
LatteTransformer3DModel,
259261
LongCatAudioDiTTransformer,
260262
LongCatImageTransformer2DModel,

src/diffusers/models/transformers/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from .transformer_ideogram4 import Ideogram4Transformer2DModel
4444
from .transformer_joyimage import JoyImageEditTransformer3DModel
4545
from .transformer_kandinsky import Kandinsky5Transformer3DModel
46+
from .transformer_krea2 import Krea2Transformer2DModel
4647
from .transformer_longcat_audio_dit import LongCatAudioDiTTransformer
4748
from .transformer_longcat_image import LongCatImageTransformer2DModel
4849
from .transformer_ltx import LTXVideoTransformer3DModel

0 commit comments

Comments
 (0)