-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathconfig_model.json
More file actions
178 lines (178 loc) · 7.82 KB
/
config_model.json
File metadata and controls
178 lines (178 loc) · 7.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
{
"model_path": "/mnt/DataSSD/AI/models/RaiFlow/RaiFlow",
"model_type": "raiflow",
"latent_type": "latent",
"embed_type": "token",
"sampler_config": {
"weighting_scheme": "mode",
"logit_mean": -0.2,
"logit_std": 1.5,
"mode_scale": 0.64,
"timestep_bias": 0.5,
"shift": 2.5
},
"loss_weighting": "none",
"sensitive_keys": [],
"override_sensitive_keys": false,
"optimizer": "sdnq.optim.AdamW",
"optimizer_args": {"weight_decay": 0.01, "betas": [0.9, 0.999], "final_norm_mode": "rms_clip", "use_quantized_buffers": true, "quantized_buffers_dtype": "uint8", "offload_buffers": true, "use_torch_compile": false, "use_kahan": false},
"optimizer_args_sensitive": {"weight_decay": 0.01, "betas": [0.9, 0.999], "final_norm_mode": "rms_clip", "use_quantized_buffers": true, "quantized_buffers_dtype": "uint8", "offload_buffers": true, "use_torch_compile": false, "use_kahan": false},
"fused_optimizer": true,
"optimizer_cpu_offload": false,
"optimizer_offload_gradients": false,
"lr_scheduler": "SequentialLR",
"lr_scheduler_args": {
"milestones": [8192], "schedulers": ["LinearLR", "CosineAnnealingLR"],
"args": [{"start_factor": 0.001, "end_factor": 1, "total_iters": 8192}, {"T_max": 20480000, "eta_min": 1e-7}]
},
"epochs": 100,
"batch_size": 64,
"learning_rate": 4e-5,
"learning_rate_sensitive": 4e-5,
"gradient_accumulation_steps": 1,
"gradient_checkpointing": true,
"loss_type": "mse",
"loss_reduction": "mean",
"weights_dtype": "float16",
"mixed_precision": "no",
"dynamo_backend": "no",
"use_grad_scaler": true,
"quantization_config": {
"weights_dtype": "uint16",
"quantized_matmul_dtype": "int8",
"group_size": 32,
"svd_rank": 32,
"use_svd": false,
"use_quantized_matmul": false,
"use_static_quantization": true,
"use_stochastic_rounding": true,
"non_blocking": false,
"add_skip_keys": true,
"modules_to_not_convert": [],
"modules_dtype_dict": {}
},
"max_grad_norm": 0.0,
"skip_grad_norm": 0.0,
"skip_grad_norm_steps": 5120,
"max_grad_clip": 0.0,
"log_grad_stats": false,
"do_nan_embed_check": false,
"dropout_rate": 0.0,
"mask_rate": 0.25,
"mask_high_rate": 8,
"mask_low_rate": -2,
"self_correct_rate": 0.25,
"tunableop": null,
"cudnn_enabled": true,
"allow_tf32": true,
"allow_reduced_precision": true,
"flash_sdp": true,
"mem_efficient_sdp": true,
"dynamic_sdp": false,
"math_sdp": true,
"math_sdp_reduction": true,
"ema_decay": 0.99,
"ema_update_steps": 4,
"use_ema": true,
"use_foreach_ema": false,
"update_ema_on_cpu": false,
"offload_ema_to_cpu": true,
"offload_ema_non_blocking": false,
"offload_ema_pin_memory": false,
"ema_weights_dtype": "float16",
"gc_steps": 64,
"checkpoint_save_steps": 256,
"checkpoints_limit": 20,
"max_load_workers": 1,
"load_queue_lenght": 8,
"dataloader_pin_memory": false,
"reshuffle": true,
"do_file_check": false,
"use_latent_dataset": false,
"use_embed_dataset": false,
"encode_latents_with_cpu": false,
"encode_embeds_with_cpu": false,
"offload_latent_encoder_to_cpu": false,
"offload_latent_encoder_non_blocking": false,
"offload_latent_encoder_non_blocking_cpu": false,
"offload_embed_encoder_to_cpu": false,
"offload_embed_encoder_non_blocking": false,
"offload_embed_encoder_non_blocking_cpu": false,
"offload_diffusion_model_to_cpu": false,
"offload_diffusion_model_non_blocking": false,
"offload_diffusion_model_non_blocking_cpu": false,
"latent_encoder_dtype": "float16",
"embed_encoder_dtype": "bfloat16",
"quantize_embed_encoder": true,
"embed_encoder_quantization_config": {
"weights_dtype": "uint4",
"quantized_matmul_dtype": "int8",
"group_size": 0,
"svd_rank": 32,
"svd_steps": 8,
"dynamic_loss_threshold": 1e-2,
"use_svd": false,
"use_quantized_matmul": false,
"use_dynamic_quantization": true,
"non_blocking": false,
"add_skip_keys": true,
"modules_to_not_convert": [],
"modules_dtype_dict": {}
},
"log_with": "wandb",
"project_name": "raiflow",
"resume_from": "latest",
"dataset_index": "cache/dataset_index_256_64",
"project_dir": "/mnt/DataSSD/AI/models/RaiFlow/train_model",
"dataset_paths": [
{
"path": "/mnt/DataSSD/AI/anime_image_dataset/dataset/danbooru/danbooru-jxl",
"bucket_list": "/mnt/DataSSD/AI/anime_image_dataset/dataset/danbooru/metadata/bucket_list-256px.json",
"text_embeds": [
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/danbooru/danbooru-florence2", "repeats": 3},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/danbooru/danbooru-wd", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/danbooru/danbooru-wd-medium", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/danbooru/danbooru-wd-short", "repeats": 2},
{"path": "empty_embed", "repeats": 1}
],
"repeats": 1
},
{
"path": "/mnt/DataSSD/AI/anime_image_dataset/dataset/anime-pictures/anime-pictures-jxl",
"bucket_list": "/mnt/DataSSD/AI/anime_image_dataset/dataset/anime-pictures/metadata/bucket_list-256px.json",
"text_embeds": [
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/anime-pictures/anime-pictures-florence2", "repeats": 3},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/anime-pictures/anime-pictures-wd", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/anime-pictures/anime-pictures-wd-medium", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/anime-pictures/anime-pictures-wd-short", "repeats": 2},
{"path": "empty_embed", "repeats": 1}
],
"repeats": 1
},
{
"path": "/mnt/DataSSD/AI/anime_image_dataset/dataset/pixiv/pixiv-jxl",
"bucket_list": "/mnt/DataSSD/AI/anime_image_dataset/dataset/pixiv/metadata/bucket_list-256px.json",
"text_embeds": [
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/pixiv/pixiv-florence2", "repeats": 3},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/pixiv/pixiv-wd", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/pixiv/pixiv-wd-medium", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/pixiv/pixiv-wd-short", "repeats": 2},
{"path": "empty_embed", "repeats": 1}
],
"repeats": 1
},
{
"path": "/mnt/DataSSD/AI/anime_image_dataset/dataset/vncg/vncg-jxl",
"bucket_list": "/mnt/DataSSD/AI/anime_image_dataset/dataset/vncg/metadata/bucket_list-256px.json",
"text_embeds": [
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/vncg/vncg-qwen3", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/vncg/vncg-florence2", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/vncg/vncg-wd", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/vncg/vncg-wd-medium", "repeats": 2},
{"path": "/mnt/DataSSD/AI/anime_image_dataset/captions/vncg/vncg-wd-short", "repeats": 1},
{"path": "empty_embed", "repeats": 1}
],
"repeats": 1
}
]
}