Photo Background - 2d Compositing|写真背景・二次元合成
Trained on 2d illustrations composited on a photo background.
This is a small LoRA that I thought would be an interesting way to see how models trained on illustrations or real-world images/video can produce the composite, mixed-reality effect.
ℹ️ LoRAs work best when applied to the base models on which they were trained. Please read the About This Version section for the appropriate base models and workflow/training information.
Metadata is included in all uploaded files; you can drag the generated videos into ComfyUI to use the embedded workflows.
Description
Trained on Anima Base 1
Updated dataset with a mix of natural language and tag captions
Partitioned dataset and trained at multi-res 512, 768, 1024, 1280, 1536
Training config:
# trained using diffusion-pipe commit b0aa4f1e03169f3280c8518d37570a448420f8be
# NCCL_P2P_DISABLE="1" NCCL_IB_DISABLE="1" NCCL_CUMEM_ENABLE="0" deepspeed --num_gpus=1 train.py --deepspeed --config anima-lora.toml --i_know_what_i_am_doing

# Where training output is written, and which dataset config file to load.
output_dir = '/mnt/d/anima/training_output/anima-base-1-niji-v4'
dataset = 'dataset-anima-niji.toml'

# training settings
epochs = 2
# Per-resolution batch sizes, one [resolution, batch size] pair per line.
# Larger resolutions get smaller batches.
micro_batch_size_per_gpu = [
    [512, 64],
    [768, 48],
    [1024, 32],
    [1280, 24],
    [1536, 16],
]
pipeline_stages = 1
gradient_accumulation_steps = 1
gradient_clipping = 1
warmup_steps = 30
lr_scheduler = 'cosine'

# misc settings
save_every_n_epochs = 1
activation_checkpointing = true
partition_method = 'parameters'
save_dtype = 'bfloat16'
caching_batch_size = 1
map_num_proc = 8
steps_per_print = 1
compile = true
# Base model to train against: architecture type, weight precision,
# checkpoint locations, and model-specific training knobs.
[model]
type = 'anima'
dtype = 'bfloat16'

# Checkpoints for the transformer, the VAE and the LLM text encoder.
transformer_path = '/mnt/c/workspace/models/diffusion_models/anima-base-v1.0.safetensors'
vae_path = '/mnt/c/workspace/models/vae/qwen_image_vae.safetensors'
llm_path = '/mnt/c/workspace/models/text_encoders/qwen_3_06b_base.safetensors'

# Disabled overrides, kept for reference.
#cache_text_embeddings = false
#timestep_sample_method = 'uniform'

# NOTE(review): a learning rate of 0 presumably leaves the LLM adapter
# untrained; confirm against the diffusion-pipe docs.
llm_adapter_lr = 0
flux_shift = true
multiscale_loss_weight = 0.5
sigmoid_scale = 1.3
# Adapter being trained: a rank-32 LoRA stored in bfloat16.
[adapter]
type = 'lora'
rank = 32
dtype = 'bfloat16'
# Optimizer selection and hyperparameters.
[optimizer]
type = 'adamw_optimi'
lr = 4e-5
betas = [0.9, 0.99]
weight_decay = 0.01
eps = 1e-8

# ---------------------------------------------------------------------------
# Dataset config. This is a separate TOML file, presumably the
# 'dataset-anima-niji.toml' named by the `dataset` key of the training config.
# Keep it out of the training config: placed after [optimizer], these keys
# would be parsed as optimizer settings.
# ---------------------------------------------------------------------------
# Resolutions used for multi-resolution training; individual [[directory]]
# entries may narrow this list.
resolutions = [512, 768, 1024, 1280, 1536]
# Aspect-ratio bucketing: 9 buckets covering aspect ratios from 0.5 to 2.0.
enable_ar_bucket = true
min_ar = 0.5
max_ar = 2.0
num_ar_buckets = 9
# NOTE(review): leftover commented-out copy of a training-config key. Its second
# entry has three elements ([768, 768, 48]) unlike the other pairs; fix that
# before re-enabling. The active value is set in the main training config.
# micro_batch_size_per_gpu = [[512, 64], [768, 768, 48], [1024, 32], [1280, 24], [1536, 16]]
# Dataset partitions, one [[directory]] per source image size. Each entry sets
# its own `resolutions` list, and none goes above the size named in its path.
# The comment above each entry records that partition's caption count.
# images_niji\1536x1536\captions.json with 124 entries.
[[directory]]
path = '/mnt/d/training_data/images_niji/1536x1536'
resolutions = [512, 1024, 1280, 1536]
# images_niji\1280x1280\captions.json with 28 entries.
[[directory]]
path = '/mnt/d/training_data/images_niji/1280x1280'
resolutions = [512, 1024, 1280]
# images_niji\1024x1024\captions.json with 137 entries.
[[directory]]
path = '/mnt/d/training_data/images_niji/1024x1024'
resolutions = [512, 768, 1024]
# images_niji\768x768\captions.json with 43 entries.
[[directory]]
path = '/mnt/d/training_data/images_niji/768x768'
resolutions = [512, 768]
# images_niji\512x512\captions.json with 5 entries.
[[directory]]
path = '/mnt/d/training_data/images_niji/512x512'
resolutions = [512]









