Merge pull request #462 from DefTruth/main
[Parallel] Avoid OOM while batch size > 1
zRzRzRzRzRzRzR authored Nov 6, 2024
2 parents 3710a61 + bb69713 commit 4aebdb4
Showing 1 changed file with 4 additions and 1 deletion.

tools/parallel_inference/parallel_inference_xdit.py
@@ -61,11 +61,14 @@ def main():
     )
     if args.enable_sequential_cpu_offload:
         pipe.enable_model_cpu_offload(gpu_id=local_rank)
-        pipe.vae.enable_tiling()
     else:
         device = torch.device(f"cuda:{local_rank}")
         pipe = pipe.to(device)
 
+    # Always enable tiling and slicing to avoid VAE OOM while batch size > 1
+    pipe.vae.enable_slicing()
+    pipe.vae.enable_tiling()
+
     torch.cuda.reset_peak_memory_stats()
     start_time = time.time()
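
For context, the two calls this patch hoists out of the CPU-offload branch are standard diffusers VAE API. Below is a minimal standalone sketch of the same memory-saving setup, assuming a stock CogVideoXPipeline; the checkpoint name, prompt, and frame count are illustrative and not taken from this script:

import torch
from diffusers import CogVideoXPipeline

# Illustrative checkpoint; the parallel script constructs its pipeline elsewhere.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.bfloat16)
pipe = pipe.to("cuda")

# enable_slicing() decodes the latent batch one sample at a time, and
# enable_tiling() decodes each frame in overlapping spatial tiles, so peak
# VAE decode memory no longer grows with batch size times resolution.
# This is why the patch applies both calls unconditionally.
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()

video = pipe(prompt="a panda playing guitar", num_frames=49).frames[0]

Moving the calls below the if/else also fixes an asymmetry in the old code: tiling was enabled only on the CPU-offload path, so the plain-GPU path could still hit OOM in the VAE decode.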

