From 29798efaae3b315f0140f48651aa20216865e719 Mon Sep 17 00:00:00 2001 From: vbuterin Date: Fri, 14 Jun 2024 10:11:10 +0200 Subject: [PATCH] Add an option to do the final step on a CPU I needed to do this to be able to run the model on a 4070 (with 8 GB RAM). --- stable_audio_tools/inference/generation.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stable_audio_tools/inference/generation.py b/stable_audio_tools/inference/generation.py index 843ab4b7..281c7bb2 100644 --- a/stable_audio_tools/inference/generation.py +++ b/stable_audio_tools/inference/generation.py @@ -104,6 +104,7 @@ def generate_diffusion_cond( init_noise_level: float = 1.0, mask_args: dict = None, return_latents = False, + cpu_final_step:bool = False, **sampler_kwargs ) -> torch.Tensor: """ @@ -235,6 +236,8 @@ def generate_diffusion_cond( # v-diffusion: #sampled = sample(model.model, noise, steps, 0, **conditioning_tensors, embedding_scale=cfg_scale) + if cpu_final_step: + model.to('cpu') del noise del conditioning_tensors del conditioning_inputs @@ -244,6 +247,8 @@ def generate_diffusion_cond( if model.pretransform is not None and not return_latents: #cast sampled latents to pretransform dtype sampled = sampled.to(next(model.pretransform.parameters()).dtype) + if cpu_final_step: + sampled = sampled.to('cpu') sampled = model.pretransform.decode(sampled) # Return audio