-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Inference benchmarking metrics #69
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"metrics": {"compile_time": 34.46660280227661, "inference_time_1": 5.136486291885376, "inference_time_2": 5.136287450790405, "inference_time_3": 5.136406660079956, "inference_time_4": 5.136098861694336, "gcs_metrics": true, "save_config_to_gcs": false, "log_period": 100, "from_pt": false, "split_head_dim": true, "norm_num_groups": 32, "train_new_unet": false, "dcn_data_parallelism": -1, "dcn_fsdp_parallelism": 1, "dcn_tensor_parallelism": 1, "ici_data_parallelism": -1, "ici_fsdp_parallelism": 1, "ici_tensor_parallelism": 1, "resolution": 1024, "center_crop": false, "random_flip": false, "tokenize_captions_num_proc": 4, "transform_images_num_proc": 4, "reuse_example_batch": false, "enable_data_shuffling": true, "cache_latents_text_encoder_outputs": true, "learning_rate": 4e-07, "scale_lr": false, "max_train_samples": -1, "max_train_steps": 200, "num_train_epochs": 1, "seed": 0, "per_device_batch_size": 2, "warmup_steps_fraction": 0.0, "learning_rate_schedule_steps": 200, "adam_b1": 0.9, "adam_b2": 0.999, "adam_eps": 1e-08, "adam_weight_decay": 0.01, "enable_profiler": true, "skip_first_n_steps_for_profiler": 1, "profiler_steps": 5, "guidance_scale": 9, "guidance_rescale": 0.0, "num_inference_steps": 20, "lightning_from_pt": true, "enable_mllog": false, "use_controlnet": false, "controlnet_from_pt": true, "controlnet_conditioning_scale": 0.5}, "dimensions": {"date": "20240531-215048", "run_name": "my_run", "metrics_file": "", "model_name": "SDXL-1.0", "pretrained_model_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0", "revision": "refs/pr/95", "dtype": "bfloat16", "attention": "dot_product", "flash_block_sizes": "{}", "diffusion_scheduler_config": "{'_class_name': '', 'prediction_type': '', 'rescale_zero_terminal_snr': False, 'timestep_spacing': ''}", "base_output_directory": "", "mesh_axes": "['data', 'fsdp', 'tensor']", "logical_axis_rules": "(('batch', 'data'), ('activation_batch', 'data'), ('activation_length', 'fsdp'), ('out_channels', 'fsdp'), 
('conv_out', 'fsdp'), ('length', 'fsdp'))", "data_sharding": "(('data', 'fsdp', 'tensor'),)", "dataset_name": "diffusers/pokemon-gpt4-captions", "dataset_save_location": "/tmp/pokemon-gpt4-captions_xl", "train_data_dir": "", "dataset_config_name": "", "cache_dir": "", "image_column": "image", "caption_column": "text", "output_dir": "sdxl-model-finetuned", "prompt": "A magical castle in the middle of a forest, artistic drawing", "negative_prompt": "purple, red", "lightning_repo": "", "lightning_ckpt": "", "controlnet_model_name_or_path": "diffusers/controlnet-canny-sdxl-1.0", "controlnet_image": "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c1/Google_%22G%22_logo.svg/1024px-Google_%22G%22_logo.svg.png", "tensorboard_dir": "sdxl-model-finetuned/my_run/tensorboard/", "checkpoint_dir": "sdxl-model-finetuned/my_run/checkpoints/", "metrics_dir": "sdxl-model-finetuned/my_run/metrics/"}} | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,10 @@ | |
limitations under the License. | ||
""" | ||
|
||
import datetime | ||
import json | ||
import os | ||
import time | ||
from typing import Sequence | ||
from absl import app | ||
|
||
|
@@ -30,6 +33,8 @@ | |
|
||
cc.set_cache_dir(os.path.expanduser("~/jax_cache")) | ||
|
||
NUM_ITER = 5 | ||
|
||
def run(config): | ||
|
||
rng = jax.random.PRNGKey(config.seed) | ||
|
@@ -67,7 +72,11 @@ def run(config): | |
negative_prompt_ids = shard(negative_prompt_ids) | ||
processed_image = shard(processed_image) | ||
|
||
output = pipe( | ||
metrics_dict = {} | ||
for iter in range(NUM_ITER): | ||
if iter == 0: | ||
s = time.time() | ||
output = pipe( | ||
prompt_ids=prompt_ids, | ||
image=processed_image, | ||
params=p_params, | ||
|
@@ -76,7 +85,48 @@ def run(config): | |
neg_prompt_ids=negative_prompt_ids, | ||
controlnet_conditioning_scale=controlnet_conditioning_scale, | ||
jit=True, | ||
).images | ||
).images | ||
|
||
metrics_dict["compile_time"] = time.time() - s | ||
else: | ||
s = time.time() | ||
output = pipe( | ||
prompt_ids=prompt_ids, | ||
image=processed_image, | ||
params=p_params, | ||
prng_seed=rng, | ||
num_inference_steps=config.num_inference_steps, | ||
neg_prompt_ids=negative_prompt_ids, | ||
controlnet_conditioning_scale=controlnet_conditioning_scale, | ||
jit=True, | ||
Comment on lines
+93
to
+101
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: indent |
||
).images | ||
inference_time = time.time() - s | ||
metrics_dict[f"inference_time_{iter}"] = inference_time | ||
Comment on lines
+103
to
+104
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These two are the only lines that differ between the if and else blocks. Can you just use if/else around these lines? |
||
|
||
dimensions_dict = {} | ||
current_dt = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") | ||
dimensions_dict["date"] = current_dt | ||
|
||
|
||
for dim in config.get_keys(): | ||
val = config.get(dim) | ||
if isinstance(val, str): | ||
if dim == "model_name": | ||
dimensions_dict[dim] = "ControlNet" + str(config.get(dim)) | ||
else: | ||
dimensions_dict[dim] = str(config.get(dim)) | ||
elif isinstance(val, int) or isinstance(val, float): # noqa: E721 | ||
metrics_dict[dim] = val | ||
else: | ||
dimensions_dict[dim] = str(val) | ||
|
||
final_dict = {} | ||
final_dict["metrics"] = metrics_dict | ||
final_dict["dimensions"] = dimensions_dict | ||
|
||
with open("metrics.json", 'w') as f: | ||
f.write(json.dumps(final_dict)) | ||
|
||
|
||
output_images = pipe.numpy_to_pil(np.asarray(output.reshape((num_samples,) + output.shape[-3:]))) | ||
output_images[0].save("generated_image.png") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,8 @@ | |
limitations under the License. | ||
""" | ||
|
||
import datetime | ||
import json | ||
import os | ||
import functools | ||
from absl import app | ||
|
@@ -42,8 +44,6 @@ | |
create_device_mesh, | ||
get_dtype, | ||
get_states, | ||
activate_profiler, | ||
deactivate_profiler, | ||
device_put_replicated, | ||
get_flash_block_sizes, | ||
) | ||
|
@@ -55,6 +55,8 @@ | |
|
||
cc.set_cache_dir(os.path.expanduser("~/jax_cache")) | ||
|
||
NUM_ITER = 5 | ||
|
||
def loop_body(step, args, model, pipeline, added_cond_kwargs, prompt_embeds, guidance_scale, guidance_rescale): | ||
latents, scheduler_state, state = args | ||
latents_input = jnp.concatenate([latents] * 2) | ||
|
@@ -254,27 +256,42 @@ def run_inference(unet_state, vae_state, params, rng, config, batch_size, pipeli | |
out_shardings=None | ||
) | ||
|
||
s = time.time() | ||
p_run_inference(unet_state, vae_state, params).block_until_ready() | ||
print("compile time: ", (time.time() - s)) | ||
s = time.time() | ||
images = p_run_inference(unet_state, vae_state, params).block_until_ready() | ||
images.block_until_ready() | ||
print("inference time: ",(time.time() - s)) | ||
s = time.time() | ||
images = p_run_inference(unet_state, vae_state, params).block_until_ready() #run_inference(unet_state, vae_state, latents, scheduler_state) | ||
images.block_until_ready() | ||
print("inference time: ",(time.time() - s)) | ||
s = time.time() | ||
images = p_run_inference(unet_state, vae_state, params).block_until_ready() # run_inference(unet_state, vae_state, latents, scheduler_state) | ||
images.block_until_ready() | ||
print("inference time: ",(time.time() - s)) | ||
s = time.time() | ||
activate_profiler(config) | ||
images = p_run_inference(unet_state, vae_state, params).block_until_ready() | ||
deactivate_profiler(config) | ||
images.block_until_ready() | ||
print("inference time: ",(time.time() - s)) | ||
metrics_dict = {} | ||
for iter in range(NUM_ITER): | ||
if iter == 0: | ||
s = time.time() | ||
p_run_inference(unet_state, vae_state, params).block_until_ready() | ||
metrics_dict["compile_time"] = time.time() - s | ||
else: | ||
s = time.time() | ||
images = p_run_inference(unet_state, vae_state, params).block_until_ready() | ||
images.block_until_ready() | ||
inference_time = time.time() - s | ||
metrics_dict[f"inference_time_{iter}"] = inference_time | ||
|
||
dimensions_dict = {} | ||
current_dt = datetime.datetime.now().strftime("%Y%m%d-%H%M%S") | ||
dimensions_dict["date"] = current_dt | ||
|
||
for dim in config.get_keys(): | ||
val = config.get(dim) | ||
if isinstance(val, str): | ||
dimensions_dict[str(dim)] = str(config.get(dim)) | ||
elif isinstance(val, int) or isinstance(val, float): | ||
metrics_dict[dim] = val | ||
else: | ||
dimensions_dict[str(dim)] = str(val) | ||
|
||
final_dict = {} | ||
final_dict["metrics"] = metrics_dict | ||
final_dict["dimensions"] = dimensions_dict | ||
|
||
print("final_dict is ", final_dict) | ||
|
||
with open("metrics.json", 'w') as f: | ||
f.write(json.dumps(final_dict)) | ||
|
||
Comment on lines
+259
to
+293
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks the same as that in |
||
|
||
images = jax.experimental.multihost_utils.process_allgather(images) | ||
numpy_images = np.array(images) | ||
images = VaeImageProcessor.numpy_to_pil(numpy_images) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this an example file? Do we need this committed?