From 98d74f2b9e90221637402d6482f6ff63a72a29c7 Mon Sep 17 00:00:00 2001 From: Patrick Clay Date: Thu, 6 May 2021 10:26:17 -0700 Subject: [PATCH] Set --create_and_boot_post_task_delay to 2 seconds by default. EC2 seems to have a race condition when adding VMs to a clustered placement group. If you call `aws ec2 run-instances` in parallel adding multiple VMs to a clustered placement group, it incorrectly claims: ``` The perfkit-db9d2a668-731b3c6e8f4b Placement Group has already been used in another Availability Zone. Specify the correct Availability Zone and try again. ``` Sleep two seconds between boot tasks to work around this. If you want to measure how fast a cloud can boot many VMs without this sleep, use the large_scale_boot benchmark, which does not use clustered placement groups and uses a single run_instances command on AWS. If you would like to reproduce the race condition, run: ``` ./pkb.py --benchmarks=cluster_boot --cloud=AWS --num_vms=20 --create_and_boot_post_task_delay=0 ``` PiperOrigin-RevId: 372371889 --- CHANGES.next.md | 2 ++ perfkitbenchmarker/benchmark_spec.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.next.md b/CHANGES.next.md index ab973ce028..4239ce5e67 100644 --- a/CHANGES.next.md +++ b/CHANGES.next.md @@ -120,3 +120,5 @@ - Added an option to install GCP NCCL plugins. - Updated hbase binding (from hbase10 to hbase12) for cloud bigtable ycsb benchmark and hbase ycsb benchmark. +- Set a default `--create_and_boot_post_task_delay` of 2 seconds to fix EC2 + clustered placement groups.
diff --git a/perfkitbenchmarker/benchmark_spec.py b/perfkitbenchmarker/benchmark_spec.py index cfa4416ac5..e7eb478285 100644 --- a/perfkitbenchmarker/benchmark_spec.py +++ b/perfkitbenchmarker/benchmark_spec.py @@ -87,7 +87,7 @@ def UnPickleLock(locked, *args): 'Script to run right after vm boot.') flags.DEFINE_string('postrun_script', None, 'Script to run right after run stage.') -flags.DEFINE_integer('create_and_boot_post_task_delay', None, +flags.DEFINE_integer('create_and_boot_post_task_delay', 2, 'Delay in seconds to delay in between boot tasks.') # pyformat: disable flags.DEFINE_enum('benchmark_compatibility_checking', SUPPORTED,