From ea438a2c4f61b4465890fb9ed2863d5f75a57586 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 10 Oct 2023 15:02:24 -0400 Subject: [PATCH 1/7] Fix deadlock. --- hoomd_validation/alj_2d.py | 9 ++++----- hoomd_validation/hard_disk.py | 9 ++++----- hoomd_validation/hard_sphere.py | 9 ++++----- hoomd_validation/lj_fluid.py | 15 ++++++--------- hoomd_validation/lj_union.py | 18 ++++++++---------- hoomd_validation/simple_polygon.py | 9 ++++----- 6 files changed, 30 insertions(+), 39 deletions(-) diff --git a/hoomd_validation/alj_2d.py b/hoomd_validation/alj_2d.py index f5ea8af6..e876e90f 100644 --- a/hoomd_validation/alj_2d.py +++ b/hoomd_validation/alj_2d.py @@ -91,7 +91,8 @@ def alj_2d_create_initial_state(*jobs): device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=job.fn('create_initial_state.log'), + notice_level=5) num_particles = job.statepoint['num_particles'] density = job.statepoint['density'] @@ -136,8 +137,7 @@ def alj_2d_create_initial_state(*jobs): mode='wb') if communicator.rank == 0: - print(f'completed alj_2d_create_initial_state: ' - f'{job} in {communicator.walltime} s') + print(f'completed alj_2d_create_initial_state: {job}') def make_md_simulation(job, @@ -329,8 +329,7 @@ def alj_2d_nve_md_job(*jobs): complete_filename=f'{sim_mode}_{device_name}_complete') if communicator.rank == 0: - print(f'completed alj_2d_{sim_mode}_{device_name} ' - f'{job} in {communicator.walltime} s') + print(f'completed alj_2d_{sim_mode}_{device_name}: {job}') nve_md_sampling_jobs.append(alj_2d_nve_md_job) diff --git a/hoomd_validation/hard_disk.py b/hoomd_validation/hard_disk.py index 8a2067a7..cfbee609 100644 --- a/hoomd_validation/hard_disk.py +++ b/hoomd_validation/hard_disk.py @@ -101,7 +101,8 @@ def hard_disk_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + 
message_filename=job.fn('create_initial_state.log'), + notice_level=5) snap = hoomd.Snapshot(communicator) if communicator.rank == 0: @@ -129,8 +130,7 @@ def hard_disk_create_initial_state(*jobs): mode='wb') if communicator.rank == 0: - print(f'completed hard_disk_create_initial_state: ' - f'{job} in {communicator.walltime} s') + print(f'completed hard_disk_create_initial_state: {job}') def make_mc_simulation(job, @@ -573,8 +573,7 @@ def sampling_operation(*jobs): job, device, complete_filename=f'{mode}_{device_name}_complete') if communicator.rank == 0: - print(f'completed hard_disk_{mode}_{device_name} ' - f'{job} in {communicator.walltime} s') + print(f'completed hard_disk_{mode}_{device_name}: {job}') sampling_jobs.append(sampling_operation) diff --git a/hoomd_validation/hard_sphere.py b/hoomd_validation/hard_sphere.py index 9c66c2d4..b7a8daab 100644 --- a/hoomd_validation/hard_sphere.py +++ b/hoomd_validation/hard_sphere.py @@ -96,7 +96,8 @@ def hard_sphere_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=job.fn('create_initial_state.log'), + notice_level=5) snap = hoomd.Snapshot(device.communicator) if device.communicator.rank == 0: @@ -124,8 +125,7 @@ def hard_sphere_create_initial_state(*jobs): mode='wb') if communicator.rank == 0: - print(f'completed hard_sphere_create_initial_state: ' - f'{job} in {communicator.walltime} s') + print(f'completed hard_sphere_create_initial_state: {job}') def make_mc_simulation(job, @@ -452,8 +452,7 @@ def sampling_operation(*jobs): job, device, complete_filename=f'{mode}_{device_name}_complete') if communicator.rank == 0: - print(f'completed hard_sphere_{mode}_{device_name} ' - f'{job} in {communicator.walltime} s') + print(f'completed hard_sphere_{mode}_{device_name}: {job}') sampling_jobs.append(sampling_operation) diff --git a/hoomd_validation/lj_fluid.py b/hoomd_validation/lj_fluid.py index 
9ca6a645..2c5e084a 100644 --- a/hoomd_validation/lj_fluid.py +++ b/hoomd_validation/lj_fluid.py @@ -111,7 +111,8 @@ def lj_fluid_create_initial_state(*jobs): sp = job.sp device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=job.fn('create_initial_state.log'), + notice_level=5) box_volume = sp["num_particles"] / sp["density"] L = box_volume**(1 / 3.) @@ -152,8 +153,7 @@ def lj_fluid_create_initial_state(*jobs): mode='wb') if communicator.rank == 0: - print(f'completed lj_fluid_create_initial_state: ' - f'{job} in {communicator.walltime} s') + print(f'completed lj_fluid_create_initial_state: {job}') ################################# @@ -419,8 +419,7 @@ def md_sampling_operation(*jobs): complete_filename=f'{sim_mode}_{device_name}_complete') if communicator.rank == 0: - print(f'completed lj_fluid_{sim_mode}_{device_name}: ' - f'{job} in {communicator.walltime} s') + print(f'completed lj_fluid_{sim_mode}_{device_name}: {job}') md_sampling_jobs.append(md_sampling_operation) @@ -816,8 +815,7 @@ def sampling_operation(*jobs): job, device, complete_filename=f'{mode}_mc_{device_name}_complete') if communicator.rank == 0: - print(f'completed lj_fluid_{mode}_mc_{device_name} ' - f'{job} in {communicator.walltime} s') + print(f'completed lj_fluid_{mode}_mc_{device_name}: {job}') mc_sampling_jobs.append(sampling_operation) @@ -1348,8 +1346,7 @@ def lj_fluid_nve_md_job(*jobs): complete_filename=f'{sim_mode}_{device_name}_complete') if communicator.rank == 0: - print(f'completed lj_fluid_{sim_mode}_{device_name} ' - f'{job} in {communicator.walltime} s') + print(f'completed lj_fluid_{sim_mode}_{device_name} {job}') nve_md_sampling_jobs.append(lj_fluid_nve_md_job) diff --git a/hoomd_validation/lj_union.py b/hoomd_validation/lj_union.py index 2bc060d7..40eb28a7 100644 --- a/hoomd_validation/lj_union.py +++ b/hoomd_validation/lj_union.py @@ -99,7 +99,8 @@ def lj_union_create_initial_state(*jobs): sp = job.sp 
device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=job.fn('create_initial_state.log'), + notice_level=5) box_volume = sp["num_particles"] / sp["density"] L = box_volume**(1 / 3.) @@ -158,8 +159,7 @@ def lj_union_create_initial_state(*jobs): mode='wb') if communicator.rank == 0: - print(f'completed lj_union_create_initial_state: ' - f'{job} in {communicator.walltime} s') + print(f'completed lj_union_create_initial_state: {job}') ################################# @@ -449,8 +449,7 @@ def md_sampling_operation(*jobs): complete_filename=f'{sim_mode}_{device_name}_complete') if communicator.rank == 0: - print(f'completed lj_union_{sim_mode}_{device_name}: ' - f'{job} in {communicator.walltime} s') + print(f'completed lj_union_{sim_mode}_{device_name}: {job}') md_sampling_jobs.append(md_sampling_operation) @@ -874,8 +873,7 @@ def sampling_operation(*jobs): job, device, complete_filename=f'{mode}_mc_{device_name}_complete') if communicator.rank == 0: - print(f'completed lj_union_{mode}_mc_{device_name} ' - f'{job} in {communicator.walltime} s') + print(f'completed lj_union_{mode}_mc_{device_name} {job}') mc_sampling_jobs.append(sampling_operation) @@ -1423,15 +1421,15 @@ def lj_union_nve_md_job(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{sim_mode}_{device_name}.log')) + message_filename=job.fn(f'{sim_mode}_{device_name}.log'), + notice_level=10) run_nve_md_sim(job, device, run_length=run_length, complete_filename=f'{sim_mode}_{device_name}_complete') if communicator.rank == 0: - print(f'completed lj_union_{sim_mode}_{device_name} ' - f'{job} in {communicator.walltime} s') + print(f'completed lj_union_{sim_mode}_{device_name} {job}') nve_md_sampling_jobs.append(lj_union_nve_md_job) diff --git a/hoomd_validation/simple_polygon.py b/hoomd_validation/simple_polygon.py index e768af17..f66e31d7 100644 --- a/hoomd_validation/simple_polygon.py +++ 
b/hoomd_validation/simple_polygon.py @@ -112,7 +112,8 @@ def simple_polygon_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=job.fn('create_initial_state.log'), + notice_level=5) snap = hoomd.Snapshot(communicator) if communicator.rank == 0: @@ -146,8 +147,7 @@ def simple_polygon_create_initial_state(*jobs): ) if communicator.rank == 0: - print(f'completed simple_polygon_create_initial_state: ' - f'{job} in {communicator.walltime} s') + print(f'completed simple_polygon_create_initial_state: {job}') def make_mc_simulation(job, @@ -495,8 +495,7 @@ def sampling_operation(*jobs): job, device, complete_filename=f'{mode}_{device_name}_complete') if communicator.rank == 0: - print(f'completed simple_polygon_{mode}_{device_name} ' - f'{job} in {communicator.walltime} s') + print(f'completed simple_polygon_{mode}_{device_name} {job}') sampling_jobs.append(sampling_operation) From 528b7376d6096c493400cc1d9a0cee517a235621 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 10 Oct 2023 15:07:25 -0400 Subject: [PATCH 2/7] Remove non-default notice_level. 
--- hoomd_validation/alj_2d.py | 3 +-- hoomd_validation/hard_disk.py | 3 +-- hoomd_validation/hard_sphere.py | 3 +-- hoomd_validation/lj_fluid.py | 3 +-- hoomd_validation/lj_union.py | 6 ++---- hoomd_validation/simple_polygon.py | 3 +-- 6 files changed, 7 insertions(+), 14 deletions(-) diff --git a/hoomd_validation/alj_2d.py b/hoomd_validation/alj_2d.py index e876e90f..4c74dd76 100644 --- a/hoomd_validation/alj_2d.py +++ b/hoomd_validation/alj_2d.py @@ -91,8 +91,7 @@ def alj_2d_create_initial_state(*jobs): device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log'), - notice_level=5) + message_filename=job.fn('create_initial_state.log')) num_particles = job.statepoint['num_particles'] density = job.statepoint['density'] diff --git a/hoomd_validation/hard_disk.py b/hoomd_validation/hard_disk.py index cfbee609..195169ef 100644 --- a/hoomd_validation/hard_disk.py +++ b/hoomd_validation/hard_disk.py @@ -101,8 +101,7 @@ def hard_disk_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log'), - notice_level=5) + message_filename=job.fn('create_initial_state.log')) snap = hoomd.Snapshot(communicator) if communicator.rank == 0: diff --git a/hoomd_validation/hard_sphere.py b/hoomd_validation/hard_sphere.py index b7a8daab..d2c6de26 100644 --- a/hoomd_validation/hard_sphere.py +++ b/hoomd_validation/hard_sphere.py @@ -96,8 +96,7 @@ def hard_sphere_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log'), - notice_level=5) + message_filename=job.fn('create_initial_state.log')) snap = hoomd.Snapshot(device.communicator) if device.communicator.rank == 0: diff --git a/hoomd_validation/lj_fluid.py b/hoomd_validation/lj_fluid.py index 2c5e084a..9dba4319 100644 --- a/hoomd_validation/lj_fluid.py +++ b/hoomd_validation/lj_fluid.py @@ -111,8 
+111,7 @@ def lj_fluid_create_initial_state(*jobs): sp = job.sp device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log'), - notice_level=5) + message_filename=job.fn('create_initial_state.log')) box_volume = sp["num_particles"] / sp["density"] L = box_volume**(1 / 3.) diff --git a/hoomd_validation/lj_union.py b/hoomd_validation/lj_union.py index 40eb28a7..f675b0b8 100644 --- a/hoomd_validation/lj_union.py +++ b/hoomd_validation/lj_union.py @@ -99,8 +99,7 @@ def lj_union_create_initial_state(*jobs): sp = job.sp device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log'), - notice_level=5) + message_filename=job.fn('create_initial_state.log')) box_volume = sp["num_particles"] / sp["density"] L = box_volume**(1 / 3.) @@ -1421,8 +1420,7 @@ def lj_union_nve_md_job(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{sim_mode}_{device_name}.log'), - notice_level=10) + message_filename=job.fn(f'{sim_mode}_{device_name}.log')) run_nve_md_sim(job, device, run_length=run_length, diff --git a/hoomd_validation/simple_polygon.py b/hoomd_validation/simple_polygon.py index f66e31d7..b82da41f 100644 --- a/hoomd_validation/simple_polygon.py +++ b/hoomd_validation/simple_polygon.py @@ -112,8 +112,7 @@ def simple_polygon_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log'), - notice_level=5) + message_filename=job.fn('create_initial_state.log')) snap = hoomd.Snapshot(communicator) if communicator.rank == 0: From 8be1756d73aa48b1912327f184d2621e23d0c765 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Mon, 23 Oct 2023 15:49:05 -0400 Subject: [PATCH 3/7] Add frontier recommendations. 
--- documentation/frontier.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 documentation/frontier.md diff --git a/documentation/frontier.md b/documentation/frontier.md new file mode 100644 index 00000000..c7a51992 --- /dev/null +++ b/documentation/frontier.md @@ -0,0 +1,38 @@ +# Tips for running on OLCF Frontier + +## Recommended configuration + +``` +max_cores_sim: 56 +max_cores_submission: 7168 +max_gpus_submission: 256 +max_walltime: 2 +enable_llvm: false +enable_gpu: true +``` + +## Recommended template + +``` +{% extends "frontier.sh" %} + +{% block header %} + {{- super () -}} +#SBATCH -C nvme +{% endblock header %} +{% block custom_content %} + +echo "Loading software environment." + +export GLOTZERLAB_SOFTWARE_ROOT=/mnt/bb/${USER}/software +time srun --ntasks-per-node 1 mkdir ${GLOTZERLAB_SOFTWARE_ROOT} +time srun --ntasks-per-node 1 tar --directory ${GLOTZERLAB_SOFTWARE_ROOT} -xpf ${MEMBERWORK}/mat110/software.tar +source ${GLOTZERLAB_SOFTWARE_ROOT}/variables.sh + +{% endblock custom_content %} +{% block body %} + {{- super () -}} + +echo "Completed job in $SECONDS seconds" +{% endblock body %} +``` From 4e27707e2cca69bd67abe1ed5ebd027a6407390f Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Mon, 23 Oct 2023 16:05:15 -0400 Subject: [PATCH 4/7] Disable patchy particle tests when enable_llvm is False.
--- hoomd_validation/patchy_particle_pressure.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hoomd_validation/patchy_particle_pressure.py b/hoomd_validation/patchy_particle_pressure.py index 3a960c6c..672fbd99 100644 --- a/hoomd_validation/patchy_particle_pressure.py +++ b/hoomd_validation/patchy_particle_pressure.py @@ -149,6 +149,7 @@ def _single_patch_kern_frenkel_code(delta_rad, sq_well_lambda, sigma, kT, return patch_code +@Project.pre(lambda *jobs: CONFIG['enable_llvm']) @Project.post.isfile('patchy_particle_pressure_initial_state.gsd') @Project.operation( directives=dict(executable=CONFIG["executable"], From 61d503cdacabb462d9cc795f919e162f3ec784b1 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 24 Oct 2023 10:20:05 -0400 Subject: [PATCH 5/7] Keep multiple message file outputs. Disambiguate them with cluster job id prefixes. --- hoomd_validation/alj_2d.py | 4 ++-- hoomd_validation/hard_disk.py | 4 ++-- hoomd_validation/hard_sphere.py | 4 ++-- hoomd_validation/lj_fluid.py | 8 ++++---- hoomd_validation/lj_union.py | 8 ++++---- hoomd_validation/patchy_particle_pressure.py | 4 ++-- hoomd_validation/simple_polygon.py | 4 ++-- hoomd_validation/util.py | 10 ++++++++++ 8 files changed, 28 insertions(+), 18 deletions(-) diff --git a/hoomd_validation/alj_2d.py b/hoomd_validation/alj_2d.py index 4c74dd76..8fb49a3c 100644 --- a/hoomd_validation/alj_2d.py +++ b/hoomd_validation/alj_2d.py @@ -91,7 +91,7 @@ def alj_2d_create_initial_state(*jobs): device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=util.get_message_filename(job, 'create_initial_state.log')) num_particles = job.statepoint['num_particles'] density = job.statepoint['density'] @@ -322,7 +322,7 @@ def alj_2d_nve_md_job(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{sim_mode}_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) 
run_nve_md_sim(job, device, complete_filename=f'{sim_mode}_{device_name}_complete') diff --git a/hoomd_validation/hard_disk.py b/hoomd_validation/hard_disk.py index 195169ef..2e252f31 100644 --- a/hoomd_validation/hard_disk.py +++ b/hoomd_validation/hard_disk.py @@ -101,7 +101,7 @@ def hard_disk_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=util.get_message_filename(job, 'create_initial_state.log')) snap = hoomd.Snapshot(communicator) if communicator.rank == 0: @@ -566,7 +566,7 @@ def sampling_operation(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{mode}_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{mode}_{device_name}.log')) globals().get(f'run_{mode}_sim')( job, device, complete_filename=f'{mode}_{device_name}_complete') diff --git a/hoomd_validation/hard_sphere.py b/hoomd_validation/hard_sphere.py index d2c6de26..93e3680d 100644 --- a/hoomd_validation/hard_sphere.py +++ b/hoomd_validation/hard_sphere.py @@ -96,7 +96,7 @@ def hard_sphere_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=util.get_message_filename(job, 'create_initial_state.log')) snap = hoomd.Snapshot(device.communicator) if device.communicator.rank == 0: @@ -445,7 +445,7 @@ def sampling_operation(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'run_{mode}_{device_name}.log')) + message_filename=util.get_message_filename(job, f'run_{mode}_{device_name}.log')) globals().get(f'run_{mode}_sim')( job, device, complete_filename=f'{mode}_{device_name}_complete') diff --git a/hoomd_validation/lj_fluid.py b/hoomd_validation/lj_fluid.py index 9dba4319..28ad7b14 100644 --- a/hoomd_validation/lj_fluid.py +++ b/hoomd_validation/lj_fluid.py @@ -111,7 +111,7 @@ 
def lj_fluid_create_initial_state(*jobs): sp = job.sp device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=util.get_message_filename(job, 'create_initial_state.log')) box_volume = sp["num_particles"] / sp["density"] L = box_volume**(1 / 3.) @@ -409,7 +409,7 @@ def md_sampling_operation(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{sim_mode}_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) run_md_sim(job, device, @@ -808,7 +808,7 @@ def sampling_operation(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{mode}_mc_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{mode}_mc_{device_name}.log')) globals().get(f'run_{mode}_mc_sim')( job, device, complete_filename=f'{mode}_mc_{device_name}_complete') @@ -1338,7 +1338,7 @@ def lj_fluid_nve_md_job(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{sim_mode}_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) run_nve_md_sim(job, device, run_length=run_length, diff --git a/hoomd_validation/lj_union.py b/hoomd_validation/lj_union.py index f675b0b8..cc21d9eb 100644 --- a/hoomd_validation/lj_union.py +++ b/hoomd_validation/lj_union.py @@ -99,7 +99,7 @@ def lj_union_create_initial_state(*jobs): sp = job.sp device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=util.get_message_filename(job, 'create_initial_state.log')) box_volume = sp["num_particles"] / sp["density"] L = box_volume**(1 / 3.) 
@@ -439,7 +439,7 @@ def md_sampling_operation(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{sim_mode}_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) run_md_sim(job, device, @@ -866,7 +866,7 @@ def sampling_operation(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{mode}_mc_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{mode}_mc_{device_name}.log')) globals().get(f'run_{mode}_mc_sim')( job, device, complete_filename=f'{mode}_mc_{device_name}_complete') @@ -1420,7 +1420,7 @@ def lj_union_nve_md_job(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{sim_mode}_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) run_nve_md_sim(job, device, run_length=run_length, diff --git a/hoomd_validation/patchy_particle_pressure.py b/hoomd_validation/patchy_particle_pressure.py index 672fbd99..47647eab 100644 --- a/hoomd_validation/patchy_particle_pressure.py +++ b/hoomd_validation/patchy_particle_pressure.py @@ -192,7 +192,7 @@ def patchy_particle_pressure_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=util.get_message_filename(job, 'create_initial_state.log')) snap = hoomd.Snapshot(communicator) if communicator.rank == 0: @@ -602,7 +602,7 @@ def sampling_operation(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{mode}_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{mode}_{device_name}.log')) globals().get(f'run_{mode}_sim')( job, device, complete_filename=f'{mode}_{device_name}_complete') diff --git a/hoomd_validation/simple_polygon.py b/hoomd_validation/simple_polygon.py index b82da41f..3d1eeb6a 100644 --- a/hoomd_validation/simple_polygon.py +++ 
b/hoomd_validation/simple_polygon.py @@ -112,7 +112,7 @@ def simple_polygon_create_initial_state(*jobs): # create snapshot device = hoomd.device.CPU( communicator=communicator, - message_filename=job.fn('create_initial_state.log')) + message_filename=util.get_message_filename(job, 'create_initial_state.log')) snap = hoomd.Snapshot(communicator) if communicator.rank == 0: @@ -488,7 +488,7 @@ def sampling_operation(*jobs): device = device_cls( communicator=communicator, - message_filename=job.fn(f'{mode}_{device_name}.log')) + message_filename=util.get_message_filename(job, f'{mode}_{device_name}.log')) globals().get(f'run_{mode}_sim')( job, device, complete_filename=f'{mode}_{device_name}_complete') diff --git a/hoomd_validation/util.py b/hoomd_validation/util.py index 8c6676b8..e39f60e4 100644 --- a/hoomd_validation/util.py +++ b/hoomd_validation/util.py @@ -5,6 +5,7 @@ import numpy import signac +import os def true_all(*jobs, key): @@ -28,6 +29,15 @@ def get_job_filename(sim_mode, device, name, type): return f"{sim_mode}_{suffix}_{name}.{type}" +def get_message_filename(job, filename): + """Get a cluster job unique message filename. + """ + cluster_id = os.environ.get('SLURM_JOB_ID', None) + if cluster_id is not None: + return job.fn(f'{cluster_id}-{filename}') + else: + return job.fn(filename) + def run_up_to_walltime(sim, end_step, steps, walltime_stop): """Run a simulation, stopping early if a walltime limit is reached. From 0682c707604cc1a68dcfdc4f2e77ea04afcd064d Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 24 Oct 2023 11:22:15 -0400 Subject: [PATCH 6/7] Fix failing compare_modes jobs when enable_llvm is False. 
--- hoomd_validation/lj_fluid.py | 21 ++++++++++++--------- hoomd_validation/lj_union.py | 7 ++++--- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/hoomd_validation/lj_fluid.py b/hoomd_validation/lj_fluid.py index 28ad7b14..026081d4 100644 --- a/hoomd_validation/lj_fluid.py +++ b/hoomd_validation/lj_fluid.py @@ -1049,19 +1049,22 @@ def lj_fluid_compare_modes(*jobs): separate_nvt_npt=True) if quantity_name == "density": - print(f"Average npt_mc_cpu density {num_particles}:", - avg_quantity['npt_mc_cpu'], '+/-', - stderr_quantity['npt_mc_cpu']) + if 'npt_mc_cpu' in avg_quantity: + print(f"Average npt_mc_cpu density {num_particles}:", + avg_quantity['npt_mc_cpu'], '+/-', + stderr_quantity['npt_mc_cpu']) print(f"Average npt_md_cpu density {num_particles}:", avg_quantity['npt_bussi_md_cpu'], '+/-', stderr_quantity['npt_bussi_md_cpu']) if quantity_name == "pressure": - print(f"Average nvt_mc_cpu pressure {num_particles}:", - avg_quantity['nvt_mc_cpu'], '+/-', - stderr_quantity['nvt_mc_cpu']) - print(f"Average npt_mc_cpu pressure {num_particles}:", - avg_quantity['npt_mc_cpu'], '+/-', - stderr_quantity['npt_mc_cpu']) + if 'nvt_mc_cpu' in avg_quantity: + print(f"Average nvt_mc_cpu pressure {num_particles}:", + avg_quantity['nvt_mc_cpu'], '+/-', + stderr_quantity['nvt_mc_cpu']) + if 'npt_mc_cpu' in avg_quantity: + print(f"Average npt_mc_cpu pressure {num_particles}:", + avg_quantity['npt_mc_cpu'], '+/-', + stderr_quantity['npt_mc_cpu']) filename = f'lj_fluid_compare_kT{kT}_density{round(set_density, 2)}_' \ f'r_cut{round(jobs[0].statepoint.r_cut, 2)}_' \ diff --git a/hoomd_validation/lj_union.py b/hoomd_validation/lj_union.py index cc21d9eb..bcfd713b 100644 --- a/hoomd_validation/lj_union.py +++ b/hoomd_validation/lj_union.py @@ -1104,9 +1104,10 @@ def lj_union_compare_modes(*jobs): separate_nvt_npt=True) if quantity_name == "density": - print(f"Average npt_mc_cpu density {num_particles}:", - avg_quantity['npt_mc_cpu'], '+/-', - 
stderr_quantity['npt_mc_cpu']) + if 'npt_mc_cpu' in avg_quantity: + print(f"Average npt_mc_cpu density {num_particles}:", + avg_quantity['npt_mc_cpu'], '+/-', + stderr_quantity['npt_mc_cpu']) print(f"Average npt_md_cpu density {num_particles}:", avg_quantity['npt_bussi_md_cpu'], '+/-', stderr_quantity['npt_bussi_md_cpu']) From 4e13f89e1b322120c2e48b1073fb380d030f35fc Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 24 Oct 2023 11:23:25 -0400 Subject: [PATCH 7/7] Run pre-commit. --- hoomd_validation/alj_2d.py | 12 +++++----- hoomd_validation/hard_disk.py | 12 +++++----- hoomd_validation/hard_sphere.py | 12 +++++----- hoomd_validation/lj_fluid.py | 24 ++++++++++---------- hoomd_validation/lj_union.py | 24 ++++++++++---------- hoomd_validation/patchy_particle_pressure.py | 12 +++++----- hoomd_validation/simple_polygon.py | 12 +++++----- hoomd_validation/util.py | 4 ++-- 8 files changed, 56 insertions(+), 56 deletions(-) diff --git a/hoomd_validation/alj_2d.py b/hoomd_validation/alj_2d.py index 8fb49a3c..97a2be53 100644 --- a/hoomd_validation/alj_2d.py +++ b/hoomd_validation/alj_2d.py @@ -89,9 +89,9 @@ def alj_2d_create_initial_state(*jobs): init_diameter = CIRCUMCIRCLE_RADIUS * 2 * 1.15 - device = hoomd.device.CPU( - communicator=communicator, - message_filename=util.get_message_filename(job, 'create_initial_state.log')) + device = hoomd.device.CPU(communicator=communicator, + message_filename=util.get_message_filename( + job, 'create_initial_state.log')) num_particles = job.statepoint['num_particles'] density = job.statepoint['density'] @@ -320,9 +320,9 @@ def alj_2d_nve_md_job(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{sim_mode}_{device_name}.log')) run_nve_md_sim(job, device, 
complete_filename=f'{sim_mode}_{device_name}_complete') diff --git a/hoomd_validation/hard_disk.py b/hoomd_validation/hard_disk.py index 2e252f31..1a895b26 100644 --- a/hoomd_validation/hard_disk.py +++ b/hoomd_validation/hard_disk.py @@ -99,9 +99,9 @@ def hard_disk_create_initial_state(*jobs): position_2d = list(itertools.product(x, repeat=2))[:num_particles] # create snapshot - device = hoomd.device.CPU( - communicator=communicator, - message_filename=util.get_message_filename(job, 'create_initial_state.log')) + device = hoomd.device.CPU(communicator=communicator, + message_filename=util.get_message_filename( + job, 'create_initial_state.log')) snap = hoomd.Snapshot(communicator) if communicator.rank == 0: @@ -564,9 +564,9 @@ def sampling_operation(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{mode}_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{mode}_{device_name}.log')) globals().get(f'run_{mode}_sim')( job, device, complete_filename=f'{mode}_{device_name}_complete') diff --git a/hoomd_validation/hard_sphere.py b/hoomd_validation/hard_sphere.py index 93e3680d..01327f49 100644 --- a/hoomd_validation/hard_sphere.py +++ b/hoomd_validation/hard_sphere.py @@ -94,9 +94,9 @@ def hard_sphere_create_initial_state(*jobs): position = list(itertools.product(x, repeat=3))[:num_particles] # create snapshot - device = hoomd.device.CPU( - communicator=communicator, - message_filename=util.get_message_filename(job, 'create_initial_state.log')) + device = hoomd.device.CPU(communicator=communicator, + message_filename=util.get_message_filename( + job, 'create_initial_state.log')) snap = hoomd.Snapshot(device.communicator) if device.communicator.rank == 0: @@ -443,9 +443,9 @@ def sampling_operation(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = 
device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'run_{mode}_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'run_{mode}_{device_name}.log')) globals().get(f'run_{mode}_sim')( job, device, complete_filename=f'{mode}_{device_name}_complete') diff --git a/hoomd_validation/lj_fluid.py b/hoomd_validation/lj_fluid.py index 026081d4..e51ba2e7 100644 --- a/hoomd_validation/lj_fluid.py +++ b/hoomd_validation/lj_fluid.py @@ -109,9 +109,9 @@ def lj_fluid_create_initial_state(*jobs): print('starting lj_fluid_create_initial_state:', job) sp = job.sp - device = hoomd.device.CPU( - communicator=communicator, - message_filename=util.get_message_filename(job, 'create_initial_state.log')) + device = hoomd.device.CPU(communicator=communicator, + message_filename=util.get_message_filename( + job, 'create_initial_state.log')) box_volume = sp["num_particles"] / sp["density"] L = box_volume**(1 / 3.) 
@@ -407,9 +407,9 @@ def md_sampling_operation(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{sim_mode}_{device_name}.log')) run_md_sim(job, device, @@ -806,9 +806,9 @@ def sampling_operation(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{mode}_mc_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{mode}_mc_{device_name}.log')) globals().get(f'run_{mode}_mc_sim')( job, device, complete_filename=f'{mode}_mc_{device_name}_complete') @@ -1339,9 +1339,9 @@ def lj_fluid_nve_md_job(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{sim_mode}_{device_name}.log')) run_nve_md_sim(job, device, run_length=run_length, diff --git a/hoomd_validation/lj_union.py b/hoomd_validation/lj_union.py index bcfd713b..64b0d218 100644 --- a/hoomd_validation/lj_union.py +++ b/hoomd_validation/lj_union.py @@ -97,9 +97,9 @@ def lj_union_create_initial_state(*jobs): print('starting lj_union_create_initial_state:', job) sp = job.sp - device = hoomd.device.CPU( - communicator=communicator, - message_filename=util.get_message_filename(job, 'create_initial_state.log')) + device = hoomd.device.CPU(communicator=communicator, + message_filename=util.get_message_filename( + job, 'create_initial_state.log')) box_volume = sp["num_particles"] / sp["density"] L = box_volume**(1 / 3.) 
@@ -437,9 +437,9 @@ def md_sampling_operation(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{sim_mode}_{device_name}.log')) run_md_sim(job, device, @@ -864,9 +864,9 @@ def sampling_operation(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{mode}_mc_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{mode}_mc_{device_name}.log')) globals().get(f'run_{mode}_mc_sim')( job, device, complete_filename=f'{mode}_mc_{device_name}_complete') @@ -1419,9 +1419,9 @@ def lj_union_nve_md_job(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{sim_mode}_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{sim_mode}_{device_name}.log')) run_nve_md_sim(job, device, run_length=run_length, diff --git a/hoomd_validation/patchy_particle_pressure.py b/hoomd_validation/patchy_particle_pressure.py index 47647eab..d8300057 100644 --- a/hoomd_validation/patchy_particle_pressure.py +++ b/hoomd_validation/patchy_particle_pressure.py @@ -190,9 +190,9 @@ def patchy_particle_pressure_create_initial_state(*jobs): position = list(itertools.product(x, repeat=3))[:num_particles] # create snapshot - device = hoomd.device.CPU( - communicator=communicator, - message_filename=util.get_message_filename(job, 'create_initial_state.log')) + device = hoomd.device.CPU(communicator=communicator, + message_filename=util.get_message_filename( + job, 'create_initial_state.log')) 
snap = hoomd.Snapshot(communicator) if communicator.rank == 0: @@ -600,9 +600,9 @@ def sampling_operation(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{mode}_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{mode}_{device_name}.log')) globals().get(f'run_{mode}_sim')( job, device, complete_filename=f'{mode}_{device_name}_complete') diff --git a/hoomd_validation/simple_polygon.py b/hoomd_validation/simple_polygon.py index 3d1eeb6a..af1a61e8 100644 --- a/hoomd_validation/simple_polygon.py +++ b/hoomd_validation/simple_polygon.py @@ -110,9 +110,9 @@ def simple_polygon_create_initial_state(*jobs): position_2d = list(itertools.product(x, repeat=2))[:num_particles] # create snapshot - device = hoomd.device.CPU( - communicator=communicator, - message_filename=util.get_message_filename(job, 'create_initial_state.log')) + device = hoomd.device.CPU(communicator=communicator, + message_filename=util.get_message_filename( + job, 'create_initial_state.log')) snap = hoomd.Snapshot(communicator) if communicator.rank == 0: @@ -486,9 +486,9 @@ def sampling_operation(*jobs): elif device_name == 'cpu': device_cls = hoomd.device.CPU - device = device_cls( - communicator=communicator, - message_filename=util.get_message_filename(job, f'{mode}_{device_name}.log')) + device = device_cls(communicator=communicator, + message_filename=util.get_message_filename( + job, f'{mode}_{device_name}.log')) globals().get(f'run_{mode}_sim')( job, device, complete_filename=f'{mode}_{device_name}_complete') diff --git a/hoomd_validation/util.py b/hoomd_validation/util.py index e39f60e4..f442eb7f 100644 --- a/hoomd_validation/util.py +++ b/hoomd_validation/util.py @@ -30,14 +30,14 @@ def get_job_filename(sim_mode, device, name, type): def get_message_filename(job, filename): - """Get a cluster job unique 
message filename. - """ + """Get a cluster job unique message filename.""" cluster_id = os.environ.get('SLURM_JOB_ID', None) if cluster_id is not None: return job.fn(f'{cluster_id}-{filename}') else: return job.fn(filename) + def run_up_to_walltime(sim, end_step, steps, walltime_stop): """Run a simulation, stopping early if a walltime limit is reached.