Skip to content

Commit

Permalink
fix: job states for job arrays in e-mails (issue #109)
Browse files: browse the repository at this point in the history
  • Loading branch information
neilmunday committed Jan 19, 2024
1 parent ba1860c commit c38287e
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 87 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Version 4.10
Date: 2024-01-16

* Add support for plain text e-mails (issue #108).
* Fix job array states in e-mails (issue #109).

Version 4.9
-----------
Expand Down
11 changes: 9 additions & 2 deletions src/slurmmail/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,17 @@ def __process_spool_file(
job.max_rss_str = sacct_dict["MaxRSS"]
continue

if "{0}".format(first_job_id) not in sacct_dict["JobId"]:
job_id = int(sacct_dict["JobIdRaw"])

if not array_summary and job_id != int(first_job_id):
logging.debug("skipping %s, it does not equal %s", job_id, first_job_id)
print("skipping %s, it does not equal %s" % (job_id, first_job_id))
continue

if array_summary and "{0}".format(first_job_id) not in sacct_dict["JobId"]:
logging.debug("skipping %s for job array summary", sacct_dict["JobId"])
continue

job_id = int(sacct_dict["JobIdRaw"])
if "_" in sacct_dict["JobId"]:
job = Job(
options.datetime_format,
Expand Down
92 changes: 7 additions & 85 deletions tests/unit/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,26 +948,7 @@ def test_job_array_ended_no_summary(
sacct_output += "7_0.batch||||1675460419|1675460599|COMPLETED||4832K|1|00:00.010|1||00:03:00|0:0|||test|node01|||8.batch|batch\n" # noqa
sacct_output += "7_1|root|root|all|1675460599|1675460779|COMPLETED|500M||1|1|00:00.010|1|/root|00:03:00|0:0|||test|node01|00:05:00|5|7|test.jcf\n" # noqa
sacct_output += "7_1.batch||||1675460599|1675460779|COMPLETED||4784K|1|00:00.010|1||00:03:00|0:0|||test|node01|||7.batch|batch" # noqa
scontrol_output_1 = (
"JobId=8 ArrayJobId=7 ArrayTaskId=0 JobName=test.jcf UserId=root(0)"
" GroupId=root(0) MCS_label=N/A Priority=4294901756 Nice=0 Account=root"
" QOS=normal JobState=COMPLETED Reason=None Dependency=(null) Requeue=1"
" Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0 RunTime=00:03:00"
" TimeLimit=00:05:00 TimeMin=N/A SubmitTime=1675460418"
" EligibleTime=1675460419 AccrueTime=1675460419 StartTime=1675460419"
" EndTime=1675460599 Deadline=N/A SuspendTime=None SecsPreSuspend=0"
" LastSchedEval=1675460419 Scheduler=Main Partition=all"
" AllocNode:Sid=4d366bf54ae3:228 ReqNodeList=(null) ExcNodeList=(null)"
" NodeList=node01 BatchHost=node01 NumNodes=1 NumCPUs=1 NumTasks=1"
" CPUs/Task=1 ReqB:S:C:T=0:0:*:* TRES=cpu=1,node=1,billing=1"
" Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=* MinCPUsNode=1"
" MinMemoryNode=0 MinTmpDiskNode=0 Features=(null) DelayBoot=00:00:00"
" OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)"
" Command=/root/test.jcf WorkDir=/root StdErr=/root/slurm-7_0.out"
" StdIn=/dev/null StdOut=/root/slurm-7_0.out Power= MailUser=root"
" MailType=INVALID_DEPEND,BEGIN,END,FAIL,REQUEUE,STAGE_OUT"
)
scontrol_output_2 = (
scontrol_output = (
"JobId=7 ArrayJobId=7 ArrayTaskId=1 JobName=test.jcf UserId=root(0)"
" GroupId=root(0) MCS_label=N/A Priority=4294901756 Nice=0 Account=root"
" QOS=normal JobState=COMPLETED Reason=None Dependency=(null) Requeue=1"
Expand All @@ -986,38 +967,18 @@ def test_job_array_ended_no_summary(
" StdIn=/dev/null StdOut=/root/slurm-7_1.out Power= MailUser=root"
" MailType=INVALID_DEPEND,BEGIN,END,FAIL,REQUEUE,STAGE_OUT\n"
)
scontrol_output_2 += (
"JobId=8 ArrayJobId=7 ArrayTaskId=0 JobName=test.jcf UserId=root(0)"
" GroupId=root(0) MCS_label=N/A Priority=4294901756 Nice=0 Account=root"
" QOS=normal JobState=COMPLETED Reason=None Dependency=(null) Requeue=1"
" Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0 RunTime=00:03:00"
" TimeLimit=00:05:00 TimeMin=N/A SubmitTime=1675460418"
" EligibleTime=1675460419 AccrueTime=1675460419 StartTime=1675460419"
" EndTime=1675460599 Deadline=N/A SuspendTime=None SecsPreSuspend=0"
" LastSchedEval=1675460419 Scheduler=Main Partition=all"
" AllocNode:Sid=4d366bf54ae3:228 ReqNodeList=(null) ExcNodeList=(null)"
" NodeList=node01 BatchHost=node01 NumNodes=1 NumCPUs=1 NumTasks=1"
" CPUs/Task=1 ReqB:S:C:T=0:0:*:* TRES=cpu=1,node=1,billing=1"
" Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=* MinCPUsNode=1"
" MinMemoryNode=0 MinTmpDiskNode=0 Features=(null) DelayBoot=00:00:00"
" OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)"
" Command=/root/test.jcf WorkDir=/root StdErr=/root/slurm-7_0.out"
" StdIn=/dev/null StdOut=/root/slurm-7_0.out Power= MailUser=root"
" MailType=INVALID_DEPEND,BEGIN,END,FAIL,REQUEUE,STAGE_OUT"
)
mock_slurmmail_cli_run_command.side_effect = [
(0, sacct_output, ""),
(0, scontrol_output_1, ""),
(0, scontrol_output_2, ""),
(0, scontrol_output, ""),
]
slurmmail.cli.__dict__["__process_spool_file"](
pathlib.Path("/tmp/foo"),
smtplib.SMTP(),
mock_slurmmail_cli_process_spool_file_options,
)
assert mock_slurmmail_cli_run_command.call_count == 3
assert mock_slurmmail_cli_run_command.call_count == 2
mock_slurmmail_cli_delete_spool_file.assert_called_once()
assert mock_smtp_sendmail.call_count == 2
mock_smtp_sendmail.assert_called_once()
# Note: call.args was added in Python 3.8 so we can't use it here.
for call in mock_smtp_sendmail.mock_calls:
_, args, _ = call
Expand Down Expand Up @@ -1053,26 +1014,7 @@ def test_job_array_ended_no_summary_max_notifications_exceeded(
sacct_output += "7_0.batch||||1675460419|1675460599|COMPLETED||4832K|1|00:00.010|1||00:03:00|0:0|||test|node01|||8.batch|batch\n" # noqa
sacct_output += "7_1|root|root|all|1675460599|1675460779|COMPLETED|500M||1|1|00:00.010|1|/root|00:03:00|0:0|||test|node01|00:05:00|5|7|test.jcf\n" # noqa
sacct_output += "7_1.batch||||1675460599|1675460779|COMPLETED||4784K|1|00:00.010|1||00:03:00|0:0|||test|node01|||7.batch|batch" # noqa
scontrol_output_1 = (
"JobId=8 ArrayJobId=7 ArrayTaskId=0 JobName=test.jcf UserId=root(0)"
" GroupId=root(0) MCS_label=N/A Priority=4294901756 Nice=0 Account=root"
" QOS=normal JobState=COMPLETED Reason=None Dependency=(null) Requeue=1"
" Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0 RunTime=00:03:00"
" TimeLimit=00:05:00 TimeMin=N/A SubmitTime=1675460418"
" EligibleTime=1675460419 AccrueTime=1675460419 StartTime=1675460419"
" EndTime=1675460599 Deadline=N/A SuspendTime=None SecsPreSuspend=0"
" LastSchedEval=1675460419 Scheduler=Main Partition=all"
" AllocNode:Sid=4d366bf54ae3:228 ReqNodeList=(null) ExcNodeList=(null)"
" NodeList=node01 BatchHost=node01 NumNodes=1 NumCPUs=1 NumTasks=1"
" CPUs/Task=1 ReqB:S:C:T=0:0:*:* TRES=cpu=1,node=1,billing=1"
" Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=* MinCPUsNode=1"
" MinMemoryNode=0 MinTmpDiskNode=0 Features=(null) DelayBoot=00:00:00"
" OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)"
" Command=/root/test.jcf WorkDir=/root StdErr=/root/slurm-7_0.out"
" StdIn=/dev/null StdOut=/root/slurm-7_0.out Power= MailUser=root"
" MailType=INVALID_DEPEND,BEGIN,END,FAIL,REQUEUE,STAGE_OUT"
)
scontrol_output_2 = (
scontrol_output = (
"JobId=7 ArrayJobId=7 ArrayTaskId=1 JobName=test.jcf UserId=root(0)"
" GroupId=root(0) MCS_label=N/A Priority=4294901756 Nice=0 Account=root"
" QOS=normal JobState=COMPLETED Reason=None Dependency=(null) Requeue=1"
Expand All @@ -1091,36 +1033,16 @@ def test_job_array_ended_no_summary_max_notifications_exceeded(
" StdIn=/dev/null StdOut=/root/slurm-7_1.out Power= MailUser=root"
" MailType=INVALID_DEPEND,BEGIN,END,FAIL,REQUEUE,STAGE_OUT\n"
)
scontrol_output_2 += (
"JobId=8 ArrayJobId=7 ArrayTaskId=0 JobName=test.jcf UserId=root(0)"
" GroupId=root(0) MCS_label=N/A Priority=4294901756 Nice=0 Account=root"
" QOS=normal JobState=COMPLETED Reason=None Dependency=(null) Requeue=1"
" Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0 RunTime=00:03:00"
" TimeLimit=00:05:00 TimeMin=N/A SubmitTime=1675460418"
" EligibleTime=1675460419 AccrueTime=1675460419 StartTime=1675460419"
" EndTime=1675460599 Deadline=N/A SuspendTime=None SecsPreSuspend=0"
" LastSchedEval=1675460419 Scheduler=Main Partition=all"
" AllocNode:Sid=4d366bf54ae3:228 ReqNodeList=(null) ExcNodeList=(null)"
" NodeList=node01 BatchHost=node01 NumNodes=1 NumCPUs=1 NumTasks=1"
" CPUs/Task=1 ReqB:S:C:T=0:0:*:* TRES=cpu=1,node=1,billing=1"
" Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=* MinCPUsNode=1"
" MinMemoryNode=0 MinTmpDiskNode=0 Features=(null) DelayBoot=00:00:00"
" OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)"
" Command=/root/test.jcf WorkDir=/root StdErr=/root/slurm-7_0.out"
" StdIn=/dev/null StdOut=/root/slurm-7_0.out Power= MailUser=root"
" MailType=INVALID_DEPEND,BEGIN,END,FAIL,REQUEUE,STAGE_OUT"
)
mock_slurmmail_cli_run_command.side_effect = [
(0, sacct_output, ""),
(0, scontrol_output_1, ""),
(0, scontrol_output_2, ""),
(0, scontrol_output, ""),
]
slurmmail.cli.__dict__["__process_spool_file"](
pathlib.Path("/tmp/foo"),
smtplib.SMTP(),
mock_slurmmail_cli_process_spool_file_options,
)
assert mock_slurmmail_cli_run_command.call_count == 3
assert mock_slurmmail_cli_run_command.call_count == 2
mock_slurmmail_cli_delete_spool_file.assert_called_once()
assert mock_smtp_sendmail.call_count == mock_slurmmail_cli_process_spool_file_options.array_max_notifications
# Note: call.args was added in Python 3.8 so we can't use it here.
Expand Down

0 comments on commit c38287e

Please sign in to comment.