-
Notifications
You must be signed in to change notification settings - Fork 0
/
pcluster-config-extras.yaml
60 lines (60 loc) · 2.42 KB
/
pcluster-config-extras.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# This file contains settings to be merged into the pcluster-config.yaml file after the initial configuration.
# Head-node overrides.
HeadNode:
  LocalStorage:
    RootVolume:
      # NOTE(review): this explicitly turns root-volume encryption off;
      # confirm that is intended before merging into the main config.
      Encrypted: false
# Slurm scheduler overrides: cluster-wide settings first, then per-queue
# settings for the "shared" and "gpu-shared" queues.
Scheduling:
  SlurmSettings:
    EnableMemoryBasedScheduling: true
    ScaledownIdletime: 30  # default is 10 minutes
    # Cluster-level custom Slurm parameters, one parameter per list entry.
    CustomSlurmSettings:
      - SlurmctldDebug: info  # default is "verbose"
      - Waittime: 30  # default is 0
      - MaxJobCount: 1000  # default is 10000
      - MaxStepCount: 4000  # default is 40000
      - MaxTasksPerNode: 32  # default is 512
      - PropagateResourceLimits: "NONE"  # default is "ALL"
      - AccountingStoreFlags: job_comment
      - PrologFlags: "contain"  # default is "none"
      # default is 0, recommended somewhere between 0 and 32
      - CompleteWait: 8
      - DefMemPerCPU: 1280
      - MaxMemPerCPU: 1945
      - MpiDefault: pmix  # default is none
      # 200 is special return code for spot instance termination
      - RequeueExit: 200
  SlurmQueues:
    - Name: shared
      CapacityType: "SPOT"  # default is "ONDEMAND"
      ComputeSettings:
        LocalStorage:
          EphemeralVolume:
            MountDir: /scratch
      # Queue-level custom Slurm parameters (a mapping, not a list).
      CustomSlurmSettings:
        MaxNodes: 4
        MaxTime: "4:00:00"  # quoted so it is not read as a sexagesimal number
      ComputeResources:
        - MinCount: 1  # always keep one around for quick usage
          # see: https://docs.aws.amazon.com/parallelcluster/latest/ug/slurm-mem-based-scheduling-v3.html#slurm-mem-based-scheduling-realmemory-v3
          SchedulableMemory: 15500
          # TODO: DisableSimultaneousMultithreading: true
    - Name: gpu-shared
      ComputeResources:
        - Name: g5
          Instances:
            # 2xlarge would give 8 vCPUs and 32 GB of memory for +$0.15/hr;
            # no difference in GPU though
            - InstanceType: g5.xlarge
          MinCount: 0
          MaxCount: 25
          # see: https://docs.aws.amazon.com/parallelcluster/latest/ug/slurm-mem-based-scheduling-v3.html#slurm-mem-based-scheduling-realmemory-v3
          # SchedulableMemory: 15500
      Networking:
        SubnetIds:
          - subnet-09886d02611b86d68
      CapacityType: "SPOT"  # default is "ONDEMAND"
      ComputeSettings:
        LocalStorage:
          EphemeralVolume:
            MountDir: /scratch
      # Queue-level custom Slurm parameters (a mapping, not a list).
      CustomSlurmSettings:
        MaxNodes: 1
        MaxTime: "2:00:00"  # quoted so it is not read as a sexagesimal number
      CustomActions:
        OnNodeStart:
          Sequence:
            # NOTE(review): this URL tracks the `main` branch, so the script
            # can change between node launches; consider pinning to a commit
            # or tag.
            - Script: https://raw.githubusercontent.com/MoravianUniversity/AWSParallelClusterSetup/main/compute-node-setup.sh