-
Notifications
You must be signed in to change notification settings - Fork 2
/
gejulia
executable file
·119 lines (100 loc) · 4.01 KB
/
gejulia
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env julia
# Hey Emacs! This is a -*- julia -*- file
#
# Parallel launcher for Julia jobs on UCL resources (Legion/Grace)
# !! BETA
# Dr Owain Kenway, October 2016
# Julia supports parallel launching of jobs via SSH. Unfortunately, invoking
# julia --machinefile -p <n> on Legion results in communication errors.
# Weirdly, adding processors from within Julia is fine. This means we need
# to be a bit sneaky, so this wrapper script processes the SGE environment,
# launches the appropriate worker processes, pulls the user script name out of
# the arguments and then launches the user's script with the remaining
# arguments.
# This results in output in stderr of the form:
# WARNING: imported binding for ARGS overwritten in module Main
# This can safely be ignored.
# For security reasons it's a really good idea to have Julia 0.5.0 or higher
# when doing parallel workers. This is true on any computer connected to the
# Internet while running Julia. (This launcher itself refuses to run on
# anything older than min_version, set below.)
# Oldest Julia release this launcher is willing to run under.
min_version = v"1.0.0"
# Abort on anything older, to protect the users.
VERSION >= min_version || error("need Julia $(min_version) or greater.")
using DelimitedFiles, Distributed
# Value of TMPDIR, or "" when it is not set; an empty value means a local run.
GE_tmpdir = get(ENV, "TMPDIR", "")
GE_job = !isempty(GE_tmpdir)
# Directory searched for the launcher's message files (override: GERUN_PATH).
GE_dir = get(ENV, "GERUN_PATH", "/shared/ucl/apps/GERun")
# Boolean switches; each one is on only when its variable is exactly "true".
#   GERUN_SSH_TUNNEL     - tunnel Julia comms over SSH. Not strictly necessary
#                          due to cookies but a good idea; unfortunately it is
#                          unreliable on Legion.
#   GERUN_RESTRICT_PORTS - restrict ports for local multinode processes to
#                          loopback. It's not clear whether this will break
#                          things - to investigate!
#   GERUN_SILENT         - checked by GE_print before it prints anything.
GE_ssh_tunnel, GE_restrict_ports, GE_silent = map(
    name -> get(ENV, name, "false") == "true",
    ("GERUN_SSH_TUNNEL", "GERUN_RESTRICT_PORTS", "GERUN_SILENT")
)
# Print statement that honours GE_silent.
#
# Forwards all arguments to println unless the global GE_silent flag is true.
# Always returns nothing.
function GE_print(t...)
    # The flag is checked when the function is called. `@static` would instead
    # evaluate it once at macro-expansion time, which needs GE_silent to be
    # bound before this definition and freezes whatever value it had then.
    GE_silent || println(t...)
    return nothing
end
# Show a message file from GE_dir: "local-message-julia" takes priority, and
# "gerun-message-julia" is only consulted when the former does not exist.
if isfile(joinpath(GE_dir, "local-message-julia"))
    t = read(joinpath(GE_dir, "local-message-julia"), String)
    GE_print(t)
elseif isfile(joinpath(GE_dir, "gerun-message-julia"))
    t = read(joinpath(GE_dir, "gerun-message-julia"), String)
    GE_print(t)
end
# We are inside GE: work out how many workers to start and start them.
if GE_job
    # Note: if TMPDIR is set but NSLOTS is not, we're probably outside GE. This
    # could happen, but the code will fall back to local workers only, as the
    # machinefile test will fail.
    # The default has to be the string "1": parse(Int, x) has no method for an
    # integer x, so an Int default would raise a MethodError whenever NSLOTS is
    # unset instead of falling back to a single slot.
    GE_nprocs = parse(Int, get(ENV, "NSLOTS", "1"))
    GE_print("Detected ", GE_nprocs, " slots.")
    GE_machinefile = joinpath(GE_tmpdir, "machines")
    GE_print("Machinefile should be ", GE_machinefile)
    # If there's a machinefile, launch the workers listed in it. Note, if
    # the user has manually made NSLOTS different from the number of entries
    # this will still launch all the entries in the machinefile.
    # We only care about $NSLOTS in the SMP environment.
    if isfile(GE_machinefile)
        GE_print("Machinefile located.")
        GE_MACHINES = readdlm(GE_machinefile)
        GE_print(GE_MACHINES)
        for a in GE_MACHINES
            # Check to see if the entry is for the local machine or a remote
            # one. Launch local processes by forking, remote by SSH.
            if a == gethostname() # do local launch without ssh
                GE_print("Adding local worker: ", a)
                addprocs(1, restrict=GE_restrict_ports)
            else
                GE_print("Adding remote worker: ", a)
                addprocs([a]; tunnel=GE_ssh_tunnel, max_parallel=1, sshflags=``)
            end
        end
    else
        # If we are here, we are in an SMP environment.
        GE_print("No machine file detected, lauching local processes only.")
        addprocs(GE_nprocs, restrict=true)
    end
end
# Strip user script from args and run it.
# This will result in a warning in STDERR that you can ignore.
# Without at least one argument there is nothing to run.
isempty(ARGS) && error("No user script specified - exiting")
# The first argument is removed from ARGS and resolved against the current
# directory; whatever remains in ARGS is left for the user's script.
GE_script = joinpath(pwd(), popfirst!(ARGS))
# Run the user script.
include(GE_script)