diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 4dbd232..23e8e65 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -26,14 +26,12 @@ jobs: fail-fast: false matrix: version: - - '1.8' + - '1.9' - 'nightly' os: - ubuntu-latest arch: - x64 - # - aarch64 - # - ppc64le steps: - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@v1 diff --git a/Project.toml b/Project.toml index 4c554d7..a8ad7e4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,16 +1,11 @@ name = "Extrae" uuid = "8a0c07fa-ade5-4b2a-b81a-b192b2bedf88" -authors = ["Sergio Sánchez Ramírez and contributors"] +authors = ["Sergio Sánchez Ramírez "] version = "0.1.0" [deps] Extrae_jll = "2b2c4be0-e38c-5918-b8b4-9a308845a1e9" - -[weakdeps] -Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[extensions] -ExtraeDistributedExt = "Distributed" +Tracepoints = "7b3c9f6b-6d9c-4ea7-b055-3d1c3a6586ed" [compat] Extrae_jll = "4.0.3" diff --git a/ext/ExtraeDistributedExt.jl b/ext/ExtraeDistributedExt.jl deleted file mode 100644 index 67d1ff8..0000000 --- a/ext/ExtraeDistributedExt.jl +++ /dev/null @@ -1,134 +0,0 @@ -using Distributed - -""" - ExtraeLocalManager.jl - -Implements a copy of the default `LocalClusterManager` that starts -workers with an overdubbed event loop. -""" - -struct ExtraeLocalManager <: ClusterManager - np::Int - restrict::Bool # Restrict binding to 127.0.0.1 only -end - -Base.show(io::IO, manager::ExtraeLocalManager) = print(io, "ExtraeLocalManager(#procs=$(manager.np), restrict=$(manager.restrict))") - -function Distributed.launch(manager::ExtraeLocalManager, params::Dict, launched::Array, c::Condition) - dir = params[:dir] - exename = params[:exename] - exeflags = params[:exeflags] - bind_to = manager.restrict ? `127.0.0.1` : `$(LPROC.bind_addr)` - cookie = cluster_cookie() - - active_project = Base.active_project() - hookline = """ - using Distributed - using Extrae - using Cassette - - Cassette.overdub(Extrae.ExtraeCtx(), start_worker, $(repr(cookie))) - """ - - # Bug: Instead of using julia_cmd(exename), I directly use exename because idk howto access Base.julia_cmd - for i in 1:manager.np - cmd = `env EXTRAE_SKIP_AUTO_LIBRARY_INITIALIZE=1 $exename $exeflags --project=$active_project --bind-to $bind_to -e "$hookline"` - io = open(detach(setenv(cmd, dir=dir)), "r+") - - # Cluster cookie is not passed through IO. Instead, we set it - # as a parameter when starting worker, through the hookline - #Distributed.write_cookie(io) - - wconfig = WorkerConfig() - wconfig.process = io - wconfig.io = io.out - wconfig.enable_threaded_blas = params[:enable_threaded_blas] - push!(launched, wconfig) - end - - notify(c) -end - -function Distributed.manage(manager::ExtraeLocalManager, id::Integer, config::WorkerConfig, op::Symbol) - if op === :interrupt - kill(config.process, 2) - end -end - -export ExtraeLocalManager - -const DistributedEvent{ValueCode} = Event{400002,ValueCode} -const DistributedUsefulWorkEvent{ValueCode} = Event{400001,ValueCode} -const DistributedMessageHandlingEvent{ValueCode} = Event{400004,ValueCode} - -description(::Type{DistributedEvent}) = "Distributed runtime calls" -description(::Type{DistributedUsefulWorkEvent}) = "Distributed workload execution" -description(::Type{DistributedMessageHandlingEvent}) = "Distributed message handling functions" - -const DistributedEnd = DistributedEvent{0}() -const DistributedAddProcs = DistributedEvent{1}() -const DistributedRmProcs = DistributedEvent{2}() -const DistributedInitWorker = DistributedEvent{3}() -const DistributedStartWorker = DistributedEvent{4}() -const DistributedRemoteCall = DistributedEvent{5}() -const DistributedRemoteCallFetch = DistributedEvent{6}() -const DistributedRemoteCallWait = DistributedEvent{7}() -const DistributedProcessMessages = DistributedEvent{8}() -const DistributedInterrupt = DistributedEvent{9}() - -const DistributedUsefulWork = DistributedUsefulWorkEvent{1}() -const DistributedNotUsefulWork = DistributedUsefulWorkEvent{0}() - -const DistributedHandleEnd = DistributedMessageHandlingEvent{0}() -const DistributedHandleCall = DistributedMessageHandlingEvent{1}() -const DistributedHandleCallFetch = DistributedMessageHandlingEvent{2}() -const DistributedHandleCallWait = DistributedMessageHandlingEvent{3}() -const DistributedHandleRemoteDo = DistributedMessageHandlingEvent{4}() -const DistributedHandleResult = DistributedMessageHandlingEvent{5}() -const DistributedHandleIdentifySocket = DistributedMessageHandlingEvent{6}() -const DistributedHandleIdentifySocketAck = DistributedMessageHandlingEvent{7}() -const DistributedHandleJoinPGRP = DistributedMessageHandlingEvent{8}() -const DistributedHandleJoinComplete = DistributedMessageHandlingEvent{9}() - -description(::typeof(DistributedEnd)) = "end" -description(::typeof(DistributedAddProcs)) = "addprocs" -description(::typeof(DistributedRmProcs)) = "rmprocs" -description(::typeof(DistributedInitWorker)) = "init_worker" -description(::typeof(DistributedStartWorker)) = "start_worker" -description(::typeof(DistributedRemoteCall)) = "remotecall" -description(::typeof(DistributedRemoteCallFetch)) = "remotecall_fetch" -description(::typeof(DistributedRemoteCallWait)) = "remotecall_wait" -description(::typeof(DistributedProcessMessages)) = "process_messages" -description(::typeof(DistributedInterrupt)) = "interrupt" - -description(::typeof(DistributedHandleEnd)) = "End" -description(::typeof(DistributedHandleCall)) = "CallMsg{:call}" -description(::typeof(DistributedHandleCallFetch)) = "CallMsg{:call_fetch}" -description(::typeof(DistributedHandleCallWait)) = "CallWaitMsg" -description(::typeof(DistributedHandleRemoteDo)) = "RemoteDoMsg" -description(::typeof(DistributedHandleResult)) = "ResultMsg" -description(::typeof(DistributedHandleIdentifySocket)) = "IdentifySocketMsg" -description(::typeof(DistributedHandleIdentifySocketAck)) = "IdentifySocketAckMsg" -description(::typeof(DistributedHandleJoinPGRP)) = "JoinPGRPMsg" -description(::typeof(DistributedHandleJoinComplete)) = "JoinCompletesg" - -description(::typeof(DistributedUsefulWork)) = "Useful" -description(::typeof(DistributedNotUsefulWork)) = "Not Useful" - -# resource identification -dist_taskid()::Cuint = Distributed.myid() - 1 -dist_numtasks()::Cuint = Distributed.nworkers() + 1 - -# cluster manager addprocs -function addprocs_extrae(np::Integer; restrict=true, kwargs...) - manager = Extrae.ExtraeLocalManager(np, restrict) - #check_addprocs_args(manager, kwargs) - new_workers = addprocs(manager; kwargs...) - - Extrae.init() - for pid in new_workers - @fetchfrom pid Extrae.init() - end - return new_workers -end -export addprocs_extrae diff --git a/src/API.jl b/src/API.jl index a0d527c..61046fc 100644 --- a/src/API.jl +++ b/src/API.jl @@ -28,47 +28,16 @@ This routine is called automatically in different circumstances, which include: No major problems should occur if the library is initialized twice, only a warning appears in the terminal output noticing the intent of double initialization. """ function init() - ## TODO: This setup should depend on isntrumentation options. ## For example, if isntrumenting Distributed, here we setup the ## Distributed functions to identify resources - FFI.Extrae_set_numtasks_function(dist_numtasks) - FFI.Extrae_set_taskid_function(dist_taskid) + # FFI.Extrae_set_numtasks_function(dist_numtasks) + # FFI.Extrae_set_taskid_function(dist_taskid) ## Setup traceid for not intereference ENV["EXTRAE_PROGRAM_NAME"] = "JULIATRACE$(Distributed.myid())" FFI.Extrae_init() - Libc.flush_cstdio() - - register([DistributedUsefulWork, DistributedNotUsefulWork]) - register([ - DistributedEnd, - DistributedAddProcs, - DistributedRmProcs, - DistributedInitWorker, - DistributedStartWorker, - DistributedRemoteCall, - DistributedRemoteCallFetch, - DistributedRemoteCallWait, - DistributedProcessMessages, - DistributedInterrupt - ]) - register([ - DistributedHandleEnd, - DistributedHandleCall, - DistributedHandleCallFetch, - DistributedHandleCallWait, - DistributedHandleRemoteDo, - DistributedHandleResult, - DistributedHandleIdentifySocket, - DistributedHandleIdentifySocketAck, - DistributedHandleJoinPGRP, - DistributedHandleJoinComplete, - ]) - - @debug "Extrae initialized in worker $(myid())" - end @@ -122,7 +91,6 @@ description(::E) where {E<:Event} = description(E) Add a single timestampted event into the tracefile. """ function emit(::Event{T,V}; counters::Bool=false) where {T,V} - @debug "Event emit: $(T): $(V)" if counters FFI.Extrae_eventandcounters(FFI.Type(T), FFI.Value(V)) else @@ -132,26 +100,6 @@ end emit(events::Vector{Event}; counters::Bool=false) = foreach(e -> event(e; counters=counters), events) -""" - register(event) - register(event, description) - register(events, description) - -Document to the Paraver Configuration File human readable information regarding type type and its values values. -""" -register(::E) where {E<:Event} = register(E) -register(::Type{E}) where {E<:Event} = register(E, description(E)) -register(events::Vector{<:Event{T,V} where {V}}) where {T} = register(events, description(Event{T})) -register(::E, desc::String) where {E<:Event} = register(E, desc) -register(::Type{<:Event{T}}, desc::String) where {T} = FFI.Extrae_define_event_type(T, Base.cconvert(Cstring, desc), 0, Nothing, Nothing) -function register(events::Vector{<:Event{T,V} where {V}}, desc::String) where {T} - nvalues = length(events) - values = valuecode.(events) - descs = Base.cconvert.((Cstring,), description.(events)) - @debug "Registering event [$(T)] $(desc) with values [$(values)] $(descs)" - FFI.Extrae_define_event_type(T, Base.cconvert(Cstring, desc), nvalues, values, descs) -end - """ previous_hwc_set() diff --git a/src/Extrae.jl b/src/Extrae.jl index d6ff0fa..c1a4949 100644 --- a/src/Extrae.jl +++ b/src/Extrae.jl @@ -5,6 +5,4 @@ include("API.jl") export Event, typecode, valuecode, description export version, init, isinit, finish, flush, instrumentation, emit, register, previous_hwc_set, next_hwc_set, set_tracing_tasks, setoption, network_counters, network_routes, user_function -include("Instrumentation/Threads.jl") - end diff --git a/src/Instrumentation/Threads.jl b/src/Instrumentation/Threads.jl deleted file mode 100644 index cfc56a9..0000000 --- a/src/Instrumentation/Threads.jl +++ /dev/null @@ -1,22 +0,0 @@ -using Base.Threads: threading_run -using Base: do_threadcall - -const ThreadsEvent{ValueCode} = Event{400003,ValueCode} - -description(::Type{ThreadsEvent}) = "Threads" - -const ThreadsEnd = ThreadsEvent{0}() -const ThreadsThreadCall = ThreadsEvent{1}() # NOTE tracing `do_threadcall` which is called by @threadcall -const ThreadsThreads = ThreadsEvent{2}() # NOTE tracing `threading_run` which is called by `_threadsfor` (used by `@threads`) -# const ThreadsSpawn = ThreadsEvent{3}() # TODO creates a `Task` - -description(::typeof(ThreadsEnd)) = "end" -description(::typeof(ThreadsThreadCall)) = "@threadcall" -description(::typeof(ThreadsThreads)) = "@threads" -# description(::typeof(ThreadsSpawn)) = "@spawn" - -Cassette.prehook(::ExtraeCtx, ::typeof(do_threadcall), args...) = emit(ThreadsThreadCall) -Cassette.posthook(::ExtraeCtx, _, ::typeof(do_threadcall), args...) = emit(ThreadsEnd) - -Cassette.prehook(::ExtraeCtx, ::typeof(threading_run), args...) = emit(ThreadsThreads) -Cassette.posthook(::ExtraeCtx, _, ::typeof(threading_run), args...) = emit(ThreadsEnd)