From 2d5d15f0b00ad750e17a2cf4c11e60a8d46a3acf Mon Sep 17 00:00:00 2001
From: Antoniu Pop
Date: Fri, 19 Jul 2024 16:53:12 +0100
Subject: [PATCH] fix(compiler): [GPU runtime] prevent early deallocation of on-device data when multiple processes use the same input.

---
 compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp b/compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp
index 68124212d0..01111e72ae 100644
--- a/compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp
+++ b/compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp
@@ -488,6 +488,7 @@ struct Stream {
   bool ct_stream;
   bool pt_stream;
   size_t generation;
+  std::atomic uses = {0};
   const char *name;
   Stream(stream_type t, const char *sname = nullptr)
       : dep(nullptr), type(t), producer(nullptr), dfg(nullptr),
@@ -527,6 +528,7 @@ struct Stream {
       dep = d;
     }
     dep->stream_generation = generation;
+    uses = 0;
   }
   // For a given dependence, traverse the DFG backwards to extract the lattice
   // of kernels required to execute to produce this data
@@ -835,7 +837,8 @@ struct Stream {
           }
         }
         for (auto i : inputs)
-          i->dep->free_chunk_device_data(c, dfg);
+          if (++i->uses == i->consumers.size())
+            i->dep->free_chunk_device_data(c, dfg);
         for (auto iv : intermediate_values)
           iv->dep->free_chunk_device_data(c, dfg);
         for (auto o : outputs)