diff --git a/.github/workflows/ci_linux_x64-libshortfin.yml b/.github/workflows/ci_linux_x64-libshortfin.yml index 20f944c5b..babcf0245 100644 --- a/.github/workflows/ci_linux_x64-libshortfin.yml +++ b/.github/workflows/ci_linux_x64-libshortfin.yml @@ -103,7 +103,7 @@ jobs: - name: Test libshortfin (full) run: | cd ${{ env.LIBSHORTFIN_DIR }}/build - cmake --build . --target test + ctest --timeout 30 --output-on-failure cd ${{ env.LIBSHORTFIN_DIR }} pytest -s -v -m "not requires_amd_gpu" diff --git a/.github/workflows/ci_linux_x64_asan-libshortfin.yml b/.github/workflows/ci_linux_x64_asan-libshortfin.yml index 5c5310e2a..14aa26bda 100644 --- a/.github/workflows/ci_linux_x64_asan-libshortfin.yml +++ b/.github/workflows/ci_linux_x64_asan-libshortfin.yml @@ -160,7 +160,7 @@ jobs: CTEST_OUTPUT_ON_FAILURE: 1 run: | cd ${{ env.LIBSHORTFIN_DIR }}/build - cmake --build . --target test + ctest --timeout 30 --output-on-failure - name: Run pytest if: ${{ !cancelled() }} diff --git a/libshortfin/CMakeLists.txt b/libshortfin/CMakeLists.txt index 082577f24..c5a2f0f6a 100644 --- a/libshortfin/CMakeLists.txt +++ b/libshortfin/CMakeLists.txt @@ -46,6 +46,9 @@ option(SHORTFIN_ENABLE_ASAN "Enable ASAN" OFF) if(SHORTFIN_ENABLE_ASAN) add_compile_options(-fsanitize=address) add_link_options(-fsanitize=address) + + # Enable more ASAN checks. + add_compile_definitions(IREE_SANITIZER_ADDRESS) endif() option(SHORTFIN_SYSTEMS_AMDGPU "Builds for AMD GPU systems" ON) diff --git a/libshortfin/bindings/python/array_binding.cc b/libshortfin/bindings/python/array_binding.cc index fc4694107..9858c2350 100644 --- a/libshortfin/bindings/python/array_binding.cc +++ b/libshortfin/bindings/python/array_binding.cc @@ -123,6 +123,7 @@ void BindArray(py::module_ &m) { PyBufferReleaser py_view_releaser(py_view); self.Fill(py_view.buf, py_view.len); }) + .def("copy_from", [](storage &self, storage &src) { self.CopyFrom(src); }) .def( "map", [](storage &self, bool read, bool write, bool discard) { @@ -232,13 +233,12 @@ void BindArray(py::module_ &m) { py::type(), /*keep_alive=*/device.scope(), device_array::for_host(device, shape, dtype)); }) - .def_static("for_transfer", - [](device_array &existing) { - return custom_new_keep_alive( - py::type(), - /*keep_alive=*/existing.device().scope(), - device_array::for_transfer(existing)); - }) + .def("for_transfer", + [](device_array &self) { + return custom_new_keep_alive( + py::type(), + /*keep_alive=*/self.device().scope(), self.for_transfer()); + }) .def_prop_ro("device", &device_array::device, py::rv_policy::reference_internal) .def_prop_ro("storage", &device_array::storage, diff --git a/libshortfin/examples/python/mobilenet_server/inference_system.py b/libshortfin/examples/python/mobilenet_server/inference_system.py index e2be35910..8ae7773db 100644 --- a/libshortfin/examples/python/mobilenet_server/inference_system.py +++ b/libshortfin/examples/python/mobilenet_server/inference_system.py @@ -12,7 +12,7 @@ import shortfin as sf import shortfin.array as sfnp -MAX_BATCH = 8 +MAX_BATCH = 1 class InferenceRequest(sf.Message): @@ -27,18 +27,23 @@ def __init__(self, program, request_queue, **kwargs): self.program = program self.request_reader = request_queue.reader() self.device = self.scope.device(0) - self.host_staging = sfnp.host_array( - self.device, [MAX_BATCH, 3, 224, 224], sfnp.float32 - ) self.device_input = sfnp.device_array( self.device, [MAX_BATCH, 3, 224, 224], sfnp.float32 ) + self.host_staging = self.device_input.for_transfer() async def run(self): print(f"Inference process: {self.pid}") while request := await self.request_reader(): print(f"[{self.pid}] Got request {request}") - # self.host_staging.data = self.raw_image_data + # TODO: Should really be taking a slice and writing that. For now, + # just writing to the backing storage is the best we have API + # support for. Generally, APIs on storage should be mirrored onto + # the array. + self.host_staging.storage.data = request.raw_image_data + print(self.host_staging) + self.device_input.storage.copy_from(self.host_staging.storage) + print(self.device_input) class Main: @@ -95,7 +100,10 @@ def client(): # Dumb way to prepare some data to feed [1, 3, 224, 224] f32. import array - dummy_data = array.array("f", [0.2] * (3 * 224 * 224)) + dummy_data = array.array( + "f", ([0.2] * (224 * 224)) + ([0.4] * (224 * 224)) + ([-0.2] * (224 * 224)) + ) + # dummy_data = array.array("f", [0.2] * (3 * 224 * 224)) message = InferenceRequest(dummy_data) writer(message) diff --git a/libshortfin/src/shortfin/array/array.h b/libshortfin/src/shortfin/array/array.h index 31deb665a..c3ab6e302 100644 --- a/libshortfin/src/shortfin/array/array.h +++ b/libshortfin/src/shortfin/array/array.h @@ -80,10 +80,9 @@ class SHORTFIN_API device_array shape, dtype); } - // Allocates a host array for transfer to/from the given device array. - static device_array for_transfer(device_array &with_device_array) { - return for_host(with_device_array.storage().device(), - with_device_array.shape(), with_device_array.dtype()); + // Allocates a host array for transfer to/from this array. + device_array for_transfer() { + return for_host(storage().device(), shape(), dtype()); } // Untyped access to the backing data. The array must be mappable. Specific diff --git a/libshortfin/src/shortfin/array/storage.cc b/libshortfin/src/shortfin/array/storage.cc index 6554f3e74..fa9e0f4b8 100644 --- a/libshortfin/src/shortfin/array/storage.cc +++ b/libshortfin/src/shortfin/array/storage.cc @@ -26,6 +26,15 @@ void ThrowIllegalDeviceAffinity(Device *first, Device *second) { } } // namespace detail +storage::storage(local::ScopedDevice device, iree::hal_buffer_ptr buffer, + local::detail::TimelineResource::Ref timeline_resource) + : timeline_resource_(std::move(timeline_resource)), + buffer_(std::move(buffer)), + device_(device) { + logging::construct("array::storage", this); +} +storage::~storage() { logging::destruct("array::storage", this); } + storage storage::AllocateDevice(ScopedDevice &device, iree_device_size_t allocation_size) { if (!device.raw_device()) { @@ -103,7 +112,28 @@ void storage::Fill(const void *pattern, iree_host_size_t pattern_length) { } void storage::CopyFrom(storage &source_storage) { - throw std::logic_error("CopyFrom NYI"); + device_.scope().scheduler().AppendCommandBuffer( + device_, TransactionType::TRANSFER, [&](Account &account) { + // Must depend on the source's mutation dependencies to avoid + // read-before-write hazard. + account.active_deps_extend( + source_storage.timeline_resource_->mutation_barrier()); + // And depend on our own use and mutations dependencies. + account.active_deps_extend(timeline_resource_->use_barrier()); + account.active_deps_extend(timeline_resource_->mutation_barrier()); + + SHORTFIN_THROW_IF_ERROR(iree_hal_command_buffer_copy_buffer( + account.active_command_buffer(), + /*source_ref=*/ + iree_hal_make_buffer_ref(source_storage.buffer_, 0, byte_length()), + /*target_ref=*/ + iree_hal_make_buffer_ref(buffer_, 0, byte_length()))); + + // And move our own mutation barrier to the current pending timeline + // value. + timeline_resource_->set_mutation_barrier( + account.timeline_sem(), account.timeline_idle_timepoint()); + }); } bool storage::is_mappable_for_read() const { @@ -127,8 +157,7 @@ void storage::MapExplicit(mapping &mapping, iree_hal_memory_access_t access) { buffer_, IREE_HAL_MAPPING_MODE_SCOPED, access, /*byte_offset=*/0, byte_length(), &mapping.mapping_)); mapping.access_ = access; - mapping.hal_device_ownership_baton_ = - iree::hal_device_ptr::borrow_reference(hal_device_ownership_baton_); + mapping.timeline_resource_ = timeline_resource_; } iree_hal_memory_type_t storage::memory_type() const { @@ -169,16 +198,22 @@ std::string storage::to_s() const { // mapping // -------------------------------------------------------------------------- // -mapping::mapping() { std::memset(&mapping_, 0, sizeof(mapping_)); } +mapping::mapping() { + logging::construct("array::mapping", this); + std::memset(&mapping_, 0, sizeof(mapping_)); +} -mapping::~mapping() noexcept { reset(); } +mapping::~mapping() noexcept { + logging::destruct("array::mapping", this); + reset(); +} void mapping::reset() noexcept { if (*this) { // Crash the process on failure to unmap. We don't have a good mitigation, IREE_CHECK_OK(iree_hal_buffer_unmap_range(&mapping_)); access_ = IREE_HAL_MEMORY_ACCESS_NONE; - hal_device_ownership_baton_.reset(); + timeline_resource_.reset(); } } diff --git a/libshortfin/src/shortfin/array/storage.h b/libshortfin/src/shortfin/array/storage.h index 36f117cb4..0db73d28f 100644 --- a/libshortfin/src/shortfin/array/storage.h +++ b/libshortfin/src/shortfin/array/storage.h @@ -23,18 +23,17 @@ class SHORTFIN_API mapping { mapping(const mapping &) = delete; mapping &operator=(const mapping &) = delete; mapping &operator=(mapping &&other) { + timeline_resource_ = std::move(other.timeline_resource_); access_ = other.access_; mapping_ = other.mapping_; - hal_device_ownership_baton_ = std::move(other.hal_device_ownership_baton_); other.access_ = IREE_HAL_MEMORY_ACCESS_NONE; std::memset(&other.mapping_, 0, sizeof(other.mapping_)); return *this; } mapping(mapping &&other) - : access_(other.access_), - mapping_(other.mapping_), - hal_device_ownership_baton_( - std::move(other.hal_device_ownership_baton_)) { + : timeline_resource_(std::move(other.timeline_resource_)), + access_(other.access_), + mapping_(other.mapping_) { other.access_ = IREE_HAL_MEMORY_ACCESS_NONE; std::memset(&other.mapping_, 0, sizeof(other.mapping_)); } @@ -63,15 +62,17 @@ class SHORTFIN_API mapping { bool writable() const { return access_ & IREE_HAL_MEMORY_ACCESS_WRITE; } private: + // See note on storage::timeline_resource_. Must be declared first. + local::detail::TimelineResource::Ref timeline_resource_; iree_hal_memory_access_t access_ = IREE_HAL_MEMORY_ACCESS_NONE; iree_hal_buffer_mapping_t mapping_; - iree::hal_device_ptr hal_device_ownership_baton_; friend class storage; }; // Array storage backed by an IREE buffer of some form. class SHORTFIN_API storage { public: + ~storage(); local::ScopedDevice &device() { return device_; } local::Scope &scope() { return device_.scope(); } const local::ScopedDevice &device() const { return device_; } @@ -162,23 +163,13 @@ class SHORTFIN_API storage { private: storage(local::ScopedDevice device, iree::hal_buffer_ptr buffer, - local::detail::TimelineResource::Ref timeline_resource) - : hal_device_ownership_baton_(iree::hal_device_ptr::borrow_reference( - device.raw_device()->hal_device())), - buffer_(std::move(buffer)), - device_(device), - timeline_resource_(std::move(timeline_resource)) {} - // TODO(ownership): Since storage is a free-standing object in the system, - // it needs an ownership baton that keeps the device/driver alive. - // Otherwise, it can outlive the backing device and then then crashes on - // buffer deallocation. For now, we stash an RAII hal_device_ptr, which - // keeps everything alive. This isn't quite what we want but keeps us going - // for now. When fixing, add a test that creates an array, destroys the - // System, and then frees the array. - iree::hal_device_ptr hal_device_ownership_baton_; + local::detail::TimelineResource::Ref timeline_resource); + // The timeline resource holds the back reference to the owning scope, + // which keeps all devices alive. Buffers must be destroyed before devices, + // so this must be declared first. + local::detail::TimelineResource::Ref timeline_resource_; iree::hal_buffer_ptr buffer_; local::ScopedDevice device_; - local::detail::TimelineResource::Ref timeline_resource_; }; // Wraps an untyped mapping, providing typed access. diff --git a/libshortfin/src/shortfin/local/scheduler.cc b/libshortfin/src/shortfin/local/scheduler.cc index 64e4247e6..c5a9fc062 100644 --- a/libshortfin/src/shortfin/local/scheduler.cc +++ b/libshortfin/src/shortfin/local/scheduler.cc @@ -30,6 +30,9 @@ void Account::Initialize() { void Account::Reset() { active_tx_type_ = TransactionType::NONE; + // if (active_command_buffer_) { + // iree_hal_command_buffer_end(active_command_buffer_); + // } active_command_buffer_.reset(); } @@ -67,10 +70,17 @@ CompletionEvent Account::OnSync() { // TimelineResource // -------------------------------------------------------------------------- // -TimelineResource::TimelineResource(iree_allocator_t host_allocator, - size_t semaphore_capacity) { - SHORTFIN_THROW_IF_ERROR(iree_hal_fence_create( - semaphore_capacity, host_allocator, use_barrier_fence_.for_output())); +TimelineResource::TimelineResource(std::shared_ptr scope, + size_t semaphore_capacity) + : scope_(std::move(scope)) { + logging::construct("TimelineResource", this); + SHORTFIN_THROW_IF_ERROR( + iree_hal_fence_create(semaphore_capacity, scope_->host_allocator(), + use_barrier_fence_.for_output())); +} + +TimelineResource::~TimelineResource() { + logging::destruct("TimelineResource", this); } void TimelineResource::use_barrier_insert(iree_hal_semaphore_t *sem, @@ -83,6 +93,19 @@ void TimelineResource::use_barrier_insert(iree_hal_semaphore_t *sem, // Scheduler // -------------------------------------------------------------------------- // +Scheduler::Scheduler(System &system) : system_(system) { + logging::construct("Scheduler", this); +} + +Scheduler::~Scheduler() { + logging::destruct("Scheduler", this); + + // Explicitly reset account state prior to implicit destruction. + for (auto &account : accounts_) { + account.Reset(); + } +} + void Scheduler::Initialize(std::span devices) { for (Device *device : devices) { accounts_.emplace_back(*this, device); diff --git a/libshortfin/src/shortfin/local/scheduler.h b/libshortfin/src/shortfin/local/scheduler.h index 057bfbd9f..2f606ced3 100644 --- a/libshortfin/src/shortfin/local/scheduler.h +++ b/libshortfin/src/shortfin/local/scheduler.h @@ -83,13 +83,35 @@ class SHORTFIN_API TimelineResource { Ref() : res_(nullptr) {} explicit Ref(TimelineResource *res) : res_(res) { res_->Retain(); } Ref(const Ref &other) : res_(other.res_) { res_->Retain(); } - void operator=(const Ref &other) = delete; - Ref(Ref &&other) : res_(other.res_) { other.res_ = nullptr; } - ~Ref() { - if (res_) res_->Release(); + Ref &operator=(const Ref &other) { + if (other.res_ != res_) { + reset(); + if (other.res_) { + other.res_->Retain(); + res_ = other.res_; + } + } + return *this; + } + Ref &operator=(Ref &&other) { + if (other.res_ != res_) { + reset(); + res_ = other.res_; + other.res_ = nullptr; + } + return *this; } + Ref(Ref &&other) : res_(other.res_) { other.res_ = nullptr; } + ~Ref() { reset(); } TimelineResource *operator->() { return res_; } + void reset() { + if (res_) { + res_->Release(); + res_ = nullptr; + } + } + private: TimelineResource *res_; }; @@ -121,13 +143,18 @@ class SHORTFIN_API TimelineResource { } private: - TimelineResource(iree_allocator_t host_allocator, size_t semaphore_capacity); + TimelineResource(std::shared_ptr scope, size_t semaphore_capacity); + ~TimelineResource(); void Retain() { refcnt_++; } void Release() { if (--refcnt_ == 0) delete this; } int refcnt_ = 0; + + // Back reference to the owning scope. + std::shared_ptr scope_; + // Non-owning mutation barrier semaphore and timepoint. The fact that this // is a single semaphore is an implementation detail that may be generalized // in the future should it be necessary to track multiple write sources. @@ -171,11 +198,13 @@ class SHORTFIN_API Account { void Initialize(); void Reset(); Scheduler &scheduler_; + iree::hal_semaphore_ptr sem_; + iree::hal_fence_ptr active_deps_; + iree::hal_command_buffer_ptr active_command_buffer_; + Device *device_; iree_hal_device_t *hal_device_; TransactionType active_tx_type_ = TransactionType::NONE; - iree::hal_fence_ptr active_deps_; - iree::hal_command_buffer_ptr active_command_buffer_; iree_hal_queue_affinity_t active_queue_affinity_bits_; // Timepoint at which this device is considered idle, inclusive of any @@ -193,14 +222,14 @@ class SHORTFIN_API Account { // an eventual submission would submit a duplicate timepoint). This // timepoint is only valid for the local sem_. uint64_t idle_timepoint_ = 0; - iree::hal_semaphore_ptr sem_; friend class Scheduler; }; // Handles scheduling state for a scope. class SHORTFIN_API Scheduler { public: - Scheduler(System &system) : system_(system) {} + Scheduler(System &system); + ~Scheduler(); TransactionMode transaction_mode() const { return tx_mode_; } @@ -224,9 +253,9 @@ class SHORTFIN_API Scheduler { // Gets a fresh TimelineResource which can be used for tracking resource // read/write and setting barriers. Note that these are all allocated fresh // on each call today but may be pooled in the future. - TimelineResource::Ref NewTimelineResource(iree_allocator_t host_allocator) { + TimelineResource::Ref NewTimelineResource(std::shared_ptr scope) { return TimelineResource::Ref( - new TimelineResource(host_allocator, semaphore_count_)); + new TimelineResource(std::move(scope), semaphore_count_)); } System &system() { return system_; } diff --git a/libshortfin/src/shortfin/local/scope.cc b/libshortfin/src/shortfin/local/scope.cc index f0eb9ca77..39784f196 100644 --- a/libshortfin/src/shortfin/local/scope.cc +++ b/libshortfin/src/shortfin/local/scope.cc @@ -21,10 +21,11 @@ namespace shortfin::local { Scope::Scope(std::shared_ptr system, Worker &worker, std::span> devices) - : host_allocator_(system->host_allocator()), - scheduler_(*system), - system_(std::move(system)), + : system_(std::move(system)), + host_allocator_(system_->host_allocator()), + scheduler_(*system_), worker_(worker) { + logging::construct("Scope", this); for (auto &it : devices) { AddDevice(it.first, it.second); } @@ -33,17 +34,18 @@ Scope::Scope(std::shared_ptr system, Worker &worker, Scope::Scope(std::shared_ptr system, Worker &worker, std::span devices) - : host_allocator_(system->host_allocator()), - scheduler_(*system), - system_(std::move(system)), + : system_(std::move(system)), + host_allocator_(system_->host_allocator()), + scheduler_(*system_), worker_(worker) { + logging::construct("Scope", this); for (auto *device : devices) { AddDevice(device->address().logical_device_class, device); } Initialize(); } -Scope::~Scope() = default; +Scope::~Scope() { logging::destruct("Scope", this); } std::string Scope::to_s() const { return fmt::format("Scope(worker='{}', devices=[{}])", worker_.name(), diff --git a/libshortfin/src/shortfin/local/scope.h b/libshortfin/src/shortfin/local/scope.h index cc6ee8329..0cb566b89 100644 --- a/libshortfin/src/shortfin/local/scope.h +++ b/libshortfin/src/shortfin/local/scope.h @@ -91,6 +91,9 @@ class SHORTFIN_API Scope : public std::enable_shared_from_this { // All scopes are created as shared pointers. std::shared_ptr shared_ptr() { return shared_from_this(); } + // The host allocator. + iree_allocator_t host_allocator() { return host_allocator_; } + // The worker that this scope is bound to. Worker &worker() { return worker_; } @@ -126,7 +129,7 @@ class SHORTFIN_API Scope : public std::enable_shared_from_this { } detail::Scheduler &scheduler() { return scheduler_; } detail::TimelineResource::Ref NewTimelineResource() { - return scheduler().NewTimelineResource(host_allocator_); + return scheduler().NewTimelineResource(shared_ptr()); } // Loads a program from a list of modules onto the devices managed by this @@ -141,19 +144,19 @@ class SHORTFIN_API Scope : public std::enable_shared_from_this { void AddDevice(std::string_view device_class, Device *device); void Initialize(); // Called after all devices are added. - iree_allocator_t host_allocator_; + // Back reference to owning system. + std::shared_ptr system_; string_interner interner_; + iree_allocator_t host_allocator_; + detail::Scheduler scheduler_; + Worker &worker_; + // Map of `` to the count of that class contained. std::unordered_map device_class_count_; // Ordered devices. std::vector devices_; // Map of `` to Device. std::unordered_map named_devices_; - detail::Scheduler scheduler_; - - // Back reference to owning system. - std::shared_ptr system_; - Worker &worker_; }; } // namespace shortfin::local diff --git a/libshortfin/src/shortfin/local/system.cc b/libshortfin/src/shortfin/local/system.cc index 28c8c9654..2eaf3eaf7 100644 --- a/libshortfin/src/shortfin/local/system.cc +++ b/libshortfin/src/shortfin/local/system.cc @@ -19,6 +19,7 @@ namespace shortfin::local { System::System(iree_allocator_t host_allocator) : host_allocator_(host_allocator) { + logging::construct("System", this); SHORTFIN_THROW_IF_ERROR(iree_vm_instance_create(IREE_VM_TYPE_CAPACITY_DEFAULT, host_allocator_, vm_instance_.for_output())); @@ -27,6 +28,7 @@ System::System(iree_allocator_t host_allocator) } System::~System() { + logging::destruct("System", this); bool needs_shutdown = false; { iree::slim_mutex_lock_guard guard(lock_); @@ -40,6 +42,21 @@ System::~System() { "explicitly for maximum stability."); Shutdown(); } + + // Orderly destruction of heavy-weight objects. + // Shutdown order is important so we don't leave it to field ordering. + vm_instance_.reset(); + + // Devices. + devices_.clear(); + named_devices_.clear(); + retained_devices_.clear(); + + // HAL drivers. + hal_drivers_.clear(); + + // If support for logging refs was compiled in, report now. + iree::detail::LogLiveRefs(); } void System::Shutdown() { @@ -63,20 +80,7 @@ void System::Shutdown() { } } blocking_executor_.Kill(); - local_workers.clear(); - - // Orderly destruction of heavy-weight objects. - // Shutdown order is important so we don't leave it to field ordering. - vm_instance_.reset(); - - // Devices. - devices_.clear(); - named_devices_.clear(); - retained_devices_.clear(); - - // HAL drivers. - hal_drivers_.clear(); } std::shared_ptr System::CreateScope(Worker &worker, @@ -180,7 +184,7 @@ void System::InitializeHalDriver(std::string_view moniker, throw std::logic_error(fmt::format( "Cannot register multiple hal drivers with moniker '{}'", moniker)); } - slot.reset(driver.release()); + slot = std::move(driver); } void System::InitializeHalDevice(std::unique_ptr device) { diff --git a/libshortfin/src/shortfin/support/blocking_executor.cc b/libshortfin/src/shortfin/support/blocking_executor.cc index fc739ec0c..fde3cc593 100644 --- a/libshortfin/src/shortfin/support/blocking_executor.cc +++ b/libshortfin/src/shortfin/support/blocking_executor.cc @@ -59,9 +59,18 @@ void BlockingExecutor::Kill(bool wait, iree_timeout_t warn_timeout) { iree::slim_mutex_lock_guard g(control_mu_); last_live_thread_count = live_thread_count_; total_thread_count = created_thread_count_; + // If transitioned to 0 live threads, there is a short period of time + // that can exist between the scan of the free list above and a task + // getting scheduled. Therefore, the first time we hit this condition, + // enter the inhibited state, which denies further scheduling. Then + // the next time we encounter no live threads, that will be a true + // count. if (live_thread_count_ == 0) { - inhibit_ = true; - break; + if (inhibit_) { + break; + } else { + inhibit_ = true; + } } } diff --git a/libshortfin/src/shortfin/support/blocking_executor_test.cc b/libshortfin/src/shortfin/support/blocking_executor_test.cc index 78f99cf4a..92a9b31f5 100644 --- a/libshortfin/src/shortfin/support/blocking_executor_test.cc +++ b/libshortfin/src/shortfin/support/blocking_executor_test.cc @@ -13,7 +13,13 @@ namespace shortfin { -TEST(BlockingExecutor, concurrent_tasks) { +class BlockingExecutorTest : public testing::Test { + protected: + void SetUp() override {} + void TearDown() override { iree::detail::LogLiveRefs(); } +}; + +TEST_F(BlockingExecutorTest, concurrent_tasks) { { std::atomic tasks_run{0}; @@ -33,7 +39,7 @@ TEST(BlockingExecutor, concurrent_tasks) { } } -TEST(BlockingExecutor, inhibit_when_shutdown) { +TEST_F(BlockingExecutorTest, inhibit_when_shutdown) { { std::atomic tasks_run{0}; @@ -46,6 +52,7 @@ TEST(BlockingExecutor, inhibit_when_shutdown) { } executor.Kill(/*wait=*/true); + logging::info("Killed"); // New work should be inhibited. try { @@ -57,7 +64,7 @@ TEST(BlockingExecutor, inhibit_when_shutdown) { } } -TEST(BlockingExecutor, warn_deadline) { +TEST_F(BlockingExecutorTest, warn_deadline) { { std::atomic tasks_run{0}; @@ -75,7 +82,7 @@ TEST(BlockingExecutor, warn_deadline) { } } -TEST(BlockingExecutor, threads_recycle) { +TEST_F(BlockingExecutorTest, threads_recycle) { { std::atomic tasks_run{0}; diff --git a/libshortfin/src/shortfin/support/iree_concurrency.h b/libshortfin/src/shortfin/support/iree_concurrency.h index 6ccd1792e..28ef1e99b 100644 --- a/libshortfin/src/shortfin/support/iree_concurrency.h +++ b/libshortfin/src/shortfin/support/iree_concurrency.h @@ -18,8 +18,15 @@ namespace shortfin::iree { namespace detail { struct thread_ptr_helper { - static void retain(iree_thread_t *obj) { iree_thread_retain(obj); } - static void release(iree_thread_t *obj) { iree_thread_release(obj); } + static void steal(iree_thread_t *obj) { LogIREESteal("iree_thread_t", obj); } + static void retain(iree_thread_t *obj) { + LogIREERetain("iree_thread_t", obj); + iree_thread_retain(obj); + } + static void release(iree_thread_t *obj) { + LogIREERelease("iree_thread_t", obj); + iree_thread_release(obj); + } }; }; // namespace detail diff --git a/libshortfin/src/shortfin/support/iree_helpers.cc b/libshortfin/src/shortfin/support/iree_helpers.cc index 8344377b4..d518e99c3 100644 --- a/libshortfin/src/shortfin/support/iree_helpers.cc +++ b/libshortfin/src/shortfin/support/iree_helpers.cc @@ -6,8 +6,81 @@ #include "shortfin/support/iree_helpers.h" +#include + +#include +#include + +#include "shortfin/support/iree_concurrency.h" +#include "shortfin/support/logging.h" + namespace shortfin::iree { +namespace detail { + +#if SHORTFIN_IREE_LOG_RC + +slim_mutex log_mutex; +std::unordered_map app_ref_counts; + +void LogIREERetain(const char *type_name, void *ptr) { + slim_mutex_lock_guard g(log_mutex); + std::string key = fmt::format("{}({})", type_name, ptr); + int &rc = app_ref_counts[key]; + rc += 1; + if (rc == 1) { + logging::info("IREE new {}", key); + } else { + logging::info("IREE retain {} = {}", key, rc); + } +} + +void LogIREERelease(const char *type_name, void *ptr) { + slim_mutex_lock_guard g(log_mutex); + std::string key = fmt::format("{}({})", type_name, ptr); + int &rc = app_ref_counts[key]; + rc -= 1; + if (rc == 0) { + logging::info("IREE delete {}", key); + } else { + logging::info("IREE release {} = {}", key, rc); + } +} + +void LogIREESteal(const char *type_name, void *ptr) { + slim_mutex_lock_guard g(log_mutex); + std::string key = fmt::format("{}({})", type_name, ptr); + int &rc = app_ref_counts[key]; + rc += 1; + if (rc == 1) { + logging::info("IREE steal {}", key); + } else { + logging::info("IREE retain {} = {}", key, rc); + } +} + +void SHORTFIN_API LogLiveRefs() { + slim_mutex_lock_guard g(log_mutex); + bool logged_banner = false; + for (auto &it : app_ref_counts) { + if (it.second == 0) continue; + if (it.second < 0) { + logging::error("Shortfin IREE negative reference count: {} = {}", + it.first, it.second); + continue; + } + if (!logged_banner) { + logged_banner = true; + logging::warn("Shortfin visible live IREE refs remain:"); + } + logging::warn(" Live IREE ref {} = {}", it.first, it.second); + } +} + +#endif + +} // namespace detail + error::error(std::string message, iree_status_t failing_status) : message_(std::move(message)), failing_status_(failing_status) { message_.append(": "); @@ -19,7 +92,7 @@ void error::AppendStatus() const noexcept { status_appended_ = false; iree_allocator_t allocator = iree_allocator_system(); - char* status_buffer = nullptr; + char *status_buffer = nullptr; iree_host_size_t length = 0; if (iree_status_to_string(failing_status_, &allocator, &status_buffer, &length)) { diff --git a/libshortfin/src/shortfin/support/iree_helpers.h b/libshortfin/src/shortfin/support/iree_helpers.h index 8cbe368fd..c77ddbaa8 100644 --- a/libshortfin/src/shortfin/support/iree_helpers.h +++ b/libshortfin/src/shortfin/support/iree_helpers.h @@ -17,6 +17,10 @@ #include "iree/vm/api.h" #include "shortfin/support/api.h" +#if !defined(SHORTFIN_IREE_LOG_RC) +#define SHORTFIN_IREE_LOG_RC 0 +#endif + namespace shortfin { // -------------------------------------------------------------------------- // @@ -36,59 +40,142 @@ namespace iree { namespace detail { +#if SHORTFIN_IREE_LOG_RC +void SHORTFIN_API LogIREERetain(const char *type_name, void *ptr); +void SHORTFIN_API LogIREERelease(const char *type_name, void *ptr); +void SHORTFIN_API LogIREESteal(const char *type_name, void *ptr); +void SHORTFIN_API LogLiveRefs(); +#else +inline void LogIREERetain(const char *type_name, void *ptr) {} +inline void LogIREERelease(const char *type_name, void *ptr) {} +inline void LogIREESteal(const char *type_name, void *ptr) {} +inline void LogLiveRefs() {} +#endif + struct hal_buffer_ptr_helper { - static void retain(iree_hal_buffer_t *obj) { iree_hal_buffer_retain(obj); } - static void release(iree_hal_buffer_t *obj) { iree_hal_buffer_release(obj); } + static void steal(iree_hal_buffer_t *obj) { + LogIREESteal("iree_hal_buffer_t", obj); + } + static void retain(iree_hal_buffer_t *obj) { + LogIREERetain("iree_hal_buffer_t", obj); + iree_hal_buffer_retain(obj); + } + static void release(iree_hal_buffer_t *obj) { + LogIREERelease("iree_hal_buffer_t", obj); + iree_hal_buffer_release(obj); + } }; struct hal_command_buffer_helper { + static void steal(iree_hal_command_buffer_t *obj) { + LogIREESteal("iree_hal_command_buffer_t", obj); + } static void retain(iree_hal_command_buffer_t *obj) { + LogIREERetain("iree_hal_command_buffer_t", obj); iree_hal_command_buffer_retain(obj); } static void release(iree_hal_command_buffer_t *obj) { + LogIREERelease("iree_hal_command_buffer_t", obj); iree_hal_command_buffer_release(obj); } }; struct hal_device_ptr_helper { - static void retain(iree_hal_device_t *obj) { iree_hal_device_retain(obj); } - static void release(iree_hal_device_t *obj) { iree_hal_device_release(obj); } + static void steal(iree_hal_device_t *obj) { + LogIREESteal("iree_hal_device_t", obj); + } + static void retain(iree_hal_device_t *obj) { + LogIREERetain("iree_hal_device_t", obj); + iree_hal_device_retain(obj); + } + static void release(iree_hal_device_t *obj) { + LogIREERelease("iree_hal_device_t", obj); + iree_hal_device_release(obj); + } }; struct hal_driver_ptr_helper { - static void retain(iree_hal_driver_t *obj) { iree_hal_driver_retain(obj); } - static void release(iree_hal_driver_t *obj) { iree_hal_driver_release(obj); } + static void steal(iree_hal_driver_t *obj) { + LogIREESteal("iree_hal_driver_t", obj); + } + static void retain(iree_hal_driver_t *obj) { + LogIREERetain("iree_hal_driver_t", obj); + iree_hal_driver_retain(obj); + } + static void release(iree_hal_driver_t *obj) { + LogIREERelease("iree_hal_driver_t", obj); + iree_hal_driver_release(obj); + } }; struct hal_fence_ptr_helper { - static void retain(iree_hal_fence_t *obj) { iree_hal_fence_retain(obj); } - static void release(iree_hal_fence_t *obj) { iree_hal_fence_release(obj); } + static void steal(iree_hal_fence_t *obj) { + LogIREESteal("iree_hal_fence_t", obj); + } + static void retain(iree_hal_fence_t *obj) { + LogIREERetain("iree_hal_fence_t", obj); + iree_hal_fence_retain(obj); + } + static void release(iree_hal_fence_t *obj) { + LogIREERelease("iree_hal_fence_t", obj); + iree_hal_fence_release(obj); + } }; struct hal_semaphore_ptr_helper { + static void steal(iree_hal_semaphore_t *obj) { + LogIREESteal("iree_hal_semaphore_t", obj); + } static void retain(iree_hal_semaphore_t *obj) { + LogIREERetain("iree_hal_semaphore_t", obj); iree_hal_semaphore_retain(obj); } static void release(iree_hal_semaphore_t *obj) { + LogIREERelease("iree_hal_semaphore_t", obj); iree_hal_semaphore_release(obj); } }; struct vm_context_ptr_helper { - static void retain(iree_vm_context_t *obj) { iree_vm_context_retain(obj); } - static void release(iree_vm_context_t *obj) { iree_vm_context_release(obj); } + static void steal(iree_vm_context_t *obj) { + LogIREESteal("iree_vm_context_t", obj); + } + static void retain(iree_vm_context_t *obj) { + LogIREERetain("iree_vm_context_t", obj); + iree_vm_context_retain(obj); + } + static void release(iree_vm_context_t *obj) { + LogIREERelease("iree_vm_context_t", obj); + iree_vm_context_release(obj); + } }; struct vm_instance_ptr_helper { - static void retain(iree_vm_instance_t *obj) { iree_vm_instance_retain(obj); } + static void steal(iree_vm_instance_t *obj) { + LogIREESteal("iree_vm_instance_t", obj); + } + static void retain(iree_vm_instance_t *obj) { + LogIREERetain("iree_vm_instance_t", obj); + iree_vm_instance_retain(obj); + } static void release(iree_vm_instance_t *obj) { + LogIREERelease("iree_vm_instance_t", obj); iree_vm_instance_release(obj); } }; struct vm_module_ptr_helper { - static void retain(iree_vm_module_t *obj) { iree_vm_module_retain(obj); } - static void release(iree_vm_module_t *obj) { iree_vm_module_release(obj); } + static void steal(iree_vm_module_t *obj) { + LogIREESteal("iree_vm_module_t", obj); + } + static void retain(iree_vm_module_t *obj) { + LogIREERetain("iree_vm_module_t", obj); + iree_vm_module_retain(obj); + } + static void release(iree_vm_module_t *obj) { + LogIREERelease("iree_vm_module_t", obj); + iree_vm_module_release(obj); + } }; }; // namespace detail @@ -105,41 +192,60 @@ class object_ptr { } } object_ptr(object_ptr &&other) : ptr(other.ptr) { other.ptr = nullptr; } + object_ptr &operator=(const object_ptr &other) = delete; object_ptr &operator=(object_ptr &&other) { + reset(); ptr = other.ptr; other.ptr = nullptr; return *this; } - ~object_ptr() { - if (ptr) { - Helper::release(ptr); - } - } + ~object_ptr() { reset(); } // Constructs a new object_ptr by transferring ownership of a raw // pointer. - static object_ptr steal_reference(T *owned) { return object_ptr(owned); } + static object_ptr steal_reference(T *owned) { + Helper::steal(owned); + return object_ptr(owned); + } + // Constructs a new object_ptr by retaining a raw pointer. static object_ptr borrow_reference(T *owned) { Helper::retain(owned); return object_ptr(owned); } operator T *() const noexcept { return ptr; } + class Assignment { + public: + explicit Assignment(object_ptr *assign) : assign(assign) {} + ~Assignment() { + if (assign->ptr) { + Helper::steal(assign->ptr); + } + } + + constexpr operator T **() noexcept { + return reinterpret_cast(&assign->ptr); + } + + private: + object_ptr *assign = nullptr; + }; + // Releases any current reference held by this instance and returns a // pointer to the raw backing pointer. This is typically used for passing // to out parameters which are expected to store a new owned pointer directly. - T **for_output() { + constexpr Assignment for_output() noexcept { reset(); - return &ptr; + return Assignment(this); } operator bool() const { return ptr != nullptr; } T *get() const { return ptr; } - void reset(T *other = nullptr) { + void reset() { if (ptr) { Helper::release(ptr); } - ptr = other; + ptr = nullptr; } T *release() { T *ret = ptr; @@ -151,6 +257,8 @@ class object_ptr { // Assumes the reference count for owned_ptr. object_ptr(T *owned_ptr) : ptr(owned_ptr) {} T *ptr = nullptr; + + friend class Assignment; }; using hal_buffer_ptr = diff --git a/libshortfin/src/shortfin/support/iree_helpers_test.cc b/libshortfin/src/shortfin/support/iree_helpers_test.cc index a13b81b72..bf059ee98 100644 --- a/libshortfin/src/shortfin/support/iree_helpers_test.cc +++ b/libshortfin/src/shortfin/support/iree_helpers_test.cc @@ -29,6 +29,7 @@ struct iree_dummy_t { }; struct dummy_ptr_helper { + static void steal(iree_dummy_t *obj) {} static void retain(iree_dummy_t *obj) { obj->retain_count++; } static void release(iree_dummy_t *obj) { obj->release_count++; } }; diff --git a/libshortfin/src/shortfin/support/logging.h b/libshortfin/src/shortfin/support/logging.h index 55bd36347..337ebacae 100644 --- a/libshortfin/src/shortfin/support/logging.h +++ b/libshortfin/src/shortfin/support/logging.h @@ -9,6 +9,10 @@ #include "spdlog/spdlog.h" +#if !defined(SHORTFIN_LOG_LIFETIMES) +#define SHORTFIN_LOG_LIFETIMES 0 +#endif + namespace shortfin::logging { // TODO: Re-export doesn't really work like this. Need to define API @@ -18,6 +22,22 @@ using spdlog::error; using spdlog::info; using spdlog::warn; +#if SHORTFIN_LOG_LIFETIMES +template +inline void construct(const char* type_name, T* inst) { + info("new {}({})", type_name, static_cast(inst)); +} +template +inline void destruct(const char* type_name, T* inst) { + info("delete {}({})", type_name, static_cast(inst)); +} +#else +template +inline void construct(const char *type_name, T *) {} +template +inline void destruct(const char *type_name, T *) {} +#endif + } // namespace shortfin::logging #endif // SHORTFIN_SUPPORT_LOGGING_H