diff --git a/src/Database/LSMTree/Internal/BlobRef.hs b/src/Database/LSMTree/Internal/BlobRef.hs index 9ef9f6554..1e460005d 100644 --- a/src/Database/LSMTree/Internal/BlobRef.hs +++ b/src/Database/LSMTree/Internal/BlobRef.hs @@ -39,10 +39,10 @@ import System.FS.BlockIO.API (HasBlockIO) -- | A raw blob reference is a reference to a blob within a blob file. -- --- The \"raw\" means that it does no reference counting, so does not maintain --- ownership of the 'BlobFile'. Thus these are only safe to use in the context --- of code that already (directly or indirectly) owns the blob file that the --- blob ref uses (such as within run merging). +-- The \"raw\" means that it does not maintain ownership of the 'BlobFile' to +-- keep it open. Thus these are only safe to use in the context of code that +-- already (directly or indirectly) owns the blob file that the blob ref uses +-- (such as within run merging). -- -- Thus these cannot be handed out via the API. Use 'WeakBlobRef' for that. -- @@ -56,9 +56,9 @@ data RawBlobRef m h = RawBlobRef { -- can return in the public API and can outlive their parent table. -- -- They are weak references in that they do not keep the file open using a --- reference count. So when we want to use our weak reference we have to --- dereference them to obtain a normal strong reference while we do the I\/O --- to read the blob. This ensures the file is not closed under our feet. +-- reference. So when we want to use our weak reference we have to dereference +-- them to obtain a normal strong reference while we do the I\/O to read the +-- blob. This ensures the file is not closed under our feet. -- -- See 'Database.LSMTree.Common.BlobRef' for more info. -- diff --git a/src/Database/LSMTree/Internal/Merge.hs b/src/Database/LSMTree/Internal/Merge.hs index 3bfc7830e..a5f30903c 100644 --- a/src/Database/LSMTree/Internal/Merge.hs +++ b/src/Database/LSMTree/Internal/Merge.hs @@ -142,8 +142,6 @@ abort Merge {..} = do -- -- All resources held by the merge are released, so do not use the it any more! -- --- The resulting run has a reference count of 1. --- -- This function will /not/ do any merging work if there is any remaining. That -- is, if not enough 'steps' were performed to exhaust the input 'Readers', this -- function will throw an error. @@ -153,7 +151,8 @@ abort Merge {..} = do -- -- Note: this function creates new 'Run' resources, so it is recommended to run -- this function with async exceptions masked. Otherwise, these resources can --- leak. +-- leak. And it must eventually be released with 'releaseRef'. +-- complete :: (MonadSTM m, MonadST m, MonadMask m) => Merge m h diff --git a/src/Database/LSMTree/Internal/MergeSchedule.hs b/src/Database/LSMTree/Internal/MergeSchedule.hs index 7332059fe..fc0de7924 100644 --- a/src/Database/LSMTree/Internal/MergeSchedule.hs +++ b/src/Database/LSMTree/Internal/MergeSchedule.hs @@ -169,8 +169,8 @@ releaseTableContent reg (TableContent _wb wbb levels cache) = do -- handles. This allows for quick access in the lookup code. Recomputing this -- cache should be relatively rare. -- --- Caches take reference counts for its runs on construction, and they release --- references when the cache is invalidated. This is done so that incremental +-- Caches keep references to its runs on construction, and they release each +-- reference when the cache is invalidated. This is done so that incremental -- merges can remove references for their input runs when a merge completes, -- without closing runs that might be in use for other operations such as -- lookups. This does mean that a cache can keep runs open for longer than diff --git a/src/Database/LSMTree/Internal/Run.hs b/src/Database/LSMTree/Internal/Run.hs index 7d905e854..c066ab4bc 100644 --- a/src/Database/LSMTree/Internal/Run.hs +++ b/src/Database/LSMTree/Internal/Run.hs @@ -208,7 +208,8 @@ fromMutable runRunDataCaching builder = do -> Ref (WriteBufferBlobs IO h) -> IO (Ref (Run IO h)) #-} -- | Write a write buffer to disk, including the blobs it contains. --- The resulting run has a reference count of 1. +-- +-- This creates a new 'Run' which must eventually be released with 'releaseRef'. -- -- TODO: As a possible optimisation, blobs could be written to a blob file -- immediately when they are added to the write buffer, avoiding the need to do @@ -250,7 +251,8 @@ data FileFormatError = FileFormatError FS.FsPath String -> IO (Ref (Run IO h)) #-} -- | Load a previously written run from disk, checking each file's checksum -- against the checksum file. --- The resulting 'Run' has a reference count of 1. +-- +-- This creates a new 'Run' which must eventually be released with 'releaseRef'. -- -- Exceptions will be raised when any of the file's contents don't match their -- checksum ('ChecksumError') or can't be parsed ('FileFormatError'). diff --git a/src/Database/LSMTree/Internal/RunBuilder.hs b/src/Database/LSMTree/Internal/RunBuilder.hs index 8a0fd6567..f439c59a9 100644 --- a/src/Database/LSMTree/Internal/RunBuilder.hs +++ b/src/Database/LSMTree/Internal/RunBuilder.hs @@ -73,7 +73,6 @@ data RunBuilder m h = RunBuilder { -> RunBloomFilterAlloc -> IO (RunBuilder IO h) #-} -- | Create an 'RunBuilder' to start building a run. --- The result will have an initial reference count of 1. -- -- NOTE: 'new' assumes that 'runDir' that the run is created in exists. new :: diff --git a/src/Database/LSMTree/Internal/RunReader.hs b/src/Database/LSMTree/Internal/RunReader.hs index 452e82083..8a9c95995 100644 --- a/src/Database/LSMTree/Internal/RunReader.hs +++ b/src/Database/LSMTree/Internal/RunReader.hs @@ -51,8 +51,15 @@ import System.FS.BlockIO.API (HasBlockIO) -- | Allows reading the k\/ops of a run incrementally, using its own read-only -- file handle and in-memory cache of the current disk page. -- --- Creating a 'RunReader' does not increase the run's reference count, so make --- sure the run remains open while using the reader. +-- Creating a 'RunReader' does not retain a reference to the 'Run', but does +-- retain an independent reference on the run's blob file. It is not necessary +-- to separately retain the 'Run' for correct use of the 'RunReader'. There is +-- one important caveat however: the 'RunReader' maintains the validity of +-- 'BlobRef's only up until the point where the reader is drained (or +-- explicitly closed). In particular this means 'BlobRefs' can be invalidated +-- as soon as the 'next' returns 'Empty'. If this is not sufficient then it is +-- necessary to separately retain a reference to the 'Run' or its 'BlobFile' to +-- preserve the validity of 'BlobRefs'. -- -- New pages are loaded when trying to read their first entry. -- diff --git a/src/Database/LSMTree/Internal/RunReaders.hs b/src/Database/LSMTree/Internal/RunReaders.hs index de914302c..75863845c 100644 --- a/src/Database/LSMTree/Internal/RunReaders.hs +++ b/src/Database/LSMTree/Internal/RunReaders.hs @@ -47,8 +47,18 @@ import qualified System.FS.API as FS -- Construct with 'new', then keep calling 'pop'. -- If aborting early, remember to call 'close'! -- --- Creating a 'RunReaders' does not increase the runs' reference count, so make --- sure they remain open while using the 'RunReaders'. +-- Creating a 'Readers' does not retain a reference to the input 'Run's or the +-- 'WriteBufferBlobs', but does retain an independent reference on their blob +-- files. It is not necessary to separately retain the 'Run's or the +-- 'WriteBufferBlobs' for correct use of the 'Readers'. There is one important +-- caveat however: to preserve the validity of 'BlobRef's then it is necessary +-- to separately retain a reference to the 'Run' or its 'BlobFile' to preserve +-- the validity of 'BlobRefs'. +-- +-- TODO: do this more nicely by changing 'Reader' to preserve the 'BlobFile' +-- ref until it is explicitly closed, and also retain the 'BlobFile' from the +-- WBB and release all of these 'BlobFiles' once the 'Readers' is itself closed. +-- data Readers m h = Readers { readersHeap :: !(Heap.MutableHeap (PrimState m) (ReadCtx m h)) -- | Since there is always one reader outside of the heap, we need to