diff --git a/tiledb/libtiledb.pyx b/tiledb/libtiledb.pyx index fef27d6196..636c5c773c 100644 --- a/tiledb/libtiledb.pyx +++ b/tiledb/libtiledb.pyx @@ -11,10 +11,6 @@ from cpython.bytes cimport (PyBytes_GET_SIZE, PyBytes_FromString, PyBytes_FromStringAndSize) -from cpython.mem cimport (PyMem_Malloc, - PyMem_Realloc, - PyMem_Free) - from cpython.ref cimport (Py_INCREF, Py_DECREF, PyTypeObject) from libc.stdio cimport (FILE, stdout) @@ -52,6 +48,12 @@ cdef extern from "numpy/arrayobject.h": object obj) # Steals a reference to dtype, need to incref the dtype object PyArray_Scalar(void* ptr, np.dtype descr, object itemsize) + void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) + # Defensively use the NumPy allocator (which should be the same + # as malloc/PyMem_Malloc in almost all situations) + char* PyDataMem_NEW(size_t nbytes) nogil + void PyDataMem_FREE(void* data) nogil + char* PyDataMem_RENEW(void* data, size_t nbytes) nogil import sys from os.path import abspath @@ -3840,7 +3842,6 @@ cdef class ReadQuery(object): @property def _offsets(self): return self._offsets - def __init__(self, Array array, np.ndarray subarray, list attr_names, tiledb_layout_t layout): self._buffers = dict() self._offsets = dict() @@ -3856,6 +3857,7 @@ cdef class ReadQuery(object): vector [uint64_t*] offsets_ptrs void* subarray_ptr = NULL np.npy_intp dims[1] + np.ndarray tmparray bytes battr_name Py_ssize_t nattr = len(attr_names) @@ -3880,11 +3882,13 @@ cdef class ReadQuery(object): tiledb_query_free(&query_ptr) _raise_ctx_err(ctx_ptr, rc) - cdef uint64_t* buffer_sizes_ptr = PyMem_Malloc(nattr * sizeof(uint64_t)) + # lifetime: free in finally clause + cdef uint64_t* buffer_sizes_ptr = PyDataMem_NEW(nattr * sizeof(uint64_t)) if buffer_sizes_ptr == NULL: tiledb_query_free(&query_ptr) raise MemoryError() - cdef uint64_t* offsets_sizes_ptr = PyMem_Malloc(nattr * sizeof(uint64_t)) + # lifetime: free in finally clause + cdef uint64_t* offsets_sizes_ptr = PyDataMem_NEW(nattr * sizeof(uint64_t)) if offsets_sizes_ptr == NULL: tiledb_query_free(&query_ptr) raise MemoryError() @@ -3911,8 +3915,11 @@ cdef class ReadQuery(object): # allocate buffer to hold offsets for var-length attribute # NOTE offsets_sizes is in BYTES - offsets_ptrs.push_back( PyMem_Malloc((offsets_sizes_ptr[i]))) - #self._offsets[name] = np.empty(offsets_sizes_ptr[i], dtype=np.uint8) + + # lifetime: + # - free on exception + # - otherwise, ownership transferred to NumPy + offsets_ptrs.push_back( PyDataMem_NEW((offsets_sizes_ptr[i]))) else: rc = tiledb_array_max_buffer_size(ctx_ptr, array_ptr, battr_name, @@ -3922,8 +3929,10 @@ cdef class ReadQuery(object): _raise_ctx_err(ctx_ptr, rc) offsets_ptrs.push_back(NULL) - buffer_ptrs.push_back( PyMem_Malloc((buffer_sizes_ptr[i]))) - #self._buffers[name] = np.empty(buffer_sizes_ptr[i], dtype=np.uint8) + # lifetime: + # - free on exception + # - otherwise, ownership transferred to NumPy + buffer_ptrs.push_back( PyDataMem_NEW((buffer_sizes_ptr[i]))) # set the query buffers for i in range(nattr): @@ -3964,31 +3973,38 @@ cdef class ReadQuery(object): if name != "coords" and schema.attr(name).isvar: dims[0] = offsets_sizes_ptr[i] Py_INCREF(dtype) - self._offsets[name] = \ + tmparray = \ PyArray_NewFromDescr( np.ndarray, dtype, 1, dims, NULL, - PyMem_Realloc(offsets_ptrs[i], (offsets_sizes_ptr[i])), - np.NPY_OWNDATA, NULL) + PyDataMem_RENEW(offsets_ptrs[i], (offsets_sizes_ptr[i])), + 0, NULL) + PyArray_ENABLEFLAGS(tmparray, np.NPY_OWNDATA) + self._offsets[name] = tmparray dims[0] = buffer_sizes_ptr[i] Py_INCREF(dtype) - self._buffers[name] = \ + tmparray = \ PyArray_NewFromDescr( np.ndarray, dtype, 1, dims, NULL, - PyMem_Realloc(buffer_ptrs[i], (buffer_sizes_ptr[i])), - np.NPY_OWNDATA, NULL) + PyDataMem_RENEW(buffer_ptrs[i], (buffer_sizes_ptr[i])), + 0, NULL) + PyArray_ENABLEFLAGS(tmparray, np.NPY_OWNDATA) + self._buffers[name] = tmparray + except: + # we only free the PyDataMem_NEW malloc'd buffers on exception + # otherwise NumPy manages them for i in range(nattr): if buffer_ptrs[i] != NULL: - PyMem_Free(buffer_ptrs[i]) + PyDataMem_FREE(buffer_ptrs[i]) if offsets_ptrs[i] != NULL: - PyMem_Free(offsets_ptrs[i]) + PyDataMem_FREE(offsets_ptrs[i]) raise finally: - PyMem_Free(buffer_sizes_ptr) - PyMem_Free(offsets_sizes_ptr) + PyDataMem_FREE(buffer_sizes_ptr) + PyDataMem_FREE(offsets_sizes_ptr) tiledb_query_free(&query_ptr)