Merge pull request #581 from JuliaIO/dev
* fix attribute loading

* Update ci.yml

* WIP: Mmappable Arrays (#582)

* wip: mmappable arrays

* tests

* include mmap_test

* disable broken mmap on windows

* update warning test

* downgrade testing (#547)

* downgrade testing

* bump compat for Requires

* bump compat for FileIO

* rm UUIDs

* skip Mmap

* remove test that only tests a FileIO feature

---------

Co-authored-by: Jonas Isensee <[email protected]>

* experimental disable commit (#544)

* experimental disable commit

* 1.6 compat and make disable_commit a field of the file

* some code cleanup

* get rid of StorageMessage structs

* chunked storage fix

* clean up filterpipeline

* Draft: experimental plain reconstruction (#522)

* experimental plain reconstruction

* Upgrade

* add test

---------

Co-authored-by: Jonas Isensee <[email protected]>
JonasIsensee and Jonas Isensee authored Aug 26, 2024
2 parents 4387b69 + 5ec40d2 commit 99782ad
Showing 24 changed files with 544 additions and 288 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/Downgrade.yml
@@ -0,0 +1,28 @@
name: Downgrade
on:
pull_request:
branches:
- master
paths-ignore:
- 'docs/**'
push:
branches:
- master
paths-ignore:
- 'docs/**'
jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
version: ['1']
steps:
- uses: actions/checkout@v4
- uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
- uses: cjdoris/julia-downgrade-compat-action@v1
with:
skip: Pkg,TOML,Mmap
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
@@ -3,9 +3,11 @@ on:
pull_request:
branches:
- master
- dev
push:
branches:
- master
- dev
tags: '*'
jobs:
test:
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,11 @@
## 0.4.52
- fix attribute loading
- new features: `readmmap`, `ismmappable`, and `allocate_early` (experimental API)
- adds Downgrade testing
- new feature: disable committing datatypes (restricts stored values to HDF5-native numbers, strings, and arrays)
- internal cleanup
- new experimental feature: reconstruct all committed types as `NamedTuple`s

## 0.4.51
- remove Unicode normalization support due to excessive performance loss
- rework of header message internals
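The memory-mapping feature listed in this changelog entry does not appear in the rendered diff below (its `src` changes were not loaded), so the following is only a rough usage sketch; the exact call shapes — in particular going through `JLD2.get_dataset` for a dataset handle — are assumptions based on the changelog wording and may differ from the real experimental API.

```julia
using JLD2

# Write a large array with an isbits element type; only such contiguously
# stored datasets are plausible candidates for memory-mapping.
jldopen("big.jld2", "w") do f
    f["A"] = zeros(Float64, 10_000)
end

# Read it back without copying, if the dataset qualifies.
jldopen("big.jld2", "r") do f
    dset = JLD2.get_dataset(f, "A")   # assumed accessor for a dataset handle
    if JLD2.ismmappable(dset)         # experimental, per the changelog
        A = JLD2.readmmap(dset)       # an Array backed by Mmap rather than a copy
        @show sum(A)
    end
end
```

Note that the commit message above disables the mmap tests on Windows, so the feature should be treated as unsupported there for now.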
12 changes: 3 additions & 9 deletions Project.toml
@@ -1,27 +1,21 @@
name = "JLD2"
uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
version = "0.4.51"
version = "0.4.52"

[deps]
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

[compat]
FileIO = "1"
FileIO = "1.5"
MacroTools = "0.5"
Mmap = "1"
OrderedCollections = "1"
PrecompileTools = "1"
Reexport = "1"
Requires = "1"
Requires = "1.3"
TranscodingStreams = "0.9, 0.10, 0.11"
UUIDs = "1"
julia = "1.6"
21 changes: 13 additions & 8 deletions src/JLD2.jl
@@ -1,11 +1,10 @@
module JLD2
using OrderedCollections: OrderedDict
using Reexport: @reexport
using MacroTools: MacroTools, @capture
using Mmap: Mmap
using Unicode: Unicode
using TranscodingStreams: TranscodingStreams
@reexport using FileIO: load, save
using FileIO: load, save
export load, save
using Requires: @require
using PrecompileTools: @setup_workload, @compile_workload

@@ -106,9 +105,12 @@ mutable struct JLDFile{T<:IO}
path::String
writable::Bool
written::Bool
plain::Bool
compress#::Union{Bool,Symbol}
mmaparrays::Bool
n_times_opened::Int
# Experimental feature: disable committing structs
disable_commit::Bool
datatype_locations::OrderedDict{RelOffset,CommittedDatatype}
datatypes::Vector{H5Datatype}
datatype_wsession::JLDWriteSession{Dict{UInt,RelOffset}}
@@ -124,11 +126,13 @@ mutable struct JLDFile{T<:IO}
root_group::Group{JLDFile{T}}
types_group::Group{JLDFile{T}}
base_address::UInt64


function JLDFile{T}(io::IO, path::AbstractString, writable::Bool, written::Bool,
plain::Bool,
compress,#::Union{Bool,Symbol},
mmaparrays::Bool) where T
f = new(io, path, writable, written, compress, mmaparrays, 1,
f = new(io, path, writable, written, plain, compress, mmaparrays, 1, false,
OrderedDict{RelOffset,CommittedDatatype}(), H5Datatype[],
JLDWriteSession(), Dict{String,Any}(), IdDict(), IdDict(), Dict{RelOffset,WeakRef}(),
DATA_START, Dict{RelOffset,GlobalHeap}(),
@@ -137,8 +141,8 @@ mutable struct JLDFile{T<:IO}
f
end
end
JLDFile(io::IO, path::AbstractString, writable::Bool, written::Bool, compress, mmaparrays::Bool) =
JLDFile{typeof(io)}(io, path, writable, written, compress, mmaparrays)
JLDFile(io::IO, path::AbstractString, writable::Bool, written::Bool, plain::Bool, compress, mmaparrays::Bool) =
JLDFile{typeof(io)}(io, path, writable, written, plain, compress, mmaparrays)

"""
fileoffset(f::JLDFile, x::RelOffset)
@@ -188,6 +192,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool,
mmaparrays::Bool=false,
typemap::Dict{String}=Dict{String,Any}(),
parallel_read::Bool=false,
plain::Bool=false
) where T<:Union{Type{IOStream},Type{MmapIO}}
mmaparrays && @warn "mmaparrays keyword is currently ignored" maxlog=1
verify_compressor(compress)
@@ -239,7 +244,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool,
io = openfile(iotype, fname, wr, create, truncate, fallback)
created = !exists || truncate
rname = realpath(fname)
f = JLDFile(io, rname, wr, created, compress, mmaparrays)
f = JLDFile(io, rname, wr, created, plain, compress, mmaparrays)

if !parallel_read
OPEN_FILES[rname] = WeakRef(f)
@@ -481,8 +486,8 @@ printtoc(io::IO, f::JLDFile; numlines = typemax(Int64)) =



include("headermessages.jl")
include("object_headers.jl")
include("headermessages.jl")
include("groups.jl")
include("dataspaces.jl")
include("attributes.jl")
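The `plain` keyword threaded through `jldopen` and the `JLDFile` constructor above pairs with the changelog entry about reconstructing committed types as `NamedTuple`s. A minimal sketch of the intended use, assuming default writing behaviour; the struct and field names are purely illustrative:

```julia
using JLD2

struct MyParams
    a::Int
    b::Float64
end

jldsave("params.jld2"; p = MyParams(1, 2.0))

# Experimental: skip reconstructing the committed Julia type and return a
# plain representation instead (expected to be NamedTuple-like).
jldopen("params.jld2", "r"; plain=true) do f
    @show f["p"]   # e.g. (a = 1, b = 2.0)
end
```

This is mainly useful when the original struct definition is not (or cannot be) loaded in the reading session.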
1 change: 0 additions & 1 deletion src/committed_datatype_introspection.jl
@@ -56,7 +56,6 @@ function stringify_object(f, offset)
dataspace = ReadDataspace()
attrs = EMPTY_READ_ATTRIBUTES
datatype::H5Datatype = PlaceholderH5Datatype()
chunked_storage::Bool = false
layout::DataLayout = DataLayout(0,LcCompact,0,-1)
filter_pipeline::FilterPipeline = FilterPipeline(Filter[])
for msg in HeaderMessageIterator(f, offset)
47 changes: 18 additions & 29 deletions src/compression.jl
@@ -106,6 +106,15 @@ function get_compressor(::Bool)
false, COMPRESSOR_TO_ID[:ZlibCompressor], m.ZlibCompressor()
end

function get_compressor(filter_id::UInt16)
modname, compressorname, decompressorname, = ID_TO_DECOMPRESSOR[filter_id]
invoke_again, m = checked_import(modname)
if invoke_again || !applicable(getproperty(m,compressorname))
_, compressor = Base.invokelatest(get_compressor, filter_id)
return true, compressor
end
return invoke_again, getproperty(m,compressorname)()
end
function get_decompressor(filter_id::UInt16)
modname, compressorname, decompressorname, = ID_TO_DECOMPRESSOR[filter_id]
invoke_again, m = checked_import(modname)
@@ -180,35 +189,15 @@ function write_chunked_storage_message( io::IO,
elsize::Int,
dims::NTuple{N,Int},
filtered_size::Int,
offset::RelOffset) where N
jlwrite(io, HeaderMessage(HmDataLayout, chunked_storage_message_size(N) - jlsizeof(HeaderMessage), 0))
jlwrite(io, UInt8(4)) # Version
jlwrite(io, UInt8(LcChunked)) # Layout Class
jlwrite(io, UInt8(2)) # Flags (= SINGLE_INDEX_WITH_FILTER)
jlwrite(io, UInt8(N+1)) # Dimensionality
jlwrite(io, UInt8(jlsizeof(Length))) # Dimensionality Size
for i = N:-1:1
jlwrite(io, Length(dims[i])) # Dimensions 1...N
end
jlwrite(io, Length(elsize)) # Element size (last dimension)
jlwrite(io, UInt8(1)) # Chunk Indexing Type (= Single Chunk)
jlwrite(io, Length(filtered_size)) # Size of filtered chunk
jlwrite(io, UInt32(0)) # Filters for chunk
jlwrite(io, offset) # Address
end


function write_compressed_data(cio, f, data, odr, wsession, filter_id, compressor)
write_filter_pipeline_message(cio, filter_id)

# deflate first
deflated = deflate_data(f, data, odr, wsession, compressor)

write_chunked_storage_message(cio, odr_sizeof(odr), size(data), length(deflated), h5offset(f, f.end_of_data))
jlwrite(f.io, end_checksum(cio))

f.end_of_data += length(deflated)
jlwrite(f.io, deflated)
data_address::RelOffset) where N
write_header_message(io, Val(HmDataLayout);
layout_class = LcChunked,
flags = 2, # (= SINGLE_INDEX_WITH_FILTER)
dimensions = UInt64.((reverse(dims)..., elsize)), # Reversed dimensions with element size as last dim
chunk_indexing_type = 1, # (= Single Chunk)
data_size = filtered_size,
filters = 0, # Filters for chunk
data_address)
end

function decompress!(inptr::Ptr, data_length, element_size, n, decompressor::TranscodingStreams.Codec)
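The new `get_compressor(filter_id::UInt16)` method above is internal plumbing that maps an HDF5 filter id back to a codec instance; users still select compression through the `compress` argument. As a reminder of the public entry points (standard JLD2 usage, not introduced by this PR — the `CodecZlib` pairing is just the common case):

```julia
using JLD2, CodecZlib

data = rand(1_000, 1_000)

jldsave("compressed.jld2", true; data)                     # default compressor
jldsave("compressed_zlib.jld2", ZlibCompressor(); data)    # explicit codec

jldopen("compressed_kw.jld2", "w"; compress=ZlibCompressor()) do f
    f["data"] = data
end
```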
8 changes: 8 additions & 0 deletions src/data/reconstructing_datatypes.jl
@@ -98,6 +98,11 @@ function jltype(f::JLDFile, cdt::CommittedDatatype)
end

datatype = read_attr_data(f, julia_type_attr)
if f.plain && !(datatype isa Upgrade) && !(datatype <: Tuple)
rr = jltype(f, dt)
return f.h5jltype[cdt] = rr
end

if written_type_attr !== nothing
# Custom serialization
custom_datatype = read_attr_data(f, written_type_attr)
@@ -415,6 +420,9 @@ function jlconvert(rr::ReadRepresentation{T,DataTypeODR()},
isunknowntype(m) && return m
unknown_params && return UnknownType{m, Tuple{params...}}
if hasparams
if f.plain && !(m === Tuple)
return Any
end
try
m = m{params...}
catch e
9 changes: 9 additions & 0 deletions src/data/writing_datatypes.jl
@@ -144,6 +144,9 @@ h5type(f::JLDFile, @nospecialize(x)) = h5type(f, writeas(typeof(x)), x)
# Make a compound datatype from a set of names and types
@nospecializeinfer function commit_compound(f::JLDFile, names::AbstractVector{Symbol},
@nospecialize(writtenas::DataType), @nospecialize(readas::Type))
if f.disable_commit
throw(ArgumentError("Attempted to commit DataType $writtenas but committing is disabled."))
end
types = writtenas.types
offsets = Int[]
h5names = Symbol[]
@@ -192,6 +195,9 @@ end
@nospecialize(writeas::DataType),
@nospecialize(readas::DataType),
attributes::WrittenAttribute...)
if f.disable_commit
throw(ArgumentError("Attempted to commit DataType $readas but committing is disabled."))
end
io = f.io

# This needs to be written this way or type inference gets unhappy...
@@ -362,6 +368,9 @@ function h5fieldtype(f::JLDFile, ::Type{T}, readas::Type, ::Initialized) where T
end

@lookup_committed f DataType
if f.disable_commit
throw(ArgumentError("Attempted to commit DataType $readas but committing is disabled."))
end
io = f.io
offset = f.end_of_data

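All three guards added above key off the new `disable_commit` field. How that flag is meant to be switched on by users is not visible in this excerpt, so the sketch below flips the field directly on the file object purely for illustration; only the resulting `ArgumentError` is taken from the code above, everything else is an assumption.

```julia
using JLD2

struct Foo
    x::Int
end

f = jldopen("nocommit.jld2", "w")
f.disable_commit = true    # assumed to be settable like this; see caveat above

f["n"] = 42                # HDF5-native numbers, strings, and arrays still work
f["s"] = "hello"

try
    f["foo"] = Foo(1)      # would need a committed datatype, so this should throw
catch err
    @show err              # ArgumentError("Attempted to commit DataType Foo ...")
end

close(f)
```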
8 changes: 8 additions & 0 deletions src/dataio.jl
@@ -227,6 +227,14 @@ function write_data(io::IOStream, f::JLDFile, data::Array{T}, odr::Type{T}, ::Re
nothing
end

function write_data(io::IOStream, f::JLDFile, data, odr, _, wsession::JLDWriteSession)
buf = Vector{UInt8}(undef, odr_sizeof(odr))
cp = Ptr{Cvoid}(pointer(buf))
h5convert!(cp, odr, f, data, wsession)
unsafe_write(io, Ptr{UInt8}(pointer(buf)), odr_sizeof(odr))
nothing
end

function write_data(io::BufferedWriter, f::JLDFile, data::Array{T}, odr::S,
::DataMode, wsession::JLDWriteSession) where {T,S}
position = io.position[]
84 changes: 15 additions & 69 deletions src/datalayouts.jl
@@ -1,31 +1,3 @@
struct CompactStorageMessage
hm::HeaderMessage
version::UInt8
layout_class::LayoutClass
data_size::UInt16
end
define_packed(CompactStorageMessage)
CompactStorageMessage(datasz::Int) =
CompactStorageMessage(
HeaderMessage(HmDataLayout, jlsizeof(CompactStorageMessage) - jlsizeof(HeaderMessage) + datasz, 0),
4, LcCompact, datasz
)

struct ContiguousStorageMessage
hm::HeaderMessage
version::UInt8
layout_class::LayoutClass
address::RelOffset
data_size::Length
end
define_packed(ContiguousStorageMessage)
ContiguousStorageMessage(datasz::Int, offset::RelOffset) =
ContiguousStorageMessage(
HeaderMessage(HmDataLayout, jlsizeof(ContiguousStorageMessage) - jlsizeof(HeaderMessage), 0),
4, LcContiguous, offset, datasz
)


## Left over header message parsing that does not have a good place.

struct DataLayout
@@ -87,47 +59,21 @@ function FilterPipeline(msg_::Hmessage)
nfilters = msg.nfilters
io = msg.m.io
seek(io, msg.m.address+2)
if version == 1
skip(io, 6)
filters = map(1:nfilters) do _
id = jlread(io, UInt16)
name_length = jlread(io, UInt16)
flags = jlread(io, UInt16)
nclient_vals = jlread(io, UInt16)
if iszero(name_length)
name = ""
else
name = read_bytestring(io)
skip(io, 8-mod1(sizeof(name), 8)-1)
end
client_data = jlread(io, UInt32, nclient_vals)
isodd(nclient_vals) && skip(io, 4)
Filter(id, flags, name, client_data)
end
return FilterPipeline(filters)
elseif version == 2
filters = map(1:nfilters) do _
id = jlread(io, UInt16)
if id > 255
name_length = jlread(io, UInt16)
flags = jlread(io, UInt16)
nclient_vals = jlread(io, UInt16)
if iszero(name_length)
name = ""
else
name = read_bytestring(io)
skip(io, 8-mod1(sizeof(name), 8)-1)
end
else
name = ""
flags = jlread(io, UInt16)
nclient_vals = jlread(io, UInt16)
end
client_data = jlread(io, UInt32, nclient_vals)
Filter(id, flags, name, client_data)
version == 1 && skip(io, 6)
filters = map(1:nfilters) do _
id = jlread(io, UInt16)
name_length = (version == 2 && id < 255) ? zero(UInt16) : jlread(io, UInt16)
flags = jlread(io, UInt16)
nclient_vals = jlread(io, UInt16)
if iszero(name_length)
name = ""
else
name = read_bytestring(io)
skip(io, 8-mod1(sizeof(name), 8)-1)
end
return FilterPipeline(filters)
else
throw(UnsupportedVersionException("Filter Pipeline Message version $version is not implemented"))
client_data = jlread(io, UInt32, nclient_vals)
(version == 1 && isodd(nclient_vals)) && skip(io, 4)
Filter(id, flags, name, client_data)
end
return FilterPipeline(filters)
end

2 comments on commit 99782ad

@JonasIsensee
Collaborator Author

@JuliaRegistrator register()

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/113843

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.4.52 -m "<description of version>" 99782ad05f1f5fd106d3ff7fc8d7f10852a608a8
git push origin v0.4.52
