Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "YAXArrayBase"
uuid = "90b8fcef-0c2d-428d-9c56-5f86629e9d14"
authors = ["Fabian Gans <fgans@bgc-jena.mpg.de>"]
version = "0.7.8"

[deps]
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Expand All @@ -18,21 +18,24 @@ DimensionalData = "0.27, 0.28, 0.29"
NetCDF = "0.11, 0.12"
Zarr = "0.8, 0.9"


[extensions]
ArchGDALExt = "ArchGDAL"
AxisArraysExt = "AxisArrays"
AxisKeysExt = "AxisKeys"
DimensionalDataExt = "DimensionalData"
NamedDimsExt = "NamedDims"
NetCDFExt = "NetCDF"
ZarrExt = ["Zarr", "ZipArchives", "DiskArrays"]

[weakdeps]
ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5"
DimensionalData = "0703355e-b756-11e9-17c0-8b28908087d0"
Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f"
NetCDF = "30363a11-5582-574a-97bb-aa9a979735b9"
Zarr = "0a941bbe-ad1d-11e8-39d9-ab76183a1d99"
ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c"
148 changes: 96 additions & 52 deletions ext/ZarrExt.jl
Original file line number Diff line number Diff line change
@@ -1,64 +1,108 @@
module ZarrExt
using YAXArrayBase
using Zarr: ZArray, ZGroup, zgroup, zcreate, to_zarrtype, zopen, Compressor, ZipStore
import DiskArrays: AbstractDiskArray, DiskArrays, Unchunked, Chunked, GridChunks
using ZipArchives: ZipReader
import YAXArrayBase: YAXArrayBase as YAB
export ZarrDataset

# Register this backend with YAXArrayBase: paths matching .zarr, .zarr/ or
# zarr.zip are routed to ZarrDataset.
function __init__()
    @debug "new driver key :zarr, updating backendlist."
    YAB.backendlist[:zarr] = ZarrDataset
    push!(YAB.backendregex, r"(.zarr$)|(.zarr/$)|(zarr.zip$)" => ZarrDataset)
end

# Thin wrapper around a Zarr group implementing the YAXArrayBase dataset API
struct ZarrDataset
    g::ZGroup
end

# Open a Zarr store located at path `g`. Paths ending in "zip" are opened
# through a ZipStore fed by a ZipReader over a DiskArray byte view of the
# file; other paths are handed to `zopen` directly. Fill values are kept
# as-is (fill_as_missing=false), consistent with `allow_missings` below.
function ZarrDataset(g::String; mode="r")
    store = if endswith(g, "zip")
        ZipStore(ZipReader(SimpleFileDiskArray(g)))
    else
        g
    end
    ZarrDataset(zopen(store, mode, fill_as_missing=false))
end

# Dimension names are stored reversed in the "_ARRAY_DIMENSIONS" attribute
# (add_var below writes them reversed too), so reverse them back on read.
YAB.get_var_dims(ds::ZarrDataset, name) = reverse(ds[name].attrs["_ARRAY_DIMENSIONS"])
# Every array contained in the wrapped Zarr group is reported as a variable.
YAB.get_varnames(ds::ZarrDataset) = collect(keys(ds.g.arrays))
# Return the attributes of variable `name`. The Zarr-level fill value is
# exposed as a "_FillValue" attribute to be consistent with the NetCDF backend.
function YAB.get_var_attrs(ds::ZarrDataset, name)
    zarray = ds[name]
    fv = zarray.metadata.fill_value
    fv === nothing && return ds[name].attrs
    return merge(ds[name].attrs, Dict("_FillValue" => fv))
end
# Group-level attributes serve as the dataset's global attributes.
YAB.get_global_attrs(ds::ZarrDataset) = ds.g.attrs
# Indexing and key lookup delegate to the wrapped ZGroup.
Base.getindex(ds::ZarrDataset, i) = ds.g[i]
Base.haskey(ds::ZarrDataset, k) = haskey(ds.g, k)

# function add_var(p::ZarrDataset, T::Type{>:Missing}, varname, s, dimnames, attr; kwargs...)
#     S = Base.nonmissingtype(T)
#     add_var(p,S, varname, s, dimnames, attr; fill_value = defaultfillval(S), fill_as_missing=true, kwargs...)
# end

# Create a new variable `varname` of element type `T` and size `s` in the
# dataset. Dimension names are written reversed into the "_ARRAY_DIMENSIONS"
# attribute (matching get_var_dims above); the fill value is taken from the
# "_FillValue" or "missing_value" attribute, falling back to the backend
# default for `T`. Returns the created ZArray.
function YAB.add_var(p::ZarrDataset, T::Type, varname, s, dimnames, attr;
    chunksize=s, fill_as_missing=false, kwargs...)
    attr2 = merge(attr, Dict("_ARRAY_DIMENSIONS" => reverse(collect(dimnames))))
    fv = get(attr, "_FillValue", get(attr, "missing_value", YAB.defaultfillval(T)))
    # Drop NaN-valued float attributes before writing (presumably because
    # NaN cannot round-trip through attribute serialization — TODO confirm)
    attr3 = filter(attr2) do (k, v)
        !isa(v, AbstractFloat) || !isnan(v)
    end
    za = zcreate(T, p.g, varname, s...; fill_value=fv, fill_as_missing, attrs=attr3, chunks=chunksize, kwargs...)
    za
end

#Special case for init with Arrays: create the variable from the array's
#element type and size, copy the data in, and return the input array.
function YAB.add_var(p::ZarrDataset, a::AbstractArray, varname, dimnames, attr;
    kwargs...)
    T = to_zarrtype(a)
    b = add_var(p, T, varname, size(a), dimnames, attr; kwargs...)
    b .= a
    a
end

# Create an empty Zarr group at `path` with optional global attributes and
# wrap it as a ZarrDataset.
YAB.create_empty(::Type{ZarrDataset}, path, gatts=Dict()) = ZarrDataset(zgroup(path, attrs=gatts))


#Add ability to read zipped zarrs


# A minimal 1-D UInt8 DiskArray view over a local file, used above to feed
# ZipArchives.ZipReader when opening zipped Zarr stores.
struct SimpleFileDiskArray{C<:Union{Int,Nothing}} <: AbstractDiskArray{UInt8,1}
    file::String  # path of the underlying file
    s::Int        # file size in bytes (cached at construction)
    chunksize::C  # read chunk size in bytes, or `nothing` for unchunked
end
# The array has one element per byte of the file.
Base.size(s::SimpleFileDiskArray) = (s.s,)
# Construct a byte-view over `filename`, caching its current size.
# Throws ArgumentError when the file does not exist.
function SimpleFileDiskArray(filename; chunksize=nothing)
    isfile(filename) || throw(ArgumentError("File $filename does not exist"))
    return SimpleFileDiskArray(filename, filesize(filename), chunksize)
end
# Fill `aout` with length(aout) bytes of the file starting at the
# (1-based) offset first(i).
function DiskArrays.readblock!(a::SimpleFileDiskArray, aout, i::AbstractUnitRange)
    io = open(a.file)
    try
        seek(io, first(i) - 1)
        read!(io, aout)
    finally
        close(io)
    end
end
# Report chunking: the array is chunked exactly when an explicit chunk size
# was supplied at construction.
DiskArrays.haschunks(a::SimpleFileDiskArray) =
    isnothing(a.chunksize) ? Unchunked() : Chunked()

# Chunk iterator: an explicit chunk size yields a regular 1-D chunk grid;
# otherwise fall back to the DiskArrays size estimate.
function DiskArrays.eachchunk(a::SimpleFileDiskArray)
    isnothing(a.chunksize) && return DiskArrays.estimate_chunksize(a)
    return GridChunks((a.s,), (a.chunksize,))
end


# Backend capability flags reported to YAXArrayBase.
YAB.allow_parallel_write(::ZarrDataset) = true
# Fill values are not converted to `missing` (datasets are opened with
# fill_as_missing=false above).
YAB.allow_missings(::ZarrDataset) = false
# Wrap an already-opened ZGroup as a dataset.
YAB.to_dataset(g::ZGroup; kwargs...) = ZarrDataset(g)
# A ZArray whose compressor type parameter is a Compressor is compressed.
YAB.iscompressed(a::ZArray{<:Any,<:Any,<:Compressor}) = true

end
Loading