diff --git a/pyproject.toml b/pyproject.toml index d25ae73..46ea08f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "numcodecs>=0.13.0,<0.17", "numcodecs-combinators[xarray]~=0.2.13", "numcodecs-observers~=0.1.2", + "numcodecs-random-projection==0.1.0a3", "numcodecs-replace==0.1.0", "numcodecs-safeguards==0.1.0b2", "numcodecs-wasm==0.2.2", @@ -25,6 +26,7 @@ dependencies = [ "numcodecs-wasm-round==0.5.0", "numcodecs-wasm-sperr==0.2.0", "numcodecs-wasm-stochastic-rounding==0.2.0", + "numcodecs-wasm-swizzle-reshape==0.4.0", "numcodecs-wasm-sz3==0.7.0", "numcodecs-wasm-tthresh==0.3.0", "numcodecs-wasm-zfp==0.6.0", diff --git a/src/climatebenchpress/compressor/compressors/__init__.py b/src/climatebenchpress/compressor/compressors/__init__.py index 63da691..a92346c 100644 --- a/src/climatebenchpress/compressor/compressors/__init__.py +++ b/src/climatebenchpress/compressor/compressors/__init__.py @@ -2,7 +2,11 @@ "BitRound", "BitRoundPco", "Jpeg2000", + "RP", + "RPDct", "SafeguardedBitRoundPco", + "SafeguardedRP", + "SafeguardedRPDct", "SafeguardedSperr", "SafeguardedSz3", "SafeguardedZero", @@ -21,8 +25,12 @@ from .bitround import BitRound from .bitround_pco import BitRoundPco from .jpeg2000 import Jpeg2000 +from .rp import RP +from .rp_dct import RPDct from .safeguarded import ( SafeguardedBitRoundPco, + SafeguardedRP, + SafeguardedRPDct, SafeguardedSperr, SafeguardedSz3, SafeguardedZero, diff --git a/src/climatebenchpress/compressor/compressors/rp.py b/src/climatebenchpress/compressor/compressors/rp.py new file mode 100644 index 0000000..03ae677 --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/rp.py @@ -0,0 +1,25 @@ +__all__ = ["RP"] + +import numcodecs_random_projection +import numcodecs_wasm_swizzle_reshape +from numcodecs_combinators.framed import FramedCodecStack + +from .abc import Compressor + + +class RP(Compressor): + name = "rp" + description = "Random Projection (Gaussian)" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound, + method="gaussian", + seed=42, + max_block_memory=2**28, # 256 MiB + debug=True, + ), + ) diff --git a/src/climatebenchpress/compressor/compressors/rp_dct.py b/src/climatebenchpress/compressor/compressors/rp_dct.py new file mode 100644 index 0000000..ab8e4e5 --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/rp_dct.py @@ -0,0 +1,24 @@ +__all__ = ["RPDct"] + +import numcodecs_random_projection +import numcodecs_wasm_swizzle_reshape +from numcodecs_combinators.framed import FramedCodecStack + +from .abc import Compressor + + +class RPDct(Compressor): + name = "rp-dct" + description = "Random Projection (DCT)" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound, + method="dct", + max_block_memory=2**28, # 256 MiB + debug=True, + ), + ) diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py b/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py index 2fb5669..dc501c9 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py @@ -1,5 +1,7 @@ __all__ = [ "SafeguardedBitRoundPco", + "SafeguardedRP", + "SafeguardedRPDct", "SafeguardedSperr", "SafeguardedSz3", "SafeguardedZero", @@ -8,6 +10,8 @@ ] from .bitround_pco import SafeguardedBitRoundPco +from .rp import SafeguardedRP +from .rp_dct import SafeguardedRPDct from .sperr import SafeguardedSperr from .sz3 import SafeguardedSz3 from .zero import SafeguardedZero diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/rp.py b/src/climatebenchpress/compressor/compressors/safeguarded/rp.py new file mode 100644 index 0000000..736ebef --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguarded/rp.py @@ -0,0 +1,56 @@ +__all__ = ["SafeguardedRP"] + +import numcodecs_random_projection +import numcodecs_safeguards +import numcodecs_wasm_swizzle_reshape +from numcodecs_combinators.framed import FramedCodecStack + +from ..abc import Compressor + + +class SafeguardedRP(Compressor): + """Safeguarded RP compressor.""" + + name = "safeguarded-rp" + description = "Safeguarded(RP[Gaussian])" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardedCodec( + codec=FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound, + method="gaussian", + seed=42, + max_block_memory=2**28, # 256 MiB + debug=True, + ), + ), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + ], + ) + + @staticmethod + def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): + assert data_abs_min is not None, "data_abs_min must be provided" + + return numcodecs_safeguards.SafeguardedCodec( + # conservative rel->abs error bound transformation, + # same as convert_rel_error_to_abs_error + # so that we can inform the safeguards of the rel bound + codec=FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound * data_abs_min, + method="gaussian", + seed=42, + max_block_memory=2**28, # 256 MiB + debug=True, + ), + ), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + ], + ) diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py b/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py new file mode 100644 index 0000000..adc5e08 --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py @@ -0,0 +1,54 @@ +__all__ = ["SafeguardedRPDct"] + +import numcodecs_random_projection +import numcodecs_safeguards +import numcodecs_wasm_swizzle_reshape +from numcodecs_combinators.framed import FramedCodecStack + +from ..abc import Compressor + + +class SafeguardedRPDct(Compressor): + """Safeguarded RP (DCT) compressor.""" + + name = "safeguarded-rp-dct" + description = "Safeguarded(RP[DCT])" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardedCodec( + codec=FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound, + method="dct", + max_block_memory=2**28, # 256 MiB + debug=True, + ), + ), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + ], + ) + + @staticmethod + def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): + assert data_abs_min is not None, "data_abs_min must be provided" + + return numcodecs_safeguards.SafeguardedCodec( + # conservative rel->abs error bound transformation, + # same as convert_rel_error_to_abs_error + # so that we can inform the safeguards of the rel bound + codec=FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound * data_abs_min, + method="dct", + max_block_memory=2**28, # 256 MiB + debug=True, + ), + ), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + ], + ) diff --git a/src/climatebenchpress/compressor/scripts/compress.py b/src/climatebenchpress/compressor/scripts/compress.py index df7d40f..6f1e000 100644 --- a/src/climatebenchpress/compressor/scripts/compress.py +++ b/src/climatebenchpress/compressor/scripts/compress.py @@ -180,9 +180,7 @@ def compress( json.dump(measurements, f) with progress_bar(progress): - ds_new.to_zarr( - compressed_dataset_path, encoding=dict(), compute=False - ).compute() + ds_new.to_zarr(compressed_dataset_path, compute=False).compute() def compress_decompress(