From c7ad7316ddcf22ef4fdec090726e9e975acaa971 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 25 Aug 2025 11:44:45 +0300 Subject: [PATCH 1/9] Add random projection codec to the benchmark --- .../compressor/compressors/__init__.py | 4 ++++ .../compressor/compressors/rp.py | 21 +++++++++++++++++++ .../compressor/compressors/rp_dct.py | 19 +++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 src/climatebenchpress/compressor/compressors/rp.py create mode 100644 src/climatebenchpress/compressor/compressors/rp_dct.py diff --git a/src/climatebenchpress/compressor/compressors/__init__.py b/src/climatebenchpress/compressor/compressors/__init__.py index 63da691..fbbbac0 100644 --- a/src/climatebenchpress/compressor/compressors/__init__.py +++ b/src/climatebenchpress/compressor/compressors/__init__.py @@ -2,6 +2,8 @@ "BitRound", "BitRoundPco", "Jpeg2000", + "RP", + "RPDct", "SafeguardedBitRoundPco", "SafeguardedSperr", "SafeguardedSz3", @@ -21,6 +23,8 @@ from .bitround import BitRound from .bitround_pco import BitRoundPco from .jpeg2000 import Jpeg2000 +from .rp import RP +from .rp_dct import RPDct from .safeguarded import ( SafeguardedBitRoundPco, SafeguardedSperr, diff --git a/src/climatebenchpress/compressor/compressors/rp.py b/src/climatebenchpress/compressor/compressors/rp.py new file mode 100644 index 0000000..e3a1456 --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/rp.py @@ -0,0 +1,21 @@ +__all__ = ["RP"] + +import numcodecs_random_projection +import numcodecs_wasm_swizzle_reshape +from numcodecs_combinators.stack import CodecStack + +from .abc import Compressor + + +class RP(Compressor): + name = "rp" + description = "Random Projection (Gaussian)" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return CodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound, method="gaussian", seed=42 + ), + ) diff --git a/src/climatebenchpress/compressor/compressors/rp_dct.py b/src/climatebenchpress/compressor/compressors/rp_dct.py new file mode 100644 index 0000000..0a3422b --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/rp_dct.py @@ -0,0 +1,19 @@ +__all__ = ["RPDct"] + +import numcodecs_random_projection +import numcodecs_wasm_swizzle_reshape +from numcodecs_combinators.stack import CodecStack + +from .abc import Compressor + + +class RPDct(Compressor): + name = "rp-dct" + description = "Random Projection (DCT)" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return CodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec(mae=error_bound, method="dct", seed=0), + ) From b8984b8d5b04b7c26d20a3540f28f31fb174657e Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Mon, 25 Aug 2025 11:48:31 +0300 Subject: [PATCH 2/9] Add numcodecs-wasm-swizzle-reshape to the deps --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index d25ae73..70b7770 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "numcodecs-wasm-round==0.5.0", "numcodecs-wasm-sperr==0.2.0", "numcodecs-wasm-stochastic-rounding==0.2.0", + "numcodecs-wasm-swizzle-reshape==0.4.0", "numcodecs-wasm-sz3==0.7.0", "numcodecs-wasm-tthresh==0.3.0", "numcodecs-wasm-zfp==0.6.0", From 320662d8b4f757ab7be0ecb818bb2f6ad8b861bd Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 29 Aug 2025 12:52:55 +0300 Subject: [PATCH 3/9] Enable debug logging for the RP codecs --- src/climatebenchpress/compressor/compressors/rp.py | 2 +- src/climatebenchpress/compressor/compressors/rp_dct.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/climatebenchpress/compressor/compressors/rp.py b/src/climatebenchpress/compressor/compressors/rp.py index e3a1456..dfa942b 100644 --- a/src/climatebenchpress/compressor/compressors/rp.py +++ b/src/climatebenchpress/compressor/compressors/rp.py @@ -16,6 +16,6 @@ def abs_bound_codec(error_bound, **kwargs): return CodecStack( numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), numcodecs_random_projection.RPCodec( - mae=error_bound, method="gaussian", seed=42 + mae=error_bound, method="gaussian", seed=42, debug=True ), ) diff --git a/src/climatebenchpress/compressor/compressors/rp_dct.py b/src/climatebenchpress/compressor/compressors/rp_dct.py index 0a3422b..9a8d771 100644 --- a/src/climatebenchpress/compressor/compressors/rp_dct.py +++ b/src/climatebenchpress/compressor/compressors/rp_dct.py @@ -15,5 +15,7 @@ class RPDct(Compressor): def abs_bound_codec(error_bound, **kwargs): return CodecStack( numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), - numcodecs_random_projection.RPCodec(mae=error_bound, method="dct", seed=0), + numcodecs_random_projection.RPCodec( + mae=error_bound, method="dct", seed=0, debug=True + ), ) From 6a7a38f19eeaf76de1096760249c540b379e34eb Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 25 Feb 2026 11:32:16 +0200 Subject: [PATCH 4/9] Add safeguarded RP codecs --- .../compressor/compressors/__init__.py | 4 ++ .../compressor/compressors/rp.py | 4 +- .../compressor/compressors/rp_dct.py | 4 +- .../compressors/safeguarded/__init__.py | 4 ++ .../compressor/compressors/safeguarded/rp.py | 51 +++++++++++++++++++ .../compressors/safeguarded/rp_dct.py | 51 +++++++++++++++++++ 6 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 src/climatebenchpress/compressor/compressors/safeguarded/rp.py create mode 100644 src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py diff --git a/src/climatebenchpress/compressor/compressors/__init__.py b/src/climatebenchpress/compressor/compressors/__init__.py index fbbbac0..a92346c 100644 --- a/src/climatebenchpress/compressor/compressors/__init__.py +++ b/src/climatebenchpress/compressor/compressors/__init__.py @@ -5,6 +5,8 @@ "RP", "RPDct", "SafeguardedBitRoundPco", + "SafeguardedRP", + "SafeguardedRPDct", "SafeguardedSperr", "SafeguardedSz3", "SafeguardedZero", @@ -27,6 +29,8 @@ from .rp_dct import RPDct from .safeguarded import ( SafeguardedBitRoundPco, + SafeguardedRP, + SafeguardedRPDct, SafeguardedSperr, SafeguardedSz3, SafeguardedZero, diff --git a/src/climatebenchpress/compressor/compressors/rp.py b/src/climatebenchpress/compressor/compressors/rp.py index dfa942b..a4ecf16 100644 --- a/src/climatebenchpress/compressor/compressors/rp.py +++ b/src/climatebenchpress/compressor/compressors/rp.py @@ -2,7 +2,7 @@ import numcodecs_random_projection import numcodecs_wasm_swizzle_reshape -from numcodecs_combinators.stack import CodecStack +from numcodecs_combinators.framed import FramedCodecStack from .abc import Compressor @@ -13,7 +13,7 @@ class RP(Compressor): @staticmethod def abs_bound_codec(error_bound, **kwargs): - return CodecStack( + return FramedCodecStack( numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), numcodecs_random_projection.RPCodec( mae=error_bound, method="gaussian", seed=42, debug=True diff --git a/src/climatebenchpress/compressor/compressors/rp_dct.py b/src/climatebenchpress/compressor/compressors/rp_dct.py index 9a8d771..a0e81f4 100644 --- a/src/climatebenchpress/compressor/compressors/rp_dct.py +++ b/src/climatebenchpress/compressor/compressors/rp_dct.py @@ -2,7 +2,7 @@ import numcodecs_random_projection import numcodecs_wasm_swizzle_reshape -from numcodecs_combinators.stack import CodecStack +from numcodecs_combinators.framed import FramedCodecStack from .abc import Compressor @@ -13,7 +13,7 @@ class RPDct(Compressor): @staticmethod def abs_bound_codec(error_bound, **kwargs): - return CodecStack( + return FramedCodecStack( numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), numcodecs_random_projection.RPCodec( mae=error_bound, method="dct", seed=0, debug=True diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py b/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py index 2fb5669..dc501c9 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/__init__.py @@ -1,5 +1,7 @@ __all__ = [ "SafeguardedBitRoundPco", + "SafeguardedRP", + "SafeguardedRPDct", "SafeguardedSperr", "SafeguardedSz3", "SafeguardedZero", @@ -8,6 +10,8 @@ ] from .bitround_pco import SafeguardedBitRoundPco +from .rp import SafeguardedRP +from .rp_dct import SafeguardedRPDct from .sperr import SafeguardedSperr from .sz3 import SafeguardedSz3 from .zero import SafeguardedZero diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/rp.py b/src/climatebenchpress/compressor/compressors/safeguarded/rp.py new file mode 100644 index 0000000..804b24e --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguarded/rp.py @@ -0,0 +1,51 @@ +__all__ = ["SafeguardedRP"] + +import numcodecs_random_projection +import numcodecs_safeguards +import numcodecs_wasm_swizzle_reshape +from numcodecs_combinators.framed import FramedCodecStack + +from ..abc import Compressor + + +class SafeguardedRP(Compressor): + """Safeguarded RP compressor.""" + + name = "safeguarded-rp" + description = "Safeguarded(RP[Gaussian])" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardedCodec( + codec=FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound, method="gaussian", seed=42, debug=True + ), + ), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + ], + ) + + @staticmethod + def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): + assert data_abs_min is not None, "data_abs_min must be provided" + + return numcodecs_safeguards.SafeguardedCodec( + # conservative rel->abs error bound transformation, + # same as convert_rel_error_to_abs_error + # so that we can inform the safeguards of the rel bound + codec=FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound * data_abs_min, + method="gaussian", + seed=42, + debug=True, + ), + ), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + ], + ) diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py b/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py new file mode 100644 index 0000000..012b37c --- /dev/null +++ b/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py @@ -0,0 +1,51 @@ +__all__ = ["SafeguardedRPDct"] + +import numcodecs_random_projection +import numcodecs_safeguards +import numcodecs_wasm_swizzle_reshape +from numcodecs_combinators.framed import FramedCodecStack + +from ..abc import Compressor + + +class SafeguardedRPDct(Compressor): + """Safeguarded RP (DCT) compressor.""" + + name = "safeguarded-rp-dct" + description = "Safeguarded(RP[DCT])" + + @staticmethod + def abs_bound_codec(error_bound, **kwargs): + return numcodecs_safeguards.SafeguardedCodec( + codec=FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound, method="dct", seed=0, debug=True + ), + ), + safeguards=[ + dict(kind="eb", type="abs", eb=error_bound, equal_nan=True), + ], + ) + + @staticmethod + def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): + assert data_abs_min is not None, "data_abs_min must be provided" + + return numcodecs_safeguards.SafeguardedCodec( + # conservative rel->abs error bound transformation, + # same as convert_rel_error_to_abs_error + # so that we can inform the safeguards of the rel bound + codec=FramedCodecStack( + numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), + numcodecs_random_projection.RPCodec( + mae=error_bound * data_abs_min, + method="dct", + seed=0, + debug=True, + ), + ), + safeguards=[ + dict(kind="eb", type="rel", eb=error_bound, equal_nan=True), + ], + ) From b9aec1d6ce7ccfb5c0ff44828ca66d6947b42ca1 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Wed, 25 Feb 2026 11:51:51 +0200 Subject: [PATCH 5/9] Add numcodecs-random-projection dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 70b7770..12adc21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "numcodecs>=0.13.0,<0.17", "numcodecs-combinators[xarray]~=0.2.13", "numcodecs-observers~=0.1.2", + "numcodecs-random-projection==0.1.0a1", "numcodecs-replace==0.1.0", "numcodecs-safeguards==0.1.0b2", "numcodecs-wasm==0.2.2", From df690086d9e95773787074724481eab9f5aac4f3 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 26 Feb 2026 12:03:09 +0200 Subject: [PATCH 6/9] minor cleanup --- src/climatebenchpress/compressor/scripts/compress.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/climatebenchpress/compressor/scripts/compress.py b/src/climatebenchpress/compressor/scripts/compress.py index df7d40f..6f1e000 100644 --- a/src/climatebenchpress/compressor/scripts/compress.py +++ b/src/climatebenchpress/compressor/scripts/compress.py @@ -180,9 +180,7 @@ def compress( json.dump(measurements, f) with progress_bar(progress): - ds_new.to_zarr( - compressed_dataset_path, encoding=dict(), compute=False - ).compute() + ds_new.to_zarr(compressed_dataset_path, compute=False).compute() def compress_decompress( From 94bf85421acf08dbc56916adffd7beb5d8eba955 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 26 Feb 2026 12:25:19 +0200 Subject: [PATCH 7/9] Upgrade numcodecs-random-projection to v0.1.0a2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 12adc21..9f6a062 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ dependencies = [ "numcodecs>=0.13.0,<0.17", "numcodecs-combinators[xarray]~=0.2.13", "numcodecs-observers~=0.1.2", - "numcodecs-random-projection==0.1.0a1", + "numcodecs-random-projection==0.1.0a2", "numcodecs-replace==0.1.0", "numcodecs-safeguards==0.1.0b2", "numcodecs-wasm==0.2.2", From 93c30ef005b08c557166afeb634583e246a59172 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 27 Feb 2026 06:26:45 +0200 Subject: [PATCH 8/9] prepare for reproducible block size --- src/climatebenchpress/compressor/compressors/rp.py | 6 +++++- src/climatebenchpress/compressor/compressors/rp_dct.py | 5 ++++- .../compressor/compressors/safeguarded/rp.py | 7 ++++++- .../compressor/compressors/safeguarded/rp_dct.py | 7 +++++-- 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/climatebenchpress/compressor/compressors/rp.py b/src/climatebenchpress/compressor/compressors/rp.py index a4ecf16..03ae677 100644 --- a/src/climatebenchpress/compressor/compressors/rp.py +++ b/src/climatebenchpress/compressor/compressors/rp.py @@ -16,6 +16,10 @@ def abs_bound_codec(error_bound, **kwargs): return FramedCodecStack( numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), numcodecs_random_projection.RPCodec( - mae=error_bound, method="gaussian", seed=42, debug=True + mae=error_bound, + method="gaussian", + seed=42, + max_block_memory=2**28, # 256 MiB + debug=True, ), ) diff --git a/src/climatebenchpress/compressor/compressors/rp_dct.py b/src/climatebenchpress/compressor/compressors/rp_dct.py index a0e81f4..ab8e4e5 100644 --- a/src/climatebenchpress/compressor/compressors/rp_dct.py +++ b/src/climatebenchpress/compressor/compressors/rp_dct.py @@ -16,6 +16,9 @@ def abs_bound_codec(error_bound, **kwargs): return FramedCodecStack( numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), numcodecs_random_projection.RPCodec( - mae=error_bound, method="dct", seed=0, debug=True + mae=error_bound, + method="dct", + max_block_memory=2**28, # 256 MiB + debug=True, ), ) diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/rp.py b/src/climatebenchpress/compressor/compressors/safeguarded/rp.py index 804b24e..736ebef 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/rp.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/rp.py @@ -20,7 +20,11 @@ def abs_bound_codec(error_bound, **kwargs): codec=FramedCodecStack( numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), numcodecs_random_projection.RPCodec( - mae=error_bound, method="gaussian", seed=42, debug=True + mae=error_bound, + method="gaussian", + seed=42, + max_block_memory=2**28, # 256 MiB + debug=True, ), ), safeguards=[ @@ -42,6 +46,7 @@ def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): mae=error_bound * data_abs_min, method="gaussian", seed=42, + max_block_memory=2**28, # 256 MiB debug=True, ), ), diff --git a/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py b/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py index 012b37c..adc5e08 100644 --- a/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py +++ b/src/climatebenchpress/compressor/compressors/safeguarded/rp_dct.py @@ -20,7 +20,10 @@ def abs_bound_codec(error_bound, **kwargs): codec=FramedCodecStack( numcodecs_wasm_swizzle_reshape.SwizzleReshape(axes=[[0, 1, 2], [3, 4]]), numcodecs_random_projection.RPCodec( - mae=error_bound, method="dct", seed=0, debug=True + mae=error_bound, + method="dct", + max_block_memory=2**28, # 256 MiB + debug=True, ), ), safeguards=[ @@ -41,7 +44,7 @@ def rel_bound_codec(error_bound, *, data_abs_min=None, **kwargs): numcodecs_random_projection.RPCodec( mae=error_bound * data_abs_min, method="dct", - seed=0, + max_block_memory=2**28, # 256 MiB debug=True, ), ), From 3c24522a90c620a89d73cf90acc9a999e3c3380b Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Fri, 27 Feb 2026 09:37:33 +0200 Subject: [PATCH 9/9] Bump numcodecs-random-projection to v0.1.0a3 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9f6a062..46ea08f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ dependencies = [ "numcodecs>=0.13.0,<0.17", "numcodecs-combinators[xarray]~=0.2.13", "numcodecs-observers~=0.1.2", - "numcodecs-random-projection==0.1.0a2", + "numcodecs-random-projection==0.1.0a3", "numcodecs-replace==0.1.0", "numcodecs-safeguards==0.1.0b2", "numcodecs-wasm==0.2.2",